From caf3f4bdb535f73c6e7b828bc98097f275f819bb Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 17 Jul 2019 22:49:08 +0900 Subject: [PATCH 0001/1202] h8300: fix memset return value. The return address is not as specified. Signed-off-by: Yoshinori Sato --- arch/h8300/lib/memset.S | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/h8300/lib/memset.S b/arch/h8300/lib/memset.S index 2d1abc37fd08b..df873779bb5e0 100644 --- a/arch/h8300/lib/memset.S +++ b/arch/h8300/lib/memset.S @@ -19,13 +19,15 @@ ;; c = er1(r1l) ;; count = er2 memset: - btst #0,r0l + mov.l er4,@-sp + mov.l er0,er4 + btst #0,r4l beq 2f ;; odd address 1: - mov.b r1l,@er0 - adds #1,er0 + mov.b r1l,@er4 + adds #1,er4 dec.l #1,er2 beq 6f @@ -46,8 +48,8 @@ memset: mov.b r1l,r1h mov.w r1,e1 3: - mov.l er1,@er0 - adds #4,er0 + mov.l er1,@er4 + adds #4,er4 dec.l #1,er2 bne 3b 4: @@ -55,11 +57,12 @@ memset: and.b #3,r3l beq 6f 5: - mov.b r1l,@er0 - adds #1,er0 + mov.b r1l,@er4 + adds #1,er4 dec.b r3l bne 5b 6: + mov.l @sp+,er4 rts clear_user: From 3e138fe1a4fb6aa834b46529e4faaee4ce2d5505 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 17 Jul 2019 22:55:00 +0900 Subject: [PATCH 0002/1202] h8300: Add missing symbol "BOOT_LINK_OFFSET" Signed-off-by: Yoshinori Sato --- arch/h8300/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ec800e9d5aadf..14bb45644c0c8 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -48,4 +48,7 @@ config NR_CPUS int default 1 +config BOOT_LINK_OFFSET + hex "zImage link offset" + source "arch/h8300/Kconfig.cpu" From ece7be2c964c2aa5015e74628ca33c03a5ff4e47 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 23 Jul 2019 19:21:06 +0900 Subject: [PATCH 0003/1202] h8300: move definition of __kernel_size_t etc. to posix_types.h These types should be defined in posix_types.h, not in bitsperlong.h . With these defines moved, h8300-specific bitsperlong.h is no longer needed since Kbuild will automatically create a wrapper of include/uapi/asm-generic/bitsperlong.h Signed-off-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/include/uapi/asm/bitsperlong.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h new file mode 100644 index 0000000000000..0a9ffb344ea99 --- /dev/null +++ b/arch/h8300/include/uapi/asm/bitsperlong.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_H8300_BITS_PER_LONG +#define _UAPI__ASM_H8300_BITS_PER_LONG + +#include + +#if !defined(__ASSEMBLY__) +/* h8300-unknown-linux required long */ +#define __kernel_size_t __kernel_size_t +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +#endif + +#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */ From de98871736825349b367275b0790e42ea4157350 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 16 Apr 2020 13:43:01 +0900 Subject: [PATCH 0004/1202] h8300: move definition of __kernel_size_t etc. to posix_types.h These types should be defined in posix_types.h, not in bitsperlong.h . 
With these defines moved, h8300-specific bitsperlong.h is no longer needed since Kbuild will automatically create a wrapper of include/uapi/asm-generic/bitsperlong.h Signed-off-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/include/uapi/asm/bitsperlong.h | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 arch/h8300/include/uapi/asm/bitsperlong.h diff --git a/arch/h8300/include/uapi/asm/bitsperlong.h b/arch/h8300/include/uapi/asm/bitsperlong.h deleted file mode 100644 index 0a9ffb344ea99..0000000000000 --- a/arch/h8300/include/uapi/asm/bitsperlong.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_H8300_BITS_PER_LONG -#define _UAPI__ASM_H8300_BITS_PER_LONG - -#include - -#if !defined(__ASSEMBLY__) -/* h8300-unknown-linux required long */ -#define __kernel_size_t __kernel_size_t -typedef unsigned long __kernel_size_t; -typedef long __kernel_ssize_t; -typedef long __kernel_ptrdiff_t; -#endif - -#endif /* _UAPI__ASM_H8300_BITS_PER_LONG */ From 5d90be1ecf44a116b503649e67e952a7ec070745 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Tue, 13 Aug 2019 19:19:35 +0900 Subject: [PATCH 0005/1202] h8300: Fix BOOT_LINK_OFFSET Signed-off-by: Yoshinori Sato --- arch/h8300/Kconfig | 3 --- arch/h8300/Kconfig.cpu | 4 ++++ arch/h8300/configs/edosk2674_defconfig | 10 +++------- arch/h8300/configs/h8300h-sim_defconfig | 8 ++------ arch/h8300/configs/h8s-sim_defconfig | 8 ++------ 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 14bb45644c0c8..ec800e9d5aadf 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -48,7 +48,4 @@ config NR_CPUS int default 1 -config BOOT_LINK_OFFSET - hex "zImage link offset" - source "arch/h8300/Kconfig.cpu" diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e622..2b9cbaf41cd00 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -97,4 +97,8 @@ config OFFSET hex "Load offset" default 0 +config BOOT_LINK_OFFSET + hex "zImage link offset" + default 0x200000 + endmenu diff --git a/arch/h8300/configs/edosk2674_defconfig b/arch/h8300/configs/edosk2674_defconfig index 23791dcf6c259..bcf2edb8fff94 100644 --- a/arch/h8300/configs/edosk2674_defconfig +++ b/arch/h8300/configs/edosk2674_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y +CONFIG_BOOT_LINK_OFFSET=0x400000 +CONFIG_H8S_EDOSK2674=y # CONFIG_BLOCK is not set -CONFIG_H8S_SIM=y -CONFIG_H8300_BUILTIN_DTB="h8s_sim" # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not 
set diff --git a/arch/h8300/configs/h8300h-sim_defconfig b/arch/h8300/configs/h8300h-sim_defconfig index 7fc9c2f0acc00..1b90399758f3a 100644 --- a/arch/h8300/configs/h8300h-sim_defconfig +++ b/arch/h8300/configs/h8300h-sim_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_BLOCK is not set CONFIG_H8300H_SIM=y CONFIG_H8300_BUILTIN_DTB="h8300h_sim" +# CONFIG_BLOCK is not set # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_EARLYCON=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set diff --git a/arch/h8300/configs/h8s-sim_defconfig b/arch/h8300/configs/h8s-sim_defconfig index 23791dcf6c259..4d46adcc21a48 100644 --- a/arch/h8300/configs/h8s-sim_defconfig +++ b/arch/h8300/configs/h8s-sim_defconfig @@ -1,9 +1,7 @@ # CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_USELIB is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_UID16 is not set # CONFIG_SYSFS_SYSCALL is not set -# CONFIG_KALLSYMS is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set @@ -12,17 +10,17 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EVENTFD is not set # CONFIG_AIO is not set # CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_BLOCK is not set CONFIG_H8S_SIM=y CONFIG_H8300_BUILTIN_DTB="h8s_sim" +# CONFIG_BLOCK is not set # CONFIG_BINFMT_SCRIPT is not set CONFIG_BINFMT_FLAT=y # CONFIG_COREDUMP is not set -# CONFIG_UEVENT_HELPER is not set # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set @@ -32,9 +30,7 @@ CONFIG_BINFMT_FLAT=y # CONFIG_VT is not set # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set From b2c03bb18a6dd957130ad8a37f660030e4b7932a Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:21:36 +0900 Subject: [PATCH 0006/1202] smc91x: remove GPIOLIB dependency. 
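The control GPIOs ("power" and "reset") are optional and only looked up on OF platforms, so the hard GPIOLIB dependency needlessly locks out GPIOLIB-less configurations such as H8300. With the dependency gone, !CONFIG_GPIOLIB builds take the stub added below, which reports success without touching any line. Roughly how the probe path then behaves (the call site is paraphrased from smc91x.c, not quoted verbatim):

    /* With CONFIG_GPIOLIB this requests and pulses the optional
     * "power" line; without it, the stub returns 0 and probing
     * continues as if the line were simply not wired up.
     */
    ret = try_toggle_control_gpio(&pdev->dev, &lp->power_gpio,
                                  "power", 0, 0, 100);
    if (ret)
        return ret;     /* error handling simplified */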
Signed-off-by: Yoshinori Sato --- drivers/net/ethernet/smsc/Kconfig | 1 - drivers/net/ethernet/smsc/smc91x.c | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 9e1c3752b2004..64ca1b36b91e0 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -37,7 +37,6 @@ config SMC91X tristate "SMC 91C9x/91C1xxx support" select CRC32 select MII - depends on !OF || GPIOLIB depends on ARM || ARM64 || ATARI_ETHERNAT || COLDFIRE || \ MIPS || NIOS2 || SUPERH || XTENSA || H8300 ---help--- diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 90410f9d3b1aa..605dc17290efb 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2190,6 +2190,7 @@ static const struct of_device_id smc91x_match[] = { }; MODULE_DEVICE_TABLE(of, smc91x_match); +#if defined(CONFIG_GPIOLIB) /** * of_try_set_control_gpio - configure a gpio if it exists */ @@ -2214,6 +2215,15 @@ static int try_toggle_control_gpio(struct device *dev, return 0; } +#else +static int try_toggle_control_gpio(struct device *dev, + struct gpio_desc **desc, + const char *name, int index, + int value, unsigned int nsdelay) +{ + return 0; +} +#endif #endif /* From 2fdf50bd510fd183ee89e4fdea52e7474ed9709c Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:22:22 +0900 Subject: [PATCH 0007/1202] sh-sci: 8bit register fix. Signed-off-by: Yoshinori Sato --- drivers/tty/serial/sh-sci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index c073aa7001c4f..d51edc40fcb18 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2965,10 +2965,7 @@ static int sci_init_single(struct platform_device *dev, port->fifosize = sci_port->params->fifosize; if (port->type == PORT_SCI) { - if (sci_port->reg_size >= 0x20) - port->regshift = 2; - else - port->regshift = 1; + port->regshift = sci_port->reg_size >> 4; } /* From 04fffbe01d2df57e600da35d3f31e3cca8ee14f4 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:23:24 +0900 Subject: [PATCH 0008/1202] h8300: update dts. Signed-off-by: Yoshinori Sato --- arch/h8300/boot/dts/edosk2674.dts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/h8300/boot/dts/edosk2674.dts b/arch/h8300/boot/dts/edosk2674.dts index d1733805ea67a..f9731fe8c7d09 100644 --- a/arch/h8300/boot/dts/edosk2674.dts +++ b/arch/h8300/boot/dts/edosk2674.dts @@ -61,7 +61,7 @@ compatible = "renesas,h8s-intc", "renesas,h8300-intc"; #interrupt-cells = <2>; interrupt-controller; - reg = <0xfffe00 24>; + reg = <0xfffe00 24>, <0xffff30 6>; }; bsc: memory-controller@fffec0 { @@ -79,7 +79,7 @@ timer8: timer@ffffb0 { compatible = "renesas,8bit-timer"; reg = <0xffffb0 10>; - interrupts = <72 0>; + interrupts = <72 0>, <73 0>, <74 0>; clocks = <&fclk>; clock-names = "fck"; }; @@ -105,4 +105,10 @@ clocks = <&fclk>; clock-names = "fck"; }; + ethernet: ethernet@f80000 { + compatible = "smsc,lan91c94"; + reg = <0xf80000 0xfbffff>; + reg-io-width = <1>; + interrupts = <16 0>; + }; }; From 951261bc483479d4c18ee435c35f94cccd659012 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:24:04 +0900 Subject: [PATCH 0009/1202] h8300_timer8: fix count mode. 
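The timer is moved from compare-match operation (TCORA, CMIEA/CMFA) to overflow operation (OVIE/OVF): rather than counting up from zero to a compare value, the counter is preloaded so that it overflows after exactly the requested number of ticks. The arithmetic used by timer8_set_next() below, worked through once:

    /* one-shot event 'delta' ticks from now (delta < 0x10000) */
    cnt = 0x10000 - delta;           /* delta = 1000 -> cnt = 0xfc18 */
    iowrite16be(cnt, base + _8TCNT); /* overflows after 1000 ticks */
    /* the interrupt handler re-arms by writing p->cnt back to _8TCNT */

The interrupt is now taken from the third "interrupts" entry (irq_of_parse_and_map(node, 2)), matching the <72 0>, <73 0>, <74 0> list added to the edosk2674 device tree earlier in this series, presumably compare-match A, compare-match B and overflow in that order.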
Signed-off-by: Yoshinori Sato --- drivers/clocksource/h8300_timer8.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 1d740a8c42ab3..5bf514c78d8b6 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -25,8 +25,8 @@ #define TCORB 6 #define _8TCNT 8 -#define CMIEA 6 -#define CMFA 6 +#define OVIE 5 +#define OVF 5 #define FLAG_STARTED (1 << 3) @@ -40,6 +40,7 @@ struct timer8_priv { void __iomem *mapbase; unsigned long flags; unsigned int rate; + uint16_t cnt; }; static irqreturn_t timer8_interrupt(int irq, void *dev_id) @@ -51,7 +52,8 @@ static irqreturn_t timer8_interrupt(int irq, void *dev_id) p->ced.event_handler(&p->ced); - bclr(CMFA, p->mapbase + _8TCSR); + iowrite16be(p->cnt, p->mapbase + _8TCNT); + bclr(OVF, p->mapbase + _8TCSR); return IRQ_HANDLED; } @@ -60,16 +62,14 @@ static void timer8_set_next(struct timer8_priv *p, unsigned long delta) { if (delta >= 0x10000) pr_warn("delta out of range\n"); - bclr(CMIEA, p->mapbase + _8TCR); - iowrite16be(delta, p->mapbase + TCORA); - iowrite16be(0x0000, p->mapbase + _8TCNT); - bclr(CMFA, p->mapbase + _8TCSR); - bset(CMIEA, p->mapbase + _8TCR); + p->cnt = 0x10000 - delta; + iowrite16be(p->cnt, p->mapbase + _8TCNT); + bclr(OVF, p->mapbase + _8TCSR); + bset(OVIE, p->mapbase + _8TCR); } static int timer8_enable(struct timer8_priv *p) { - iowrite16be(0xffff, p->mapbase + TCORA); iowrite16be(0x0000, p->mapbase + _8TCNT); iowrite16be(0x0c02, p->mapbase + _8TCR); @@ -177,7 +177,7 @@ static int __init h8300_8timer_init(struct device_node *node) } ret = -EINVAL; - irq = irq_of_parse_and_map(node, 0); + irq = irq_of_parse_and_map(node, 2); if (!irq) { pr_err("failed to get irq for clockevent\n"); goto unmap_reg; From 2c496a11304da8263e1dce9bdcef1ce04c027414 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Thu, 26 Dec 2019 18:24:28 +0900 Subject: [PATCH 0010/1202] irq-renesas-h8s: fix interrupt handling. 
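Three fixes in one, going by the diff below. The ISR/IER register block, described by the second "reg" tuple added to the INTC node earlier in this series, is now mapped separately as icr_base. Accesses switch from readw()/writew() to __raw_readw()/__raw_writew(), presumably because the former byte-swap for little-endian devices, which is wrong for the big-endian on-chip registers. And an irq_ack callback clears the latched pending bit for the external interrupt vectors 16..31. The ack bit arithmetic, worked through once:

    /* external pins occupy vectors 16..31; ISR holds one pending
     * bit per pin, so acking vector 18 clears bit 18 - 16 = 2:
     */
    isr = __raw_readw(isr_addr);
    isr &= ~(1 << 2);               /* mask 0xfffb */
    __raw_writew(isr, isr_addr);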
Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8s.c | 36 +++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-renesas-h8s.c b/drivers/irqchip/irq-renesas-h8s.c index 4e2461bae944d..b69f2c825a7f8 100644 --- a/drivers/irqchip/irq-renesas-h8s.c +++ b/drivers/irqchip/irq-renesas-h8s.c @@ -11,8 +11,9 @@ #include #include -static void *intc_baseaddr; -#define IPRA (intc_baseaddr) +static void *ipr_base; +static void *icr_base; +#define IPRA (ipr_base) static const unsigned char ipr_table[] = { 0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, /* 16 - 23 */ @@ -41,8 +42,8 @@ static void h8s_disable_irq(struct irq_data *data) addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); pos = (ipr_table[irq - 16] & 0x0f) * 4; pri = ~(0x000f << pos); - pri &= readw(addr); - writew(pri, addr); + pri &= __raw_readw(addr); + __raw_writew(pri, addr); } static void h8s_enable_irq(struct irq_data *data) @@ -55,15 +56,30 @@ static void h8s_enable_irq(struct irq_data *data) addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); pos = (ipr_table[irq - 16] & 0x0f) * 4; pri = ~(0x000f << pos); - pri &= readw(addr); + pri &= __raw_readw(addr); pri |= 1 << pos; - writew(pri, addr); + __raw_writew(pri, addr); +} + +static void h8s_ack_irq(struct irq_data *data) +{ + void __iomem *isr_addr = icr_base + 4; + int irq = data->irq; + uint16_t isr; + + if (irq >= 16 && irq < 32) { + irq -= 16; + isr = __raw_readw(isr_addr); + isr &= ~(1 << irq); + __raw_writew(isr, isr_addr); + } } struct irq_chip h8s_irq_chip = { .name = "H8S-INTC", .irq_enable = h8s_enable_irq, .irq_disable = h8s_disable_irq, + .irq_ack = h8s_ack_irq, }; static __init int irq_map(struct irq_domain *h, unsigned int virq, @@ -85,14 +101,16 @@ static int __init h8s_intc_of_init(struct device_node *intc, struct irq_domain *domain; int n; - intc_baseaddr = of_iomap(intc, 0); - BUG_ON(!intc_baseaddr); + ipr_base = of_iomap(intc, 0); + icr_base = of_iomap(intc, 1); + BUG_ON(!ipr_base || !icr_base); /* All interrupt priority is 0 (disable) */ /* IPRA to IPRK */ for (n = 0; n <= 'k' - 'a'; n++) - writew(0x0000, IPRA + (n * 2)); + __raw_writew(0x0000, IPRA + (n * 2)); + __raw_writew(0xffff, icr_base + 2); domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL); BUG_ON(!domain); irq_set_default_host(domain); From 8bd55b5fce8011d0026f7d233f7b45788f01ad0c Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 15 Apr 2020 21:59:05 +0900 Subject: [PATCH 0011/1202] irq-renesas-h8300h: Fix external interrupt control.
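On the H8/300H the external pins IRQ0..IRQ5 take exception vectors 12..17, so "data->irq - 12" recovers the pin number, which doubles as the bit index in both IER (enable) and ISR (pending status). Enable/disable now also gate IER, and an acknowledge hook clears the latched ISR bit; note that it is installed as .irq_eoi here, while the H8S counterpart in this series uses .irq_ack. The mapping in miniature (a sketch mirroring the diff below):

    /* vector 14 -> irq = 14 - 12 = 2 -> external pin IRQ2 */
    ctrl_bset(2, IER);      /* enable IRQ2        */
    ctrl_bclr(2, ISR);      /* clear pending IRQ2 */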
Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8300h.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-renesas-h8300h.c b/drivers/irqchip/irq-renesas-h8300h.c index 1054d74b7edde..42e2918eee2fe 100644 --- a/drivers/irqchip/irq-renesas-h8300h.c +++ b/drivers/irqchip/irq-renesas-h8300h.c @@ -24,7 +24,10 @@ static const char ipr_bit[] = { static void __iomem *intc_baseaddr; -#define IPR (intc_baseaddr + 6) +#define ICSR (intc_baseaddr + 2) +#define IER (intc_baseaddr + 3) +#define ISR (intc_baseaddr + 4) +#define IPR (intc_baseaddr + 6) static void h8300h_disable_irq(struct irq_data *data) { @@ -38,6 +41,8 @@ static void h8300h_disable_irq(struct irq_data *data) else ctrl_bclr(bit & 7, (IPR+1)); } + if (irq < 6) + ctrl_bclr(irq, IER); } static void h8300h_enable_irq(struct irq_data *data) @@ -52,12 +57,24 @@ static void h8300h_enable_irq(struct irq_data *data) else ctrl_bset(bit & 7, (IPR+1)); } + if (irq < 6) + ctrl_bset(irq, IER); +} + +static void h8300h_ack_irq(struct irq_data *data) +{ + int bit; + int irq = data->irq - 12; + + if (irq < 6) + ctrl_bclr(irq, ISR); } struct irq_chip h8300h_irq_chip = { .name = "H8/300H-INTC", .irq_enable = h8300h_enable_irq, .irq_disable = h8300h_disable_irq, + .irq_eoi = h8300h_ack_irq, }; static int irq_map(struct irq_domain *h, unsigned int virq, From beab3d5cb0e14e6397c4e596b3dbf1d36bf49d06 Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 15 Apr 2020 21:59:50 +0900 Subject: [PATCH 0012/1202] irq-renesas-h8s: Fix external interrupt control. Signed-off-by: Yoshinori Sato --- drivers/irqchip/irq-renesas-h8s.c | 39 +++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-renesas-h8s.c b/drivers/irqchip/irq-renesas-h8s.c index b69f2c825a7f8..0b8d9d7ef2ee1 100644 --- a/drivers/irqchip/irq-renesas-h8s.c +++ b/drivers/irqchip/irq-renesas-h8s.c @@ -14,6 +14,8 @@ static void *ipr_base; static void *icr_base; #define IPRA (ipr_base) +#define IER (icr_base + 2) +#define ISR (icr_base + 4) static const unsigned char ipr_table[] = { 0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, /* 16 - 23 */ @@ -37,13 +39,22 @@ static void h8s_disable_irq(struct irq_data *data) int pos; void __iomem *addr; unsigned short pri; - int irq = data->irq; + int irq = data->irq - 16; + unsigned short ier; + + if (irq < 0) + return; - addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); - pos = (ipr_table[irq - 16] & 0x0f) * 4; + addr = IPRA + ((ipr_table[irq] & 0xf0) >> 3); + pos = (ipr_table[irq] & 0x0f) * 4; pri = ~(0x000f << pos); pri &= __raw_readw(addr); __raw_writew(pri, addr); + if (irq < 16) { + ier = __raw_readw(IER); + ier &= ~(1 << irq); + __raw_writew(ier, IER); + } } static void h8s_enable_irq(struct irq_data *data) @@ -51,27 +62,35 @@ static void h8s_enable_irq(struct irq_data *data) int pos; void __iomem *addr; unsigned short pri; - int irq = data->irq; + int irq = data->irq - 16; + unsigned short ier; + + if (irq < 0) + return; - addr = IPRA + ((ipr_table[irq - 16] & 0xf0) >> 3); - pos = (ipr_table[irq - 16] & 0x0f) * 4; + addr = IPRA + ((ipr_table[irq] & 0xf0) >> 3); + pos = (ipr_table[irq] & 0x0f) * 4; pri = ~(0x000f << pos); pri &= __raw_readw(addr); pri |= 1 << pos; __raw_writew(pri, addr); + if (irq < 16) { + ier = __raw_readw(IER); + ier &= ~(1 << irq); + __raw_writew(ier, IER); + } } static void h8s_ack_irq(struct irq_data *data) { - void __iomem *isr_addr = icr_base + 4; int irq = data->irq; uint16_t isr; if (irq >= 16 && irq < 
32) { irq -= 16; - isr = __raw_readw(isr_addr); + isr = __raw_readw(ISR); isr &= ~(1 << irq); - __raw_writew(isr, isr_addr); + __raw_writew(isr, ISR); } } @@ -110,7 +129,7 @@ static int __init h8s_intc_of_init(struct device_node *intc, for (n = 0; n <= 'k' - 'a'; n++) __raw_writew(0x0000, IPRA + (n * 2)); - __raw_writew(0xffff, icr_base + 2); + __raw_writew(0xffff, IER); domain = irq_domain_add_linear(intc, NR_IRQS, &irq_ops, NULL); BUG_ON(!domain); irq_set_default_host(domain); From 35b34568912cb19c53b83d6daba96fe960941279 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:28:38 +0200 Subject: [PATCH 0013/1202] h8300: dts: Fix /chosen:stdout-path arch/h8300/boot/dts/h8s_sim.dts:11.3-25: Warning (chosen_node_stdout_path): /chosen:stdout-path: property is not a string arch/h8300/boot/dts/h8300h_sim.dts:11.3-25: Warning (chosen_node_stdout_path): /chosen:stdout-path: property is not a string Drop the angle brackets to fix this. A similar fix was already applied to arch/h8300/boot/dts/edosk2674.dts in commit 780ffcd51cb28717 ("h8300: register address fix"). Fixes: 38d6bded13084d50 ("h8300: devicetree source") Signed-off-by: Geert Uytterhoeven Reviewed-by: Masahiro Yamada Signed-off-by: Yoshinori Sato --- arch/h8300/boot/dts/h8300h_sim.dts | 2 +- arch/h8300/boot/dts/h8s_sim.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts index 595398b9d0180..e1d4d9b7f6b40 100644 --- a/arch/h8300/boot/dts/h8300h_sim.dts +++ b/arch/h8300/boot/dts/h8300h_sim.dts @@ -8,7 +8,7 @@ chosen { bootargs = "earlyprintk=h8300-sim"; - stdout-path = <&sci0>; + stdout-path = &sci0; }; aliases { serial0 = &sci0; diff --git a/arch/h8300/boot/dts/h8s_sim.dts b/arch/h8300/boot/dts/h8s_sim.dts index 932cc3c5a81bc..4848e40e607ec 100644 --- a/arch/h8300/boot/dts/h8s_sim.dts +++ b/arch/h8300/boot/dts/h8s_sim.dts @@ -8,7 +8,7 @@ chosen { bootargs = "earlyprintk=h8300-sim"; - stdout-path = <&sci0>; + stdout-path = &sci0; }; aliases { serial0 = &sci0; From 8808515be0ed4e33de9bfdc65f4c1b547ee11065 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Aug 2020 14:29:25 +0200 Subject: [PATCH 0014/1202] h8300: Replace <linux/clk-provider.h> by <linux/of_clk.h> The H8/300 platform code is not a clock provider, and just needs to call of_clk_init(). Hence it can include <linux/of_clk.h> instead of <linux/clk-provider.h>. Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Signed-off-by: Yoshinori Sato --- arch/h8300/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 28ac88358a89a..0ecaac7dd7e93 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include <linux/clk-provider.h> +#include <linux/of_clk.h> #include #include #include From 1efbcec2ef8c037f1e801c76e4b9434ee2400be7 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 9 Dec 2020 21:34:48 +0100 Subject: [PATCH 0015/1202] coresight: cti: Reduce scope for the variable “cs_fwnode” in cti_plat_create_connection() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A local variable was used only within an else branch. Thus move the definition for the variable “cs_fwnode” into the corresponding code block. This issue was detected by using the Coccinelle software.
Signed-off-by: Markus Elfring Signed-off-by: Mathieu Poirier --- drivers/hwtracing/coresight/coresight-cti-platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-platform.c b/drivers/hwtracing/coresight/coresight-cti-platform.c index 98f830c6ed507..ccef04f27f12f 100644 --- a/drivers/hwtracing/coresight/coresight-cti-platform.c +++ b/drivers/hwtracing/coresight/coresight-cti-platform.c @@ -343,7 +343,6 @@ static int cti_plat_create_connection(struct device *dev, { struct cti_trig_con *tc = NULL; int cpuid = -1, err = 0; - struct fwnode_handle *cs_fwnode = NULL; struct coresight_device *csdev = NULL; const char *assoc_name = "unknown"; char cpu_name_str[16]; @@ -397,8 +396,9 @@ static int cti_plat_create_connection(struct device *dev, assoc_name = cpu_name_str; } else { /* associated device ? */ - cs_fwnode = fwnode_find_reference(fwnode, - CTI_DT_CSDEV_ASSOC, 0); + struct fwnode_handle *cs_fwnode = fwnode_find_reference(fwnode, + CTI_DT_CSDEV_ASSOC, + 0); if (!IS_ERR(cs_fwnode)) { assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode, &csdev); From fbafce2f535dc8d18ef2f9e1f7a1e4672c0ea469 Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:51 -0500 Subject: [PATCH 0016/1202] certs: Add EFI_CERT_X509_GUID support for dbx entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes CVE-2020-26541. The Secure Boot Forbidden Signature Database, dbx, contains a list of now revoked signatures and keys previously approved to boot with UEFI Secure Boot enabled. The dbx is capable of containing any number of EFI_CERT_X509_SHA256_GUID, EFI_CERT_SHA256_GUID, and EFI_CERT_X509_GUID entries. Currently when EFI_CERT_X509_GUID are contained in the dbx, the entries are skipped. Add support for EFI_CERT_X509_GUID dbx entries. When a EFI_CERT_X509_GUID is found, it is added as an asymmetrical key to the .blacklist keyring. Anytime the .platform keyring is used, the keys in the .blacklist keyring are referenced, if a matching key is found, the key will be rejected. [DH: Made the following changes: - Added to have a config option to enable the facility. This allows a Kconfig solution to make sure that pkcs7_validate_trust() is enabled. - Moved the functions out from the middle of the blacklist functions. - Added kerneldoc comments.] 
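For reference, the return-value convention of the new helper, summarizing the code below rather than adding behaviour: is_key_on_revocation_list() runs pkcs7_validate_trust() against the .blacklist keyring and maps the result so that callers can treat -ENOKEY as "not revoked, keep going":

    pkcs7_validate_trust(pkcs7, blacklist_keyring) == 0
            -> -EKEYREJECTED   /* chains up to a revoked cert */
    any other result
            -> -ENOKEY         /* not on the revocation list  */

In verify_pkcs7_message_sig(), the fallback to the platform keyring performs this check first, so a revoked key is rejected even when the platform keyring would otherwise validate the message.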
Signed-off-by: Eric Snowberg Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: Randy Dunlap cc: Mickaël Salaün cc: Arnd Bergmann cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200901165143.10295-1-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20200909172736.73003-1-eric.snowberg@oracle.com/ # v2 Link: https://lore.kernel.org/r/20200911182230.62266-1-eric.snowberg@oracle.com/ # v3 Link: https://lore.kernel.org/r/20200916004927.64276-1-eric.snowberg@oracle.com/ # v4 Link: https://lore.kernel.org/r/2660556.1610545213@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/20210122181054.32635-2-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/bc2c24e3-ed68-2521-0bf4-a1f6be4a895d@infradead.org/ Link: https://lore.kernel.org/r/20210225125638.1841436-1-arnd@kernel.org/ Link: https://lore.kernel.org/r/161428672051.677100.11064981943343605138.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433310942.902181.4901864302675874242.stgit@warthog.procyon.org.uk/ # v2 --- certs/Kconfig | 9 ++++ certs/blacklist.c | 43 +++++++++++++++++++ certs/blacklist.h | 2 + certs/system_keyring.c | 6 +++ include/keys/system_keyring.h | 15 +++++++ .../platform_certs/keyring_handler.c | 11 +++++ 6 files changed, 86 insertions(+) diff --git a/certs/Kconfig b/certs/Kconfig index c94e93d8bccf0..76e469b56a773 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -83,4 +83,13 @@ config SYSTEM_BLACKLIST_HASH_LIST wrapper to incorporate the list into the kernel. Each should be a string of hex digits. +config SYSTEM_REVOCATION_LIST + bool "Provide system-wide ring of revocation certificates" + depends on SYSTEM_BLACKLIST_KEYRING + depends on PKCS7_MESSAGE_PARSER=y + help + If set, this allows revocation certificates to be stored in the + blacklist keyring and implements a hook whereby a PKCS#7 message can + be checked to see if it matches such a certificate. 
+ endmenu diff --git a/certs/blacklist.c b/certs/blacklist.c index bffe4c6f4a9e2..2b8644123d5fd 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -145,6 +145,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) } EXPORT_SYMBOL_GPL(is_binary_blacklisted); +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/** + * add_key_to_revocation_list - Add a revocation certificate to the blacklist + * @data: The data blob containing the certificate + * @size: The size of data blob + */ +int add_key_to_revocation_list(const char *data, size_t size) +{ + key_ref_t key; + + key = key_create_or_update(make_key_ref(blacklist_keyring, true), + "asymmetric", + NULL, + data, + size, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); + + if (IS_ERR(key)) { + pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); + return PTR_ERR(key); + } + + return 0; +} + +/** + * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked + * @pkcs7: The PKCS#7 message to check + */ +int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + int ret; + + ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); + + if (ret == 0) + return -EKEYREJECTED; + + return -ENOKEY; +} +#endif + /* * Initialise the blacklist */ diff --git a/certs/blacklist.h b/certs/blacklist.h index 1efd6fa0dc608..51b320cf85749 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,5 @@ #include +#include +#include extern const char __initconst *const blacklist_hashes[]; diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 4b693da488f14..ed98754d5795a 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -242,6 +242,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, pr_devel("PKCS#7 platform keyring is not available\n"); goto error; } + + ret = is_key_on_revocation_list(pkcs7); + if (ret != -ENOKEY) { + pr_devel("PKCS#7 platform key is on revocation list\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index fb8b07daa9d15..875e002a41804 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, @@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) } #endif +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern int add_key_to_revocation_list(const char *data, size_t size); +extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); +#else +static inline int add_key_to_revocation_list(const char *data, size_t size) +{ + return 0; +} +static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + return -ENOKEY; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring; diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index c5ba695c10e3a..5604bd57c9907 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, 
uefi_blacklist_hash(source, data, len, "bin:", 4); } +/* + * Add an X509 cert to the revocation list. + */ +static __init void uefi_revocation_list_x509(const char *source, + const void *data, size_t len) +{ + add_key_to_revocation_list(data, len); +} + /* * Return the appropriate handler for particular signature list types found in * the UEFI db and MokListRT tables. @@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) return uefi_blacklist_x509_tbs; if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) return uefi_blacklist_binary; + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return uefi_revocation_list_x509; return 0; } From 9536390dcc8cc19da6442f45c9e0ee14bcfa6dfc Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:52 -0500 Subject: [PATCH 0017/1202] certs: Move load_system_certificate_list to a common function Move functionality within load_system_certificate_list to a common function, so it can be reused in the future. DH Changes: - Added inclusion of common.h to common.c (Eric [1]). Signed-off-by: Eric Snowberg Acked-by: Jarkko Sakkinen Signed-off-by: David Howells cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200930201508.35113-2-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-3-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/EDA280F9-F72D-4181-93C7-CDBE95976FF7@oracle.com/ [1] Link: https://lore.kernel.org/r/161428672825.677100.7545516389752262918.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433311696.902181.3599366124784670368.stgit@warthog.procyon.org.uk/ # v2 --- certs/Makefile | 2 +- certs/common.c | 57 ++++++++++++++++++++++++++++++++++++++++++ certs/common.h | 9 +++++++ certs/system_keyring.c | 49 +++--------------------------------- 4 files changed, 70 insertions(+), 47 deletions(-) create mode 100644 certs/common.c create mode 100644 certs/common.h diff --git a/certs/Makefile b/certs/Makefile index f4c25b67aad90..f4b90bad8690a 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel signature checking certificates. # -obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o diff --git a/certs/common.c b/certs/common.c new file mode 100644 index 0000000000000..16a220887a53e --- /dev/null +++ b/certs/common.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "common.h" + +int load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + p = cert_list; + end = p + list_size; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. 
+ */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} diff --git a/certs/common.h b/certs/common.h new file mode 100644 index 0000000000000..abdb5795936b7 --- /dev/null +++ b/certs/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _CERT_COMMON_H +#define _CERT_COMMON_H + +int load_certificate_list(const u8 cert_list[], const unsigned long list_size, + const struct key *keyring); + +#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index ed98754d5795a..0c9a4795e847b 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -16,6 +16,7 @@ #include #include #include +#include "common.h" static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -137,54 +138,10 @@ device_initcall(system_trusted_keyring_init); */ static __init int load_system_certificate_list(void) { - key_ref_t key; - const u8 *p, *end; - size_t plen; - pr_notice("Loading compiled-in X.509 certificates\n"); - p = system_certificate_list; - end = p + system_certificate_list_size; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), - "asymmetric", - NULL, - p, - plen, - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_BUILT_IN | - KEY_ALLOC_BYPASS_RESTRICTION); - if (IS_ERR(key)) { - pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - } else { - pr_notice("Loaded X.509 cert '%s'\n", - key_ref_to_ptr(key)->description); - key_ref_put(key); - } - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("Problem parsing in-kernel X.509 certificate list\n"); - return 0; + return load_certificate_list(system_certificate_list, system_certificate_list_size, + builtin_trusted_keys); } late_initcall(load_system_certificate_list); From ad33a49b42c5b5dc37b5dd0ba6159fb01cf10761 Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:53 -0500 Subject: [PATCH 0018/1202] certs: Add ability to preload revocation certs Add a new Kconfig option called SYSTEM_REVOCATION_KEYS. If set, this option should be the filename of a PEM-formated file containing X.509 certificates to be included in the default blacklist keyring. DH Changes: - Make the new Kconfig option depend on SYSTEM_REVOCATION_LIST. - Fix SYSTEM_REVOCATION_KEYS=n, but CONFIG_SYSTEM_REVOCATION_LIST=y[1][2]. - Use CONFIG_SYSTEM_REVOCATION_LIST for extract-cert[3]. - Use CONFIG_SYSTEM_REVOCATION_LIST for revocation_certificates.o[3]. 
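A usage sketch, since the text above only names the option: enabling the preload might look like this in a .config, where certs/revocation.pem is a placeholder path to a local PEM bundle, not a file shipped with the kernel:

    CONFIG_SYSTEM_BLACKLIST_KEYRING=y
    CONFIG_SYSTEM_REVOCATION_LIST=y
    CONFIG_SYSTEM_REVOCATION_KEYS="certs/revocation.pem"

At build time scripts/extract-cert turns the PEM file into certs/x509_revocation_list, revocation_certificates.S pulls that blob in with .incbin, and load_revocation_certificate_list() hands it to load_certificate_list() at late_initcall time, as the diff below shows.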
Signed-off-by: Eric Snowberg Acked-by: Jarkko Sakkinen Signed-off-by: David Howells cc: Randy Dunlap cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200930201508.35113-3-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-4-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428673564.677100.4112098280028451629.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433312452.902181.4146169951896577982.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/e1c15c74-82ce-3a69-44de-a33af9b320ea@infradead.org/ [1] Link: https://lore.kernel.org/r/20210303034418.106762-1-eric.snowberg@oracle.com/ [2] Link: https://lore.kernel.org/keyrings/20210304175030.184131-1-eric.snowberg@oracle.com/ [3] --- certs/Kconfig | 8 ++++++++ certs/Makefile | 19 +++++++++++++++++-- certs/blacklist.c | 21 +++++++++++++++++++++ certs/revocation_certificates.S | 21 +++++++++++++++++++++ scripts/Makefile | 1 + 5 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 certs/revocation_certificates.S diff --git a/certs/Kconfig b/certs/Kconfig index 76e469b56a773..ab88d2a7f3c7f 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -92,4 +92,12 @@ config SYSTEM_REVOCATION_LIST blacklist keyring and implements a hook whereby a PKCS#7 message can be checked to see if it matches such a certificate. +config SYSTEM_REVOCATION_KEYS + string "X.509 certificates to be preloaded into the system blacklist keyring" + depends on SYSTEM_REVOCATION_LIST + help + If set, this option should be the filename of a PEM-formatted file + containing X.509 certificates to be included in the default blacklist + keyring. + endmenu diff --git a/certs/Makefile b/certs/Makefile index f4b90bad8690a..b6db52ebf0beb 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -4,7 +4,8 @@ # obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else @@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) endif # CONFIG_SYSTEM_TRUSTED_KEYRING -clean-files := x509_certificate_list .x509.list +clean-files := x509_certificate_list .x509.list x509_revocation_list ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### @@ -104,3 +105,17 @@ targets += signing_key.x509 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) endif # CONFIG_MODULE_SIG + +ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) + +$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) + +$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list + +quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) + cmd_extract_certs = scripts/extract-cert $(2) $@ + +targets += x509_revocation_list +$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE + $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) +endif diff --git a/certs/blacklist.c b/certs/blacklist.c index 2b8644123d5fd..c9a435b15af40 100644 
--- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -17,9 +17,15 @@ #include #include #include "blacklist.h" +#include "common.h" static struct key *blacklist_keyring; +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern __initconst const u8 revocation_certificate_list[]; +extern __initconst const unsigned long revocation_certificate_list_size; +#endif + /* * The description must be a type prefix, a colon and then an even number of * hex digits. The hash is kept in the description. @@ -220,3 +226,18 @@ static int __init blacklist_init(void) * Must be initialised before we try and load the keys into the keyring. */ device_initcall(blacklist_init); + +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/* + * Load the compiled-in list of revocation X.509 certificates. + */ +static __init int load_revocation_certificate_list(void) +{ + if (revocation_certificate_list_size) + pr_notice("Loading compiled-in revocation X.509 certificates\n"); + + return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, + blacklist_keyring); +} +late_initcall(load_revocation_certificate_list); +#endif diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S new file mode 100644 index 0000000000000..f21aae8a8f0ef --- /dev/null +++ b/certs/revocation_certificates.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + + __INITRODATA + + .align 8 + .globl revocation_certificate_list +revocation_certificate_list: +__revocation_list_start: + .incbin "certs/x509_revocation_list" +__revocation_list_end: + + .align 8 + .globl revocation_certificate_list_size +revocation_certificate_list_size: +#ifdef CONFIG_64BIT + .quad __revocation_list_end - __revocation_list_start +#else + .long __revocation_list_end - __revocation_list_start +#endif diff --git a/scripts/Makefile b/scripts/Makefile index b5418ec587fbd..bd0718f7c493e 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,6 +11,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include From e377c31f788fc98815e1ab90b5a35704ce35843a Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Fri, 22 Jan 2021 13:10:54 -0500 Subject: [PATCH 0019/1202] integrity: Load mokx variables into the blacklist keyring During boot the Secure Boot Forbidden Signature Database, dbx, is loaded into the blacklist keyring. Systems booted with shim have an equivalent Forbidden Signature Database called mokx. Currently mokx is only used by shim and grub, the contents are ignored by the kernel. Add the ability to load mokx into the blacklist keyring during boot. 
Signed-off-by: Eric Snowberg Suggested-by: James Bottomley Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20210122181054.32635-5-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/c33c8e3839a41e9654f41cc92c7231104931b1d7.camel@HansenPartnership.com/ Link: https://lore.kernel.org/r/161428674320.677100.12637282414018170743.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/161433313205.902181.2502803393898221637.stgit@warthog.procyon.org.uk/ # v2 --- security/integrity/platform_certs/load_uefi.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index ee4b4c666854f..f290f78c3f301 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) static int __init load_uefi_certs(void) { efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; - void *db = NULL, *dbx = NULL; - unsigned long dbsize = 0, dbxsize = 0; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; + void *db = NULL, *dbx = NULL, *mokx = NULL; + unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; @@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) kfree(dbx); } + mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); + if (!mokx) { + if (status == EFI_NOT_FOUND) + pr_debug("mokx variable wasn't found\n"); + else + pr_info("Couldn't get mokx list\n"); + } else { + rc = parse_efi_signature_list("UEFI:MokListXRT", + mokx, mokxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse mokx signatures %d\n", rc); + kfree(mokx); + } + /* Load the MokListRT certs */ rc = load_moklist_certs(); From c5d4fb2539cad2e62c5a3f0d8237613c394f297e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 Jun 2021 12:37:00 +0200 Subject: [PATCH 0020/1202] pstore/blk: Use "%lu" to format unsigned long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit: fs/pstore/blk.c: In function ‘__best_effort_init’: include/linux/kern_levels.h:5:18: warning: format ‘%zu’ expects argument of type ‘size_t’, but argument 3 has type ‘long unsigned int’ [-Wformat=] 5 | #define KERN_SOH "\001" /* ASCII Start Of Header */ | ^~~~~~ include/linux/kern_levels.h:14:19: note: in expansion of macro ‘KERN_SOH’ 14 | #define KERN_INFO KERN_SOH "6" /* informational */ | ^~~~~~~~ include/linux/printk.h:373:9: note: in expansion of macro ‘KERN_INFO’ 373 | printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~~ fs/pstore/blk.c:314:3: note: in expansion of macro ‘pr_info’ 314 | pr_info("attached %s (%zu) (no dedicated panic_write!)\n", | ^~~~~~~ Fixes: 7bb9557b48fcabaa ("pstore/blk: Use the normal block device I/O path") Signed-off-by: Geert Uytterhoeven Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210629103700.1935012-1-geert@linux-m68k.org --- fs/pstore/blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index 04ce58c939a0b..6093088de49fd 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -311,7 +311,7 @@ static int __init __best_effort_init(void) if (ret) kfree(best_effort_dev); else - pr_info("attached %s (%zu) (no dedicated panic_write!)\n", + pr_info("attached %s (%lu) (no dedicated panic_write!)\n", blkdev, best_effort_dev->zone.total_size); return ret; From 
95b115332a835fb0cbd36dfabacf1c57d915e705 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 30 Jun 2021 10:33:36 -0400 Subject: [PATCH 0021/1202] zonefs: remove redundant null bio check bio_alloc() with __GFP_DIRECT_RECLAIM, which is included in GFP_NOFS, never fails, see comments in bio_alloc_bioset(). Signed-off-by: Xianting Tian Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index cd145d318b178..d6d08da505f38 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -705,9 +705,6 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) return 0; bio = bio_alloc(GFP_NOFS, nr_pages); - if (!bio) - return -ENOMEM; - bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = zi->i_zsector; bio->bi_write_hint = iocb->ki_hint; From 0f79ce970e79ffb771733f9634d5918d0eb3e30a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 8 Jan 2021 12:22:33 +0100 Subject: [PATCH 0022/1202] gnss: drop stray semicolons Drop semicolons after function definitions that have managed to sneak in and get reproduced. Signed-off-by: Johan Hovold --- drivers/gnss/mtk.c | 2 +- drivers/gnss/serial.c | 2 +- drivers/gnss/sirf.c | 2 +- drivers/gnss/ubx.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gnss/mtk.c b/drivers/gnss/mtk.c index d1fc55560daf6..c62b1211f4fe4 100644 --- a/drivers/gnss/mtk.c +++ b/drivers/gnss/mtk.c @@ -126,7 +126,7 @@ static void mtk_remove(struct serdev_device *serdev) if (data->vbackup) regulator_disable(data->vbackup); gnss_serial_free(gserial); -}; +} #ifdef CONFIG_OF static const struct of_device_id mtk_of_match[] = { diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c index def64b36d9941..5d8e9bfb24d02 100644 --- a/drivers/gnss/serial.c +++ b/drivers/gnss/serial.c @@ -165,7 +165,7 @@ void gnss_serial_free(struct gnss_serial *gserial) { gnss_put_device(gserial->gdev); kfree(gserial); -}; +} EXPORT_SYMBOL_GPL(gnss_serial_free); int gnss_serial_register(struct gnss_serial *gserial) diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c index 2ecb1d3e8eeb3..bcb53ccfee4d5 100644 --- a/drivers/gnss/sirf.c +++ b/drivers/gnss/sirf.c @@ -551,7 +551,7 @@ static void sirf_remove(struct serdev_device *serdev) regulator_disable(data->vcc); gnss_put_device(data->gdev); -}; +} #ifdef CONFIG_OF static const struct of_device_id sirf_of_match[] = { diff --git a/drivers/gnss/ubx.c b/drivers/gnss/ubx.c index 7b05bc40532e5..c951be202ca25 100644 --- a/drivers/gnss/ubx.c +++ b/drivers/gnss/ubx.c @@ -126,7 +126,7 @@ static void ubx_remove(struct serdev_device *serdev) if (data->v_bckp) regulator_disable(data->v_bckp); gnss_serial_free(gserial); -}; +} #ifdef CONFIG_OF static const struct of_device_id ubx_of_match[] = { From 81dd24966885113968dc5599b9b371b60c3bf7b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jul 2021 13:53:10 +0200 Subject: [PATCH 0023/1202] h8300: remove memory.c None of the empty stubs defined in this file are used or even declared in a header. 
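One way to double-check, an illustrative command rather than part of the change, is to grep for the four symbols the deleted file defines:

    $ git grep -wn -e cache_clear -e cache_push -e cache_push_v \
          -e kernel_map -- arch/h8300

The only hits should be in mm/memory.c itself.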
Signed-off-by: Christoph Hellwig Signed-off-by: Yoshinori Sato --- arch/h8300/mm/Makefile | 2 +- arch/h8300/mm/memory.c | 53 ------------------------------------------ 2 files changed, 1 insertion(+), 54 deletions(-) delete mode 100644 arch/h8300/mm/memory.c diff --git a/arch/h8300/mm/Makefile b/arch/h8300/mm/Makefile index e85b5c91f5bc0..a62be61378465 100644 --- a/arch/h8300/mm/Makefile +++ b/arch/h8300/mm/Makefile @@ -3,4 +3,4 @@ # Makefile for the linux h8300-specific parts of the memory manager. # -obj-y := init.o fault.o memory.o +obj-y := init.o fault.o diff --git a/arch/h8300/mm/memory.c b/arch/h8300/mm/memory.c deleted file mode 100644 index 4a60e2b5eb961..0000000000000 --- a/arch/h8300/mm/memory.c +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/h8300/mm/memory.c - * - * Copyright (C) 2002 Yoshinori Sato , - * - * Based on: - * - * linux/arch/m68knommu/mm/memory.c - * - * Copyright (C) 1998 Kenneth Albanowski , - * Copyright (C) 1999-2002, Greg Ungerer (gerg@snapgear.com) - * - * Based on: - * - * linux/arch/m68k/mm/memory.c - * - * Copyright (C) 1995 Hamish Macdonald - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -void cache_clear(unsigned long paddr, int len) -{ -} - - -void cache_push(unsigned long paddr, int len) -{ -} - -void cache_push_v(unsigned long vaddr, int len) -{ -} - -/* - * Map some physical address range into the kernel address space. - */ - -unsigned long kernel_map(unsigned long paddr, unsigned long size, - int nocacheflag, unsigned long *memavailp) -{ - return paddr; -} From 1ec10274d436fbe77b821fbdf095b45d0888e46d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jul 2021 13:53:11 +0200 Subject: [PATCH 0024/1202] h8300: don't implement set_fs There is no need for set_fs on this nommu-only architecture, so just remove all the boilerplate code. 
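On this nommu-only target the kernel and user space share one flat address space: the segment.h removed below shows get_fs() unconditionally returning USER_DS, so uaccess_kernel() could never be true and the SET_FS machinery carried no information. User copies are already handled by CONFIG_UACCESS_MEMCPY, which in asm-generic terms reduces to roughly the following (a sketch of the generic header's behaviour, not code added by this patch):

    static inline unsigned long
    raw_copy_from_user(void *to, const void __user *from, unsigned long n)
    {
        memcpy(to, (const void __force *)from, n); /* no MMU, no fixups */
        return 0;
    }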
Signed-off-by: Christoph Hellwig Signed-off-by: Yoshinori Sato --- arch/h8300/Kconfig | 1 - arch/h8300/include/asm/processor.h | 1 - arch/h8300/include/asm/segment.h | 40 ---------------------------- arch/h8300/include/asm/thread_info.h | 3 --- arch/h8300/kernel/entry.S | 1 - arch/h8300/kernel/head_ram.S | 1 - arch/h8300/mm/init.c | 6 ----- 7 files changed, 53 deletions(-) delete mode 100644 arch/h8300/include/asm/segment.h diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 3e3e0f16f7e0a..fe48c4f26cc83 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -24,7 +24,6 @@ config H8300 select HAVE_ARCH_KGDB select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS - select SET_FS select UACCESS_MEMCPY config CPU_BIG_ENDIAN diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index a060b41b2d31c..66d7d5271f10a 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -13,7 +13,6 @@ #define __ASM_H8300_PROCESSOR_H #include -#include #include #include diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h deleted file mode 100644 index 37950725d9b9c..0000000000000 --- a/arch/h8300/include/asm/segment.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _H8300_SEGMENT_H -#define _H8300_SEGMENT_H - -/* define constants */ -#define USER_DATA (1) -#ifndef __USER_DS -#define __USER_DS (USER_DATA) -#endif -#define USER_PROGRAM (2) -#define SUPER_DATA (3) -#ifndef __KERNEL_DS -#define __KERNEL_DS (SUPER_DATA) -#endif -#define SUPER_PROGRAM (4) - -#ifndef __ASSEMBLY__ - -typedef struct { - unsigned long seg; -} mm_segment_t; - -#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -#define USER_DS MAKE_MM_SEG(__USER_DS) -#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS) - -/* - * Get/set the SFC/DFC registers for MOVES instructions - */ - -static inline mm_segment_t get_fs(void) -{ - return USER_DS; -} - -#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) - -#endif /* __ASSEMBLY__ */ - -#endif /* _H8300_SEGMENT_H */ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index a518214d4ddd8..ff2d873749a4d 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -10,7 +10,6 @@ #define _ASM_THREAD_INFO_H #include -#include #ifdef __KERNEL__ @@ -31,7 +30,6 @@ struct thread_info { unsigned long flags; /* low level flags */ int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - mm_segment_t addr_limit; }; /* @@ -43,7 +41,6 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ } /* how to get the thread information struct from C */ diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S index c6e289b5f1f28..42db87c17917b 100644 --- a/arch/h8300/kernel/entry.S +++ b/arch/h8300/kernel/entry.S @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/kernel/head_ram.S b/arch/h8300/kernel/head_ram.S index dbf8429f5fab5..489462f0ee57b 100644 --- a/arch/h8300/kernel/head_ram.S +++ b/arch/h8300/kernel/head_ram.S @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 1f3b345d68b97..3a2f006119f9b 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -34,7 +34,6 @@ #include #include -#include #include #include @@ -71,11 +70,6 @@ void __init paging_init(void) panic("%s: Failed to 
allocate %lu bytes align=0x%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); - /* - * Set up SFC/DFC registers (user data space). - */ - set_fs(USER_DS); - pr_debug("before free_area_init\n"); pr_debug("free_area_init -> start_mem is %#lx\nvirtual_end is %#lx\n", From 0927a71fd0edd0882ef6a143c99fd2e03624524c Mon Sep 17 00:00:00 2001 From: Salah Triki Date: Tue, 13 Jul 2021 15:05:21 +0100 Subject: [PATCH 0025/1202] gpu: ipu-v3: image-convert: use swap() Use the swap() helper instead of open-coding it, since that makes the code cleaner. Signed-off-by: Salah Triki Link: https://lore.kernel.org/r/20210713140521.GA1873885@pc [p.zabel@pengutronix.de: add "image-convert:" prefix to commit description] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index aa1d4b6d278f7..af1612044eefd 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -990,7 +990,7 @@ static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, const struct ipu_image_pixfmt *fmt = image->fmt; unsigned int row, col, tile = 0; u32 H, top, y_stride, uv_stride; - u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp; + u32 uv_row_off, uv_col_off, uv_off, u_off, v_off; u32 y_row_off, y_col_off, y_off; u32 y_size, uv_size; @@ -1021,11 +1021,8 @@ static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, u_off = y_size - y_off + uv_off; v_off = (fmt->uv_packed) ? 0 : u_off + uv_size; - if (fmt->uv_swapped) { - tmp = u_off; - u_off = v_off; - v_off = tmp; - } + if (fmt->uv_swapped) + swap(u_off, v_off); image->tile[tile].offset = y_off; image->tile[tile].u_off = u_off; From 20fbfc81e390180db738c414c1b7ac85d31e24b3 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 31 Aug 2021 21:55:52 +0800 Subject: [PATCH 0026/1202] drm/imx: imx-tve: Make use of the helper function devm_platform_ioremap_resource() Use the devm_platform_ioremap_resource() helper instead of calling platform_get_resource() and devm_ioremap_resource() separately. Signed-off-by: Cai Huoqing Link: https://lore.kernel.org/r/20210831135553.4426-1-caihuoqing@baidu.com Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-tve.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index bc8c3f802a152..2b1fdf2cbbce1 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -526,7 +526,6 @@ static int imx_tve_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; struct device_node *ddc_node; struct imx_tve *tve; - struct resource *res; void __iomem *base; unsigned int val; int irq; @@ -568,8 +567,7 @@ static int imx_tve_probe(struct platform_device *pdev) } } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(dev, res); + base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); From f96cb827ce49627543b4aabe8d54d8ea9740ae4e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 Jun 2021 12:21:28 +0100 Subject: [PATCH 0027/1202] ntb: ntb_pingpong: remove redundant initialization of variables msg_data and spad_data The variables msg_data and spad_data are being initialized with values that are never read; they are updated later on. The initializations are redundant and can be removed.
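The defect class is a dead store; a minimal hypothetical sketch (not the driver code itself) looks like this:

	u32 msg_data = -1;	/* dead store: overwritten before any read */

	msg_data = ioread32(&regs->msg);	/* always executed */
	consume(msg_data);	/* first real read of msg_data */

Here "regs" and "consume()" are invented stand-ins; the point is that every path assigns the variable before reading it, so the initializer has no effect.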
Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 2164e8492772d..8aeca79140509 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -187,7 +187,7 @@ static void pp_ping(struct pp_ctx *pp) static void pp_pong(struct pp_ctx *pp) { - u32 msg_data = -1, spad_data = -1; + u32 msg_data, spad_data; int pidx = 0; /* Read pong data */ From 37160f01e8ea6111b31f75e08953308b51a03d99 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Aug 2021 01:44:27 +0900 Subject: [PATCH 0028/1202] nds32: move core-y in arch/nds32/Makefile to arch/nds32/Kbuild Use obj-y to clean up Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Greentime Hu --- arch/nds32/Kbuild | 3 +++ arch/nds32/Makefile | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/nds32/Kbuild b/arch/nds32/Kbuild index a4e40e534e6a8..565b9bc3c9db4 100644 --- a/arch/nds32/Kbuild +++ b/arch/nds32/Kbuild @@ -1 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_FPU) += math-emu/ +obj-y += boot/dts/ diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index ccdca71420201..853a5df32f687 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -25,8 +25,6 @@ export TEXTADDR # If we have a machine-specific directory, then include it in the build. -core-y += arch/nds32/kernel/ arch/nds32/mm/ -core-$(CONFIG_FPU) += arch/nds32/math-emu/ libs-y += arch/nds32/lib/ ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""' @@ -48,7 +46,6 @@ CHECKFLAGS += -D__NDS32_EB__ endif boot := arch/nds32/boot -core-y += $(boot)/dts/ Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ From 36b9b19753937a6fe0e591b518d46a1f2efae679 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 May 2021 20:13:54 -0700 Subject: [PATCH 0029/1202] nds32: add a Kconfig symbol for LOCKDEP_SUPPORT Each architecture (arch/) should define its own LOCKDEP_SUPPORT Kconfig symbol (if it is needed). arch/nds32/ is the only one that does "select LOCKDEP_SUPPORT", which is basically a no-op since the symbol is not defined. (It might be nice for kconfig to warn about that.) Add a "config LOCKDEP_SUPPORT" to arch/nds32/Kconfig to correct this issue. Since the current Kconfig file selects LOCKDEP_SUPPORT unconditionally, use "def_bool y" to set/enable it always. 
Signed-off-by: Randy Dunlap Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Andrew Morton Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 62313902d75d9..a20b42d4bdb6e 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -36,7 +36,6 @@ config NDS32 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_PERF_EVENTS select IRQ_DOMAIN - select LOCKDEP_SUPPORT select MODULES_USE_ELF_RELA select OF select OF_EARLY_FLATTREE @@ -64,6 +63,9 @@ config GENERIC_LOCKBREAK def_bool y depends on PREEMPTION +config LOCKDEP_SUPPORT + def_bool y + config TRACE_IRQFLAGS_SUPPORT def_bool y From 07cd7745c6f2081dac0aff7f57ea2b48c86de5fa Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 12 Jul 2021 15:52:18 +0300 Subject: [PATCH 0030/1202] nds32/setup: remove unused memblock_region variable in setup_memory() kernel test robot reports unused variable warning: cppcheck possible warnings: (new ones prefixed by >>, may not real problems) >> arch/nds32/kernel/setup.c:247:26: warning: Unused variable: region >> [unusedVariable] struct memblock_region *region; ^ Remove the unused variable. Reported-by: kernel test robot Signed-off-by: Mike Rapoport Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greentime Hu --- arch/nds32/kernel/setup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 41725eaf8bac6..b3d34d6466521 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -244,7 +244,6 @@ static void __init setup_memory(void) unsigned long ram_start_pfn; unsigned long free_ram_start_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; From 0caa2f7cec18b0337334797aa08e9e9b62bcbac7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 Aug 2021 09:27:53 +0200 Subject: [PATCH 0031/1202] um: fix ndelay/udelay defines Many places in the kernel use 'udelay' as an identifier, and are broken with the current "#define udelay um_udelay". Fix this by adding an argument to the macro, and do the same to 'ndelay' as well, just in case. Fixes: 0bc8fb4dda2b ("um: Implement ndelay/udelay in time-travel mode") Reported-by: kernel test robot Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/delay.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h index 56fc2b8f2dd01..e79b2ab6f40c8 100644 --- a/arch/um/include/asm/delay.h +++ b/arch/um/include/asm/delay.h @@ -14,7 +14,7 @@ static inline void um_ndelay(unsigned long nsecs) ndelay(nsecs); } #undef ndelay -#define ndelay um_ndelay +#define ndelay(n) um_ndelay(n) static inline void um_udelay(unsigned long usecs) { @@ -26,5 +26,5 @@ static inline void um_udelay(unsigned long usecs) udelay(usecs); } #undef udelay -#define udelay um_udelay +#define udelay(n) um_udelay(n) #endif /* __UM_DELAY_H */ From 2346402756756e55b2e6574fba50b00d1db5b292 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 31 Aug 2021 09:11:15 +0200 Subject: [PATCH 0032/1202] um: rename set_signals() to um_set_signals() Rename set_signals() as there's at least one driver that uses the same name and can now be built on UM due to PCI support, and thus we can get symbol conflicts. Also rename set_signals_trace() to be consistent. 
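The failure mode being avoided is a plain symbol clash; in this hypothetical sketch, the arch-wide declaration is the real one and the driver is invented:

	/* arch/um header, visible kernel-wide before the rename: */
	int set_signals(int enable);

	/* a driver newly buildable on UML defines its own helper: */
	static void set_signals(struct mydev *dev)	/* conflicting types */
	{
		writeb(dev->mask, dev->base);	/* drive the device's pins */
	}

Prefixing the arch symbols with "um_" keeps them out of the namespace that drivers reasonably expect to own.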
Reported-by: kernel test robot Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver") Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger --- arch/um/include/asm/irqflags.h | 4 ++-- arch/um/include/shared/longjmp.h | 2 +- arch/um/include/shared/os.h | 4 ++-- arch/um/kernel/ksyms.c | 2 +- arch/um/os-Linux/sigio.c | 6 +++--- arch/um/os-Linux/signal.c | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h index dab5744e9253d..1e69ef5bc35e0 100644 --- a/arch/um/include/asm/irqflags.h +++ b/arch/um/include/asm/irqflags.h @@ -3,7 +3,7 @@ #define __UM_IRQFLAGS_H extern int signals_enabled; -int set_signals(int enable); +int um_set_signals(int enable); void block_signals(void); void unblock_signals(void); @@ -16,7 +16,7 @@ static inline unsigned long arch_local_save_flags(void) #define arch_local_irq_restore arch_local_irq_restore static inline void arch_local_irq_restore(unsigned long flags) { - set_signals(flags); + um_set_signals(flags); } #define arch_local_irq_enable arch_local_irq_enable diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h index bdb2869b72b31..8863319039f3d 100644 --- a/arch/um/include/shared/longjmp.h +++ b/arch/um/include/shared/longjmp.h @@ -18,7 +18,7 @@ extern void longjmp(jmp_buf, int); enable = *(volatile int *)&signals_enabled; \ n = setjmp(*buf); \ if(n != 0) \ - set_signals_trace(enable); \ + um_set_signals_trace(enable); \ n; }) #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 96d400387c93e..03ffbdddcc480 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -238,8 +238,8 @@ extern void send_sigio_to_self(void); extern int change_sig(int signal, int on); extern void block_signals(void); extern void unblock_signals(void); -extern int set_signals(int enable); -extern int set_signals_trace(int enable); +extern int um_set_signals(int enable); +extern int um_set_signals_trace(int enable); extern int os_is_signal_stack(void); extern void deliver_alarm(void); extern void register_pm_wake_signal(void); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index b1e5634398d09..3a85bde3e1734 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -6,7 +6,7 @@ #include #include -EXPORT_SYMBOL(set_signals); +EXPORT_SYMBOL(um_set_signals); EXPORT_SYMBOL(signals_enabled); EXPORT_SYMBOL(os_stat_fd); diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index 6597ea1986ffa..9e71794839e87 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -132,7 +132,7 @@ static void update_thread(void) int n; char c; - flags = set_signals_trace(0); + flags = um_set_signals_trace(0); CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); if (n != sizeof(c)) { printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", @@ -147,7 +147,7 @@ static void update_thread(void) goto fail; } - set_signals_trace(flags); + um_set_signals_trace(flags); return; fail: /* Critical section start */ @@ -161,7 +161,7 @@ static void update_thread(void) close(write_sigio_fds[0]); close(write_sigio_fds[1]); /* Critical section end */ - set_signals_trace(flags); + um_set_signals_trace(flags); } int __add_sigio_fd(int fd) diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 6cf098c23a394..24a403a70a020 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -94,7 +94,7 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) 
sig_handler_common(sig, si, mc); - set_signals_trace(enabled); + um_set_signals_trace(enabled); } static void timer_real_alarm_handler(mcontext_t *mc) @@ -126,7 +126,7 @@ void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) signals_active &= ~SIGALRM_MASK; - set_signals_trace(enabled); + um_set_signals_trace(enabled); } void deliver_alarm(void) { @@ -348,7 +348,7 @@ void unblock_signals(void) } } -int set_signals(int enable) +int um_set_signals(int enable) { int ret; if (signals_enabled == enable) @@ -362,7 +362,7 @@ int set_signals(int enable) return ret; } -int set_signals_trace(int enable) +int um_set_signals_trace(int enable) { int ret; if (signals_enabled == enable) From 79a0dc5530a91694b1e85ef7219893397d85e5bb Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Fri, 16 Jul 2021 22:00:34 +0200 Subject: [PATCH 0033/1202] tools: cpupower: fix typo in cpupower-idle-set(1) manpage The tool's name was wrong in the SYNTAX section of the manpage; it should read "idle-set" instead of "idle-info". Signed-off-by: Andreas Rammhold Signed-off-by: Shuah Khan --- tools/power/cpupower/man/cpupower-idle-set.1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 21916cff7516a..8cef3c71e19e7 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -4,7 +4,7 @@ cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options .SH "SYNTAX" .LP -cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP The cpupower idle\-set subcommand allows to set cpu idle, also called cpu From 536267aafb8aeaa8c5bc4c44105ecf7a6b95c27b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 30 Aug 2021 08:35:10 -0700 Subject: [PATCH 0034/1202] nvmem: core: Add stubs for nvmem_cell_read_variable_le_u32/64 if !CONFIG_NVMEM When I added nvmem_cell_read_variable_le_u32() and nvmem_cell_read_variable_le_u64() I forgot to add the "static inline" stub functions for when CONFIG_NVMEM wasn't defined. Add them now. This was causing problems with randconfig builds that compiled `drivers/soc/qcom/cpr.c`.
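This follows the usual kernel pattern for optional subsystems: real prototypes when the subsystem is enabled, "static inline" stubs that fail cleanly when it is not. A generic sketch with a hypothetical CONFIG_FOO API (not the actual nvmem declarations):

	#if IS_ENABLED(CONFIG_FOO)
	int foo_read_u32(struct device *dev, const char *id, u32 *val);
	#else
	static inline int foo_read_u32(struct device *dev, const char *id,
				       u32 *val)
	{
		return -EOPNOTSUPP;	/* subsystem compiled out */
	}
	#endif

Callers then compile and link in both configurations, which is exactly what randconfig builds exercise.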
Fixes: 6feba6a62c57 ("PM: AVS: qcom-cpr: Use nvmem_cell_read_variable_le_u32()") Fixes: a28e824fb827 ("nvmem: core: Add functions to make number reading easy") Reported-by: kernel test robot Signed-off-by: Douglas Anderson Reviewed-by: Bjorn Andersson Signed-off-by: Srinivas Kandagatla --- include/linux/nvmem-consumer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 923dada24eb41..c0c0cefc3b925 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev, return -EOPNOTSUPP; } +static inline int nvmem_cell_read_variable_le_u32(struct device *dev, + const char *cell_id, + u32 *val) +{ + return -EOPNOTSUPP; +} + +static inline int nvmem_cell_read_variable_le_u64(struct device *dev, + const char *cell_id, + u64 *val) +{ + return -EOPNOTSUPP; +} + static inline struct nvmem_device *nvmem_device_get(struct device *dev, const char *name) { From 605fa23646dd9a86c4300e1719542a865111d1b6 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 5 Sep 2021 16:40:54 +0200 Subject: [PATCH 0035/1202] i3c/master/mipi-i3c-hci: Prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kzalloc() function. [1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210905144054.5124-1-len.baker@gmx.com --- drivers/i3c/master/mipi-i3c-hci/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index af873a9be0507..2990ac9eaade7 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -223,7 +223,7 @@ static int hci_dma_init(struct i3c_hci *hci) } if (nr_rings > XFER_RINGS) nr_rings = XFER_RINGS; - rings = kzalloc(sizeof(*rings) + nr_rings * sizeof(*rh), GFP_KERNEL); + rings = kzalloc(struct_size(rings, headers, nr_rings), GFP_KERNEL); if (!rings) return -ENOMEM; hci->io_data = rings; From 41a0430dd5ca65afdecf10fb0b8b56966a1c5d04 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Sun, 12 Sep 2021 17:51:35 +0200 Subject: [PATCH 0036/1202] i3c/master/mipi-i3c-hci: Prefer kcalloc over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the purpose specific kcalloc() function instead of the argument size * count in the kzalloc() function. 
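For illustration, the two idioms from these patches side by side; the struct layout below is a hypothetical stand-in for the real ring bookkeeping:

	struct ring_data {
		unsigned int total;
		struct ring_header headers[];	/* flexible array member */
	};

	/* Open-coded arithmetic can wrap around for large counts: */
	rings = kzalloc(sizeof(*rings) + nr_rings * sizeof(*rh), GFP_KERNEL);
	xfers = kzalloc(sizeof(struct hci_xfer) * n, GFP_KERNEL);

	/* Overflow-aware equivalents: */
	rings = kzalloc(struct_size(rings, headers, nr_rings), GFP_KERNEL);
	xfers = kcalloc(n, sizeof(struct hci_xfer), GFP_KERNEL);

struct_size() (from linux/overflow.h) saturates at SIZE_MAX on overflow and kcalloc() returns NULL on a wrapped multiplication, so an overflow becomes a failed allocation rather than an undersized buffer.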
[1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Signed-off-by: Len Baker Acked-by: Nicolas Pitre Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210912155135.7541-1-len.baker@gmx.com --- drivers/i3c/master/mipi-i3c-hci/hci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h index 80beb1d5be8f2..f109923f6c3f3 100644 --- a/drivers/i3c/master/mipi-i3c-hci/hci.h +++ b/drivers/i3c/master/mipi-i3c-hci/hci.h @@ -98,7 +98,7 @@ struct hci_xfer { static inline struct hci_xfer *hci_alloc_xfer(unsigned int n) { - return kzalloc(sizeof(struct hci_xfer) * n, GFP_KERNEL); + return kcalloc(n, sizeof(struct hci_xfer), GFP_KERNEL); } static inline void hci_free_xfer(struct hci_xfer *xfer, unsigned int n) From e053322ed118dedb868db3b9250a7ea4754f60ed Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Aug 2020 16:14:34 -0700 Subject: [PATCH 0037/1202] tools/memory-model: Document locking corner cases Most Linux-kernel uses of locking are straightforward, but there are corner-case uses that rely on less well-known aspects of the lock and unlock primitives. This commit therefore adds a locking.txt and litmus tests in Documentation/litmus-tests/locking to explain these corner-case uses. Signed-off-by: Paul E. McKenney --- .../litmus-tests/locking/DCL-broken.litmus | 55 +++ .../litmus-tests/locking/DCL-fixed.litmus | 56 +++ .../litmus-tests/locking/RM-broken.litmus | 42 +++ .../litmus-tests/locking/RM-fixed.litmus | 42 +++ tools/memory-model/Documentation/locking.txt | 320 ++++++++++++++++++ 5 files changed, 515 insertions(+) create mode 100644 Documentation/litmus-tests/locking/DCL-broken.litmus create mode 100644 Documentation/litmus-tests/locking/DCL-fixed.litmus create mode 100644 Documentation/litmus-tests/locking/RM-broken.litmus create mode 100644 Documentation/litmus-tests/locking/RM-fixed.litmus create mode 100644 tools/memory-model/Documentation/locking.txt diff --git a/Documentation/litmus-tests/locking/DCL-broken.litmus b/Documentation/litmus-tests/locking/DCL-broken.litmus new file mode 100644 index 0000000000000..cfaa25ff82b1e --- /dev/null +++ b/Documentation/litmus-tests/locking/DCL-broken.litmus @@ -0,0 +1,55 @@ +C DCL-broken + +(* + * Result: Sometimes + * + * This litmus test demonstrates more than just locking is required to + * correctly implement double-checked locking. 
+ *) + +{ + int flag; + int data; + int lck; +} + +P0(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +P1(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1] +exists (0:r2=0 \/ 1:r2=0) diff --git a/Documentation/litmus-tests/locking/DCL-fixed.litmus b/Documentation/litmus-tests/locking/DCL-fixed.litmus new file mode 100644 index 0000000000000..579d6c246f167 --- /dev/null +++ b/Documentation/litmus-tests/locking/DCL-fixed.litmus @@ -0,0 +1,56 @@ +C DCL-fixed + +(* + * Result: Never + * + * This litmus test demonstrates that double-checked locking can be + * reliable given proper use of smp_load_acquire() and smp_store_release() + * in addition to the locking. + *) + +{ + int flag; + int data; + int lck; +} + +P0(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +P1(int *flag, int *data, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); +} + +locations [flag;data;lck;0:r0;0:r1;1:r0;1:r1] +exists (0:r2=0 \/ 1:r2=0) diff --git a/Documentation/litmus-tests/locking/RM-broken.litmus b/Documentation/litmus-tests/locking/RM-broken.litmus new file mode 100644 index 0000000000000..c586ae4b547de --- /dev/null +++ b/Documentation/litmus-tests/locking/RM-broken.litmus @@ -0,0 +1,42 @@ +C RM-broken + +(* + * Result: DEADLOCK + * + * This litmus test demonstrates that the old "roach motel" approach + * to locking, where code can be freely moved into critical sections, + * cannot be used in the Linux kernel. + *) + +{ + int lck; + int x; + int y; +} + +P0(int *x, int *y, int *lck) +{ + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); +} + +P1(int *x, int *y, int *lck) +{ + int r0; + int r1; + int r2; + + spin_lock(lck); + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + r2 = atomic_inc_return(y); + spin_unlock(lck); +} + +locations [x;lck;0:r2;1:r0;1:r1;1:r2] +filter (y=2 /\ 1:r0=0 /\ 1:r1=1) +exists (1:r2=1) diff --git a/Documentation/litmus-tests/locking/RM-fixed.litmus b/Documentation/litmus-tests/locking/RM-fixed.litmus new file mode 100644 index 0000000000000..672856736b42e --- /dev/null +++ b/Documentation/litmus-tests/locking/RM-fixed.litmus @@ -0,0 +1,42 @@ +C RM-fixed + +(* + * Result: Never + * + * This litmus test demonstrates that the old "roach motel" approach + * to locking, where code can be freely moved into critical sections, + * cannot be used in the Linux kernel. 
+ *) + +{ + int lck; + int x; + int y; +} + +P0(int *x, int *y, int *lck) +{ + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); +} + +P1(int *x, int *y, int *lck) +{ + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + spin_lock(lck); + r2 = atomic_inc_return(y); + spin_unlock(lck); +} + +locations [x;lck;0:r2;1:r0;1:r1;1:r2] +filter (y=2 /\ 1:r0=0 /\ 1:r1=1) +exists (1:r2=1) diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt new file mode 100644 index 0000000000000..4e05c6d53ab72 --- /dev/null +++ b/tools/memory-model/Documentation/locking.txt @@ -0,0 +1,320 @@ +Locking +======= + +Locking is well-known and the common use cases are straightforward: Any +CPU holding a given lock sees any changes previously seen or made by any +CPU before it previously released that same lock. This last sentence +is the only part of this document that most developers will need to read. + +However, developers who would like to also access lock-protected shared +variables outside of their corresponding locks should continue reading. + + +Locking and Prior Accesses +-------------------------- + +The basic rule of locking is worth repeating: + + Any CPU holding a given lock sees any changes previously seen + or made by any CPU before it previously released that same lock. + +Note that this statement is a bit stronger than "Any CPU holding a +given lock sees all changes made by any CPU during the time that CPU was +previously holding this same lock". For example, consider the following +pair of code fragments: + + /* See MP+polocks.litmus. */ + void CPU0(void) + { + WRITE_ONCE(x, 1); + spin_lock(&mylock); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + r0 = READ_ONCE(y); + spin_unlock(&mylock); + r1 = READ_ONCE(x); + } + +The basic rule guarantees that if CPU0() acquires mylock before CPU1(), +then both r0 and r1 must be set to the value 1. This also has the +consequence that if the final value of r0 is equal to 1, then the final +value of r1 must also be equal to 1. In contrast, the weaker rule would +say nothing about the final value of r1. + + +Locking and Subsequent Accesses +------------------------------- + +The converse to the basic rule also holds: Any CPU holding a given +lock will not see any changes that will be made by any CPU after it +subsequently acquires this same lock. This converse statement is +illustrated by the following litmus test: + + /* See MP+porevlocks.litmus. */ + void CPU0(void) + { + r0 = READ_ONCE(y); + spin_lock(&mylock); + r1 = READ_ONCE(x); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + spin_unlock(&mylock); + WRITE_ONCE(y, 1); + } + +This converse to the basic rule guarantees that if CPU0() acquires +mylock before CPU1(), then both r0 and r1 must be set to the value 0. +This also has the consequence that if the final value of r1 is equal +to 0, then the final value of r0 must also be equal to 0. In contrast, +the weaker rule would say nothing about the final value of r0. + +These examples show only a single pair of CPUs, but the effects of the +locking basic rule extend across multiple acquisitions of a given lock +across multiple CPUs. 
+ + +Double-Checked Locking +---------------------- + +It is well known that more than just a lock is required to make +double-checked locking work correctly. This litmus test illustrates +one incorrect approach: + + /* See Documentation/litmus-tests/locking/DCL-broken.litmus. */ + P0(int *flag, int *data, int *lck) + { + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + WRITE_ONCE(*flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); + } + /* P1() is exactly the same as P0(). */ + +There are two problems. First, there is no ordering between the first +READ_ONCE() of "flag" and the READ_ONCE() of "data". Second, there is +no ordering between the two WRITE_ONCE() calls. It should therefore be +no surprise that "r2" can be zero, and a quick herd7 run confirms this. + +One way to fix this is to use smp_load_acquire() and smp_store_release() +as shown in this corrected version: + + /* See Documentation/litmus-tests/locking/DCL-fixed.litmus. */ + P0(int *flag, int *data, int *lck) + { + int r0; + int r1; + int r2; + + r0 = smp_load_acquire(flag); + if (r0 == 0) { + spin_lock(lck); + r1 = READ_ONCE(*flag); + if (r1 == 0) { + WRITE_ONCE(*data, 1); + smp_store_release(flag, 1); + } + spin_unlock(lck); + } + r2 = READ_ONCE(*data); + } + /* P1() is exactly the same as P0(). */ + +The smp_load_acquire() guarantees that its load from "flag" will +be ordered before the READ_ONCE() from data, thus solving the first +problem. The smp_store_release() guarantees that its store will be +ordered after the WRITE_ONCE() to "data", solving the second problem. +The smp_store_release() pairs with the smp_load_acquire(), thus ensuring +that the ordering provided by each actually takes effect. Again, a +quick herd7 run confirms this. + +In short, if you access a lock-protected variable without holding the +corresponding lock, you will need to provide additional ordering, in +this case, via the smp_load_acquire() and the smp_store_release(). + + +Ordering Provided by a Lock to CPUs Not Holding That Lock +--------------------------------------------------------- + +It is not necessarily the case that accesses ordered by locking will be +seen as ordered by CPUs not holding that lock. Consider this example: + + /* See Z6.0+pooncelock+pooncelock+pombonce.litmus. */ + void CPU0(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + r0 = READ_ONCE(y); + WRITE_ONCE(z, 1); + spin_unlock(&mylock); + } + + void CPU2(void) + { + WRITE_ONCE(z, 2); + smp_mb(); + r1 = READ_ONCE(x); + } + +Counter-intuitive though it might be, it is quite possible to have +the final value of r0 be 1, the final value of z be 2, and the final +value of r1 be 0. The reason for this surprising outcome is that CPU2() +never acquired the lock, and thus did not fully benefit from the lock's +ordering properties. + +Ordering can be extended to CPUs not holding the lock by careful use +of smp_mb__after_spinlock(): + + /* See Z6.0+pooncelock+poonceLock+pombonce.litmus.
*/ + void CPU0(void) + { + spin_lock(&mylock); + WRITE_ONCE(x, 1); + WRITE_ONCE(y, 1); + spin_unlock(&mylock); + } + + void CPU1(void) + { + spin_lock(&mylock); + smp_mb__after_spinlock(); + r0 = READ_ONCE(y); + WRITE_ONCE(z, 1); + spin_unlock(&mylock); + } + + void CPU2(void) + { + WRITE_ONCE(z, 2); + smp_mb(); + r1 = READ_ONCE(x); + } + +This addition of smp_mb__after_spinlock() strengthens the lock +acquisition sufficiently to rule out the counter-intuitive outcome. +In other words, the addition of the smp_mb__after_spinlock() prohibits +the counter-intuitive result where the final value of r0 is 1, the final +value of z is 2, and the final value of r1 is 0. + + +No Roach-Motel Locking! +----------------------- + +This example requires familiarity with the herd7 "filter" clause, so +please read up on that topic in litmus-tests.txt. + +It is tempting to allow memory-reference instructions to be pulled +into a critical section, but this cannot be allowed in the general case. +For example, consider a spin loop preceding a lock-based critical section. +Now, herd7 does not model spin loops, but we can emulate one with two +loads, with a "filter" clause to constrain the first to return the +initial value and the second to return the updated value, as shown below: + + /* See Documentation/litmus-tests/locking/RM-fixed.litmus. */ + P0(int *x, int *y, int *lck) + { + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); + } + + P1(int *x, int *y, int *lck) + { + int r0; + int r1; + int r2; + + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + spin_lock(lck); + r2 = atomic_inc_return(y); + spin_unlock(lck); + } + + filter (y=2 /\ 1:r0=0 /\ 1:r1=1) + exists (1:r2=1) + +The variable "x" is the control variable for the emulated spin loop. +P0() sets it to "1" while holding the lock, and P1() emulates the +spin loop by reading it twice, first into "1:r0" (which should get the +initial value "0") and then into "1:r1" (which should get the updated +value "1"). + +The purpose of the variable "y" is to reject deadlocked executions. +Only those executions where the final value of "y" is "2" have avoided deadlock. + +The "filter" clause takes all this into account, constraining "y" to +equal "2", "1:r0" to equal "0", and "1:r1" to equal "1". + +Then the "exists" clause checks to see if P1() acquired its lock first, +which should not happen given the filter clause because P0() updates +"x" while holding the lock. And herd7 confirms this. + +But suppose that the compiler was permitted to reorder the spin loop +into P1()'s critical section, like this: + + /* See Documentation/litmus-tests/locking/RM-broken.litmus. */ + P0(int *x, int *y, int *lck) + { + int r2; + + spin_lock(lck); + r2 = atomic_inc_return(y); + WRITE_ONCE(*x, 1); + spin_unlock(lck); + } + + P1(int *x, int *y, int *lck) + { + int r0; + int r1; + int r2; + + spin_lock(lck); + r0 = READ_ONCE(*x); + r1 = READ_ONCE(*x); + r2 = atomic_inc_return(y); + spin_unlock(lck); + } + + locations [x;lck;0:r2;1:r0;1:r1;1:r2] + filter (y=2 /\ 1:r0=0 /\ 1:r1=1) + exists (1:r2=1) + +If "1:r0" is equal to "0", "1:r1" can never equal "1" because P0() +cannot update "x" while P1() holds the lock. And herd7 confirms this, +showing zero executions matching the "filter" criteria. + +And this is why Linux-kernel lock and unlock primitives must prevent +code from entering critical sections. It is not sufficient to only +prevent code from leaving them. From 7cdc2ce57ce19a3dcabf68e1744a9d7d679d4c93 Mon Sep 17 00:00:00 2001 From: "Paul E.
McKenney" Date: Mon, 18 Mar 2019 11:53:50 -0700 Subject: [PATCH 0038/1202] tools/memory-model: Make judgelitmus.sh note timeouts Currently, judgelitmus.sh treats timeouts (as in the "--timeout" argument) as "!!! Verification error". This can be misleading because it is quite possible that running the test longer would have produced a verification. This commit therefore changes judgelitmus.sh to check for timeouts and to report them with "!!! Timeout". Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 0cc63875e395d..d3c313b9a458a 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -42,6 +42,14 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out then : +elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out +then + echo ' !!! Timeout' $litmus + if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + then + echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + exit 124 else echo ' !!! Verification error' $litmus if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out From df805c14723f7fdc5e712c09967480ed00903f73 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Mar 2019 13:07:46 -0700 Subject: [PATCH 0039/1202] tools/memory-model: Make cmplitmushist.sh note timeouts Currently, cmplitmushist.sh treats timeouts (as in the "--timeout" argument) as "Missing Observation line". This can be misleading because it is quite possible that running the test longer would have produced a verification. This commit therefore changes cmplitmushist.sh to check for timeouts and to report them with "Timed out". Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/cmplitmushist.sh | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh index 0f498aeeccf5e..b9c174dd80042 100755 --- a/tools/memory-model/scripts/cmplitmushist.sh +++ b/tools/memory-model/scripts/cmplitmushist.sh @@ -12,12 +12,30 @@ trap 'rm -rf $T' 0 mkdir $T # comparetest oldpath newpath +timedout=0 perfect=0 obsline=0 noobsline=0 obsresult=0 badcompare=0 comparetest () { + if grep -q '^Command exited with non-zero status 124' $1 || + grep -q '^Command exited with non-zero status 124' $2 + then + if grep -q '^Command exited with non-zero status 124' $1 && + grep -q '^Command exited with non-zero status 124' $2 + then + echo Both runs timed out: $2 + elif grep -q '^Command exited with non-zero status 124' $1 + then + echo Old run timed out: $2 + elif grep -q '^Command exited with non-zero status 124' $2 + then + echo New run timed out: $2 + fi + timedout=`expr "$timedout" + 1` + return 0 + fi grep -v 'maxresident)k\|minor)pagefaults\|^Time' $1 > $T/oldout grep -v 'maxresident)k\|minor)pagefaults\|^Time' $2 > $T/newout if cmp -s $T/oldout $T/newout && grep -q '^Observation' $1 @@ -78,6 +96,10 @@ if test "$obsresult" -ne 0 then echo Matching Observation Always/Sometimes/Never result: $obsresult 1>&2 fi +if test "$timedout" -ne 0 +then + echo "!!!" Timed out: $timedout 1>&2 +fi if test "$badcompare" -ne 0 then echo "!!!" Result changed: $badcompare 1>&2 From bf9b5e1f574c9d3391f07b28ad693fb7ae03fbaa Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 18 Mar 2019 13:40:57 -0700 Subject: [PATCH 0040/1202] tools/memory-model: Make judgelitmus.sh identify bad macros Currently, judgelitmus.sh treats use of unknown primitives (such as srcu_read_lock() prior to SRCU support) as "!!! Verification error". This can be misleading because it fails to call out typos and running a version LKMM on a litmus test requiring a feature not provided by that version. This commit therefore changes judgelitmus.sh to check for unknown primitives and to report them, for example, with: '!!! Current LKMM version does not know "rcu_write_lock"'. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/cmplitmushist.sh | 31 ++++++++++++++++++--- tools/memory-model/scripts/judgelitmus.sh | 12 ++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/scripts/cmplitmushist.sh b/tools/memory-model/scripts/cmplitmushist.sh index b9c174dd80042..ca1ac8b646144 100755 --- a/tools/memory-model/scripts/cmplitmushist.sh +++ b/tools/memory-model/scripts/cmplitmushist.sh @@ -12,6 +12,7 @@ trap 'rm -rf $T' 0 mkdir $T # comparetest oldpath newpath +badmacnam=0 timedout=0 perfect=0 obsline=0 @@ -19,8 +20,26 @@ noobsline=0 obsresult=0 badcompare=0 comparetest () { - if grep -q '^Command exited with non-zero status 124' $1 || - grep -q '^Command exited with non-zero status 124' $2 + if grep -q ': Unknown macro ' $1 || grep -q ': Unknown macro ' $2 + then + if grep -q ': Unknown macro ' $1 + then + badname=`grep ': Unknown macro ' $1 | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + echo 'Current LKMM version does not know "'$badname'"' $1 + fi + if grep -q ': Unknown macro ' $2 + then + badname=`grep ': Unknown macro ' $2 | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + echo 'Current LKMM version does not know "'$badname'"' $2 + fi + badmacnam=`expr "$badmacnam" + 1` + return 0 + elif grep -q '^Command exited with non-zero status 124' $1 || + grep -q '^Command exited with non-zero status 124' $2 then if grep -q '^Command exited with non-zero status 124' $1 && grep -q '^Command exited with non-zero status 124' $2 @@ -56,7 +75,7 @@ comparetest () { return 0 fi else - echo Missing Observation line "(e.g., herd7 timeout)": $2 + echo Missing Observation line "(e.g., syntax error)": $2 noobsline=`expr "$noobsline" + 1` return 0 fi @@ -90,7 +109,7 @@ then fi if test "$noobsline" -ne 0 then - echo Missing Observation line "(e.g., herd7 timeout)": $noobsline 1>&2 + echo Missing Observation line "(e.g., syntax error)": $noobsline 1>&2 fi if test "$obsresult" -ne 0 then @@ -100,6 +119,10 @@ if test "$timedout" -ne 0 then echo "!!!" Timed out: $timedout 1>&2 fi +if test "$badmacnam" -ne 0 +then + echo "!!!" Unknown primitive: $badmacnam 1>&2 +fi if test "$badcompare" -ne 0 then echo "!!!" Result changed: $badcompare 1>&2 diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d3c313b9a458a..d40439c7b71e0 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -42,6 +42,18 @@ grep '^Observation' $LKMM_DESTDIR/$litmus.out if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out then : +elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out +then + badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out | + sed -e 's/^.*: Unknown macro //' | + sed -e 's/ (User error).*$//'` + badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus" + echo $badmsg + if ! grep -q '!!!' 
$LKMM_DESTDIR/$litmus.out + then + echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + exit 254 elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out then echo ' !!! Timeout' $litmus From e828bc8f84d65ef7045dc97270496156bdc75657 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 14:27:06 -0700 Subject: [PATCH 0041/1202] tools/memory-model: Make judgelitmus.sh detect hard deadlocks If a litmus test specifies "Result: Never" and if it contains an unconditional ("hard") deadlock, then running checklitmus.sh on it will not flag any errors, despite the fact that there are no executions. This commit therefore updates judgelitmus.sh to complain about tests with no executions that are marked, but not as "Result: DEADLOCK". Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d40439c7b71e0..84c62eee321bf 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -83,6 +83,14 @@ then fi ret=1 fi +elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' +then + echo " !!! Unexpected non-$outcome deadlock" $litmus + if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + then + echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 + fi + ret=1 elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe then ret=0 From e484012ea65c0737b96c5ec66ed73ecd2af71f22 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 11 Apr 2019 07:33:18 -0700 Subject: [PATCH 0042/1202] tools/memory-model: Fix paulmck email address on pre-existing scripts Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 2 +- tools/memory-model/scripts/checklitmus.sh | 2 +- tools/memory-model/scripts/checklitmushist.sh | 2 +- tools/memory-model/scripts/judgelitmus.sh | 2 +- tools/memory-model/scripts/newlitmushist.sh | 2 +- tools/memory-model/scripts/parseargs.sh | 2 +- tools/memory-model/scripts/runlitmushist.sh | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 3c0c7fbbd223b..10e14d94acee5 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -17,7 +17,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . scripts/parseargs.sh diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 11461ed40b5e4..638b8c610894b 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -15,7 +15,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney litmus=$1 herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} diff --git a/tools/memory-model/scripts/checklitmushist.sh b/tools/memory-model/scripts/checklitmushist.sh index 1d210ffb7c8af..406ecfc0aee4c 100755 --- a/tools/memory-model/scripts/checklitmushist.sh +++ b/tools/memory-model/scripts/checklitmushist.sh @@ -12,7 +12,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . 
scripts/parseargs.sh diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 84c62eee321bf..d82133e75580c 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -13,7 +13,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney litmus=$1 diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh index 991f8f8148817..3f4b06e299886 100755 --- a/tools/memory-model/scripts/newlitmushist.sh +++ b/tools/memory-model/scripts/newlitmushist.sh @@ -12,7 +12,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney . scripts/parseargs.sh diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 40f52080fdbd6..afe7bd23de6b8 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -9,7 +9,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney T=/tmp/parseargs.sh.$$ mkdir $T diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh index 6ed376f495bb4..852786fef179f 100755 --- a/tools/memory-model/scripts/runlitmushist.sh +++ b/tools/memory-model/scripts/runlitmushist.sh @@ -13,7 +13,7 @@ # # Copyright IBM Corporation, 2018 # -# Author: Paul E. McKenney +# Author: Paul E. McKenney T=/tmp/runlitmushist.sh.$$ trap 'rm -rf $T' 0 From 11fe5447155716505eee4bcca5ef929500b97b5c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 15:59:26 -0700 Subject: [PATCH 0043/1202] tools/memory-model: Update parseargs.sh for hardware verification This commit adds a --hw argument to parseargs.sh to specify the CPU family for a hardware verification. For example, "--hw AArch64" will specify that a C-language litmus test is to be translated to ARMv8 and the result verified. This will set the LKMM_HW_MAP_FILE environment variable accordingly. If there is no --hw argument, this environment variable will be set to the empty string. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index afe7bd23de6b8..5f016fc3f3af5 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -27,6 +27,7 @@ initparam () { initparam LKMM_DESTDIR "." initparam LKMM_HERD_OPTIONS "-conf linux-kernel.cfg" +initparam LKMM_HW_MAP_FILE "" initparam LKMM_JOBS `getconf _NPROCESSORS_ONLN` initparam LKMM_PROCS "3" initparam LKMM_TIMEOUT "1m" @@ -37,10 +38,11 @@ usagehelp () { echo "Usage $scriptname [ arguments ]" echo " --destdir path (place for .litmus.out, default by .litmus)" echo " --herdopts -conf linux-kernel.cfg ..." 
+ echo " --hw AArch64" echo " --jobs N (number of jobs, default one per CPU)" echo " --procs N (litmus tests with at most this many processes)" echo " --timeout N (herd7 timeout (e.g., 10s, 1m, 2hr, 1d, '')" - echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'" + echo "Defaults: --destdir '$LKMM_DESTDIR_DEF' --herdopts '$LKMM_HERD_OPTIONS_DEF' --hw '$LKMM_HW_MAP_FILE' --jobs '$LKMM_JOBS_DEF' --procs '$LKMM_PROCS_DEF' --timeout '$LKMM_TIMEOUT_DEF'" exit 1 } @@ -95,6 +97,11 @@ do LKMM_HERD_OPTIONS="$2" shift ;; + --hw) + checkarg --hw "(.map file architecture name)" "$#" "$2" '^[A-Za-z0-9_-]\+' '^--' + LKMM_HW_MAP_FILE="$2" + shift + ;; -j[1-9]*) njobs="`echo $1 | sed -e 's/^-j//'`" trailchars="`echo $njobs | sed -e 's/[0-9]\+\(.*\)$/\1/'`" From dc20e38348b364a8edb91f3fea5de4d1a5ae53a6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 14:39:10 -0700 Subject: [PATCH 0044/1202] tools/memory-model: Make judgelitmus.sh handle hardware verifications This commit makes the judgelitmus.sh script check the --hw argument (AKA the LKMM_HW_MAP_FILE environment variable) and to adjust its judgment for a run where a C-language litmus test has been translated to assembly and the assembly version verified. In this case, the assembly verification output is checked against the C-language script's "Result:" comment. However, because hardware can be stronger than LKMM requires, the judgelitmus.sh script forgives verification mismatches featuring a "Sometimes" in the C-language script and an "Always" or "Never" assembly-language verification. Note that deadlock is not forgiven, however, this should not normally be an issue given that C-language tests containing locking, RCU, or SRCU cannot be translated to assembly. However, this issue can crop up in litmus tests that mimic deadlock by using the "filter" clause to ignore all executions. It can also crop up when certain herd arguments are used to autofilter everything that does not match the "exists" clause in cases where the "exists" clause cannot be satisfied. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/README | 8 +-- tools/memory-model/scripts/judgelitmus.sh | 75 ++++++++++++++--------- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README index 095c7eb36f9f9..0e29a52044c1a 100644 --- a/tools/memory-model/scripts/README +++ b/tools/memory-model/scripts/README @@ -43,10 +43,10 @@ initlitmushist.sh judgelitmus.sh - Given a .litmus file and its .litmus.out herd7 output, check the - .litmus.out file against the .litmus file's "Result:" comment to - judge whether the test ran correctly. Not normally run manually, - provided instead for use by other scripts. + Given a .litmus file and its herd7 output, check the output file + against the .litmus file's "Result:" comment to judge whether + the test ran correctly. Not normally run manually, provided + instead for use by other scripts. 
newlitmushist.sh diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index d82133e75580c..6f3c60065c8b5 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -1,9 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# Given a .litmus test and the corresponding .litmus.out file, check -# the .litmus.out file against the "Result:" comment to judge whether -# the test ran correctly. +# Given a .litmus test and the corresponding litmus output file, check +# the .litmus.out file against the "Result:" comment to judge whether the +# test ran correctly. If the --hw argument is omitted, check against the +# LKMM output, which is assumed to be in file.litmus.out. If this argument +# is provided, this is assumed to be a hardware test, and the output is +# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. +# In addition, non-Sometimes verification results will be noted, but +# forgiven. # # Usage: # judgelitmus.sh file.litmus @@ -24,11 +29,18 @@ else echo ' --- ' error: \"$litmus\" is not a readable file exit 255 fi -if test -f "$LKMM_DESTDIR/$litmus".out -a -r "$LKMM_DESTDIR/$litmus".out +if test -z "$LKMM_HW_MAP_FILE" +then + litmusout=$litmus.out +else + litmusout="`echo $litmus | + sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" +fi +if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" then : else - echo ' --- ' error: \"$LKMM_DESTDIR/$litmus\".out is not a readable file + echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file exit 255 fi if grep -q '^ \* Result: ' $litmus @@ -38,69 +50,76 @@ else outcome=specified fi -grep '^Observation' $LKMM_DESTDIR/$litmus.out -if grep -q '^Observation' $LKMM_DESTDIR/$litmus.out +grep '^Observation' $LKMM_DESTDIR/$litmusout +if grep -q '^Observation' $LKMM_DESTDIR/$litmusout then : -elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out +elif grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout then - badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmus.out | + badname=`grep ': Unknown macro ' $LKMM_DESTDIR/$litmusout | sed -e 's/^.*: Unknown macro //' | sed -e 's/ (User error).*$//'` badmsg=' !!! Current LKMM version does not know "'$badname'"'" $litmus" echo $badmsg - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! '$badmsg >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 254 -elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmus.out +elif grep '^Command exited with non-zero status 124' $LKMM_DESTDIR/$litmusout then echo ' !!! Timeout' $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! Timeout' >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 124 else echo ' !!! Verification error' $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo ' !!! Verification error' >> $LKMM_DESTDIR/$litmusout 2>&1 fi exit 255 fi if test "$outcome" = DEADLOCK then - if grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' + if grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$' then ret=0 else echo " !!! Unexpected non-$outcome verification" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' 
$LKMM_DESTDIR/$litmusout then - echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1 fi ret=1 fi -elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q 'Never 0 0$' +elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q 'Never 0 0$' then echo " !!! Unexpected non-$outcome deadlock" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if ! grep -q '!!!' $LKMM_DESTDIR/$litmusout then - echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 + echo " !!! Unexpected non-$outcome deadlock" $litmus >> $LKMM_DESTDIR/$litmusout 2>&1 fi ret=1 -elif grep '^Observation' $LKMM_DESTDIR/$litmus.out | grep -q $outcome || test "$outcome" = Maybe +elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$outcome" = Maybe then ret=0 else - echo " !!! Unexpected non-$outcome verification" $litmus - if ! grep -q '!!!' $LKMM_DESTDIR/$litmus.out + if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes then - echo " !!! Unexpected non-$outcome verification" >> $LKMM_DESTDIR/$litmus.out 2>&1 + flag="--- Forgiven" + ret=0 + else + flag="!!! Unexpected" + ret=1 + fi + echo " $flag non-$outcome verification" $litmus + if ! grep -qe "$flag" $LKMM_DESTDIR/$litmusout + then + echo " $flag non-$outcome verification" >> $LKMM_DESTDIR/$litmusout 2>&1 fi - ret=1 fi -tail -2 $LKMM_DESTDIR/$litmus.out | head -1 +tail -2 $LKMM_DESTDIR/$litmusout | head -1 exit $ret From caa5e2cfcbe6591740acac04b285be098a2402b1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 19 Mar 2019 16:21:09 -0700 Subject: [PATCH 0045/1202] tools/memory-model: Add simpletest.sh to check locking, RCU, and SRCU This commit abstracts out common function to check a given litmus test for locking, RCU, and SRCU in order to avoid duplicating code. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/simpletest.sh | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 tools/memory-model/scripts/simpletest.sh diff --git a/tools/memory-model/scripts/simpletest.sh b/tools/memory-model/scripts/simpletest.sh new file mode 100755 index 0000000000000..7edc5d3616657 --- /dev/null +++ b/tools/memory-model/scripts/simpletest.sh @@ -0,0 +1,35 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Give zero status if this is a simple test and non-zero otherwise. +# Simple tests do not contain locking, RCU, or SRCU. +# +# Usage: +# simpletest.sh file.litmus +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. McKenney + + +litmus=$1 + +if test -f "$litmus" -a -r "$litmus" +then + : +else + echo ' --- ' error: \"$litmus\" is not a readable file + exit 255 +fi +exclude="^[[:space:]]*\(" +exclude="${exclude}spin_lock(\|spin_unlock(\|spin_trylock(\|spin_is_locked(" +exclude="${exclude}\|rcu_read_lock(\|rcu_read_unlock(" +exclude="${exclude}\|synchronize_rcu(\|synchronize_rcu_expedited(" +exclude="${exclude}\|srcu_read_lock(\|srcu_read_unlock(" +exclude="${exclude}\|synchronize_srcu(\|synchronize_srcu_expedited(" +exclude="${exclude}\)" +if grep -q $exclude $litmus +then + exit 255 +fi +exit 0 From 6352b2858a6e8f072ff06f179770ff58d3a3625b Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 19 Mar 2019 16:37:01 -0700 Subject: [PATCH 0046/1202] tools/memory-model: Fix checkalllitmus.sh comment The checkalllitmus.sh runs litmus tests in the litmus-tests directory, not those in the github archive, so this commit updates the comment to reflect this reality. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 10e14d94acee5..54d8da8c338e1 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -30,8 +30,8 @@ else exit 255 fi -# Create any new directories that have appeared in the github litmus -# repo since the last run. +# Create any new directories that have appeared in the litmus-tests +# directory since the last run. if test "$LKMM_DESTDIR" != "." then find $litmusdir -type d -print | From 9f41dc9f4b00071a17d241fb6670f706145c6c99 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 12:39:27 -0700 Subject: [PATCH 0047/1202] tools/memory-model: Hardware checking for check{,all}litmus.sh This commit makes checklitmus.sh and checkalllitmus.sh check to see if a hardware verification was specified (via the --hw command-line argument, which sets the LKMM_HW_MAP_FILE environment variable). If so, the C-language litmus test is converted to the specified type of assembly-language litmus test and herd is run on it. Hardware is permitted to be stronger than LKMM requires, so "Always" and "Never" verifications of "Sometimes" C-language litmus tests are forgiven. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkalllitmus.sh | 23 +++++------ tools/memory-model/scripts/checklitmus.sh | 42 ++++++++++++++++++-- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh index 54d8da8c338e1..2d3ee850a8399 100755 --- a/tools/memory-model/scripts/checkalllitmus.sh +++ b/tools/memory-model/scripts/checkalllitmus.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # # Run herd7 tests on all .litmus files in the litmus-tests directory @@ -8,6 +8,11 @@ # "^^^". It also outputs verification results to a file whose name is # that of the specified litmus test, but with ".out" appended. # +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# # Usage: # checkalllitmus.sh # @@ -38,21 +43,15 @@ then ( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh ) fi -# Find the checklitmus script. If it is not where we expect it, then -# assume that the caller has the PATH environment variable set -# appropriately. -if test -x scripts/checklitmus.sh -then - clscript=scripts/checklitmus.sh -else - clscript=checklitmus.sh -fi - # Run the script on all the litmus tests in the specified directory ret=0 for i in $litmusdir/*.litmus do - if ! $clscript $i + if test -n "$LKMM_HW_MAP_FILE" && ! scripts/simpletest.sh $i + then + continue + fi + if ! 
scripts/checklitmus.sh $i then ret=1 fi diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 638b8c610894b..42ff11869cd62 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -6,6 +6,11 @@ # results to a file whose name is that of the specified litmus test, but # with ".out" appended. # +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# # Usage: # checklitmus.sh file.litmus # @@ -18,8 +23,6 @@ # Author: Paul E. McKenney litmus=$1 -herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} - if test -f "$litmus" -a -r "$litmus" then : @@ -28,7 +31,38 @@ else exit 255 fi -echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +if test -z "$LKMM_HW_MAP_FILE" +then + # LKMM run + herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} + echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +else + # Hardware run + + T=/tmp/checklitmushw.sh.$$ + trap 'rm -rf $T' 0 2 + mkdir $T + + # Generate filenames + catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" + mapfile="Linux2${LKMM_HW_MAP_FILE}.map" + themefile="$T/${LKMM_HW_MAP_FILE}.theme" + herdoptions="-model $LKMM_HW_CAT_FILE" + hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` + hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` + + # Don't run on litmus tests with complex synchronization + if ! scripts/simpletest.sh $litmus + then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 + fi + + # Generate the assembly code and run herd7 on it. + gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile + jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +fi scripts/judgelitmus.sh $litmus From a2ba13b1ca03aea0b92661f5ae584b9caf68361f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 14:37:46 -0700 Subject: [PATCH 0048/1202] tools/memory-model: Make judgelitmus.sh ransack .litmus.out files The judgelitmus.sh script currently relies solely on the "Result:" comment in the .litmus file. This is problematic when using the --hw argument, because it is necessary to check the hardware model against LKMM even in the absence of "Result:" comments. This commit therefore modifies judgelitmus.sh to check the observation in a .litmus.out file, in case one was generated by a previous LKMM run. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 6f3c60065c8b5..fe9131f8eb969 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -8,7 +8,9 @@ # is provided, this is assumed to be a hardware test, and the output is # assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. # In addition, non-Sometimes verification results will be noted, but -# forgiven. +# forgiven. 
Furthermore, if there is no "Result:" comment but there is +# an LKMM .litmus.out file, the observation in that file will be used +# to judge the assembly-language verification. # # Usage: # judgelitmus.sh file.litmus @@ -32,9 +34,11 @@ fi if test -z "$LKMM_HW_MAP_FILE" then litmusout=$litmus.out + lkmmout= else litmusout="`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" + lkmmout=$litmus.out fi if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" then @@ -46,6 +50,9 @@ fi if grep -q '^ \* Result: ' $litmus then outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` +elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1 +then + outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'` else outcome=specified fi From 245fdbf1f30038ddd491df03f49e1f8b80277a0d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 14:57:56 -0700 Subject: [PATCH 0049/1202] tools/memory-model: Split runlitmus.sh out of checklitmus.sh This commit prepares for adding --hw capability to github litmus-test scripts by splitting runlitmus.sh (which simply runs the verification) out of checklitmus.sh (which also judges the results). Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checklitmus.sh | 57 ++----------------- tools/memory-model/scripts/runlitmus.sh | 69 +++++++++++++++++++++++ 2 files changed, 73 insertions(+), 53 deletions(-) create mode 100755 tools/memory-model/scripts/runlitmus.sh diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh index 42ff11869cd62..4c1d0cf0ddadc 100755 --- a/tools/memory-model/scripts/checklitmus.sh +++ b/tools/memory-model/scripts/checklitmus.sh @@ -1,15 +1,8 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# Run a herd7 test and invokes judgelitmus.sh to check the result against -# a "Result:" comment within the litmus test. It also outputs verification -# results to a file whose name is that of the specified litmus test, but -# with ".out" appended. -# -# If the --hw argument is specified, this script translates the .litmus -# C-language file to the specified type of assembly and verifies that. -# But in this case, litmus tests using complex synchronization (such as -# locking, RCU, and SRCU) are cheerfully ignored. +# Invokes runlitmus.sh and judgelitmus.sh on its arguments to run the +# specified litmus test and pass judgment on the results. # # Usage: # checklitmus.sh file.litmus @@ -22,47 +15,5 @@ # # Author: Paul E. McKenney -litmus=$1 -if test -f "$litmus" -a -r "$litmus" -then - : -else - echo ' --- ' error: \"$litmus\" is not a readable file - exit 255 -fi - -if test -z "$LKMM_HW_MAP_FILE" -then - # LKMM run - herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} - echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 -else - # Hardware run - - T=/tmp/checklitmushw.sh.$$ - trap 'rm -rf $T' 0 2 - mkdir $T - - # Generate filenames - catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" - mapfile="Linux2${LKMM_HW_MAP_FILE}.map" - themefile="$T/${LKMM_HW_MAP_FILE}.theme" - herdoptions="-model $LKMM_HW_CAT_FILE" - hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` - hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` - - # Don't run on litmus tests with complex synchronization - if ! 
scripts/simpletest.sh $litmus - then - echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU - exit 254 - fi - - # Generate the assembly code and run herd7 on it. - gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile - jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 -fi - -scripts/judgelitmus.sh $litmus +scripts/runlitmus.sh $1 +scripts/judgelitmus.sh $1 diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh new file mode 100755 index 0000000000000..91af859c0e90c --- /dev/null +++ b/tools/memory-model/scripts/runlitmus.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Without the -hw argument, runs a herd7 test and outputs verification +# results to a file whose name is that of the specified litmus test, +# but with ".out" appended. +# +# If the --hw argument is specified, this script translates the .litmus +# C-language file to the specified type of assembly and verifies that. +# But in this case, litmus tests using complex synchronization (such as +# locking, RCU, and SRCU) are cheerfully ignored. +# +# Either way, return the status of the herd7 command. +# +# Usage: +# runlitmus.sh file.litmus +# +# Run this in the directory containing the memory model, specifying the +# pathname of the litmus test to check. The caller is expected to have +# properly set up the LKMM environment variables. +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. McKenney + +litmus=$1 +if test -f "$litmus" -a -r "$litmus" +then + : +else + echo ' --- ' error: \"$litmus\" is not a readable file + exit 255 +fi + +if test -z "$LKMM_HW_MAP_FILE" +then + # LKMM run + herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} + echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 +else + # Hardware run + + T=/tmp/checklitmushw.sh.$$ + trap 'rm -rf $T' 0 2 + mkdir $T + + # Generate filenames + catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" + mapfile="Linux2${LKMM_HW_MAP_FILE}.map" + themefile="$T/${LKMM_HW_MAP_FILE}.theme" + herdoptions="-model $LKMM_HW_CAT_FILE" + hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` + hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` + + # Don't run on litmus tests with complex synchronization + if ! scripts/simpletest.sh $litmus + then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 + fi + + # Generate the assembly code and run herd on it. + gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile + jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out + /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +fi + +exit $? From 7fe4f4048b44d74d55cc92fff63e4a4b7c55a60e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 20 Mar 2019 16:41:41 -0700 Subject: [PATCH 0050/1202] tools/memory-model: Make runlitmus.sh generate .litmus.out for --hw In the absence of "Result:" comments, the runlitmus.sh script relies on litmus.out files from prior LKMM runs. This can be a bit user-hostile, so this commit makes runlitmus.sh generate any needed .litmus.out files that don't already exist. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/runlitmus.sh | 54 ++++++++++++++----------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 91af859c0e90c..2865a9661b078 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -28,42 +28,48 @@ if test -f "$litmus" -a -r "$litmus" then : else - echo ' --- ' error: \"$litmus\" is not a readable file + echo ' !!! ' error: \"$litmus\" is not a readable file exit 255 fi -if test -z "$LKMM_HW_MAP_FILE" +if test -z "$LKMM_HW_MAP_FILE" -o ! -e $LKMM_DESTDIR/$litmus.out then # LKMM run herdoptions=${LKMM_HERD_OPTIONS--conf linux-kernel.cfg} echo Herd options: $herdoptions > $LKMM_DESTDIR/$litmus.out /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $litmus >> $LKMM_DESTDIR/$litmus.out 2>&1 -else - # Hardware run + ret=$? + if test -z "$LKMM_HW_MAP_FILE" + then + exit $ret + fi + echo " --- " Automatically generated LKMM output for '"'--hw $LKMM_HW_MAP_FILE'"' run +fi - T=/tmp/checklitmushw.sh.$$ - trap 'rm -rf $T' 0 2 - mkdir $T +# Hardware run - # Generate filenames - catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" - mapfile="Linux2${LKMM_HW_MAP_FILE}.map" - themefile="$T/${LKMM_HW_MAP_FILE}.theme" - herdoptions="-model $LKMM_HW_CAT_FILE" - hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` - hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` +T=/tmp/checklitmushw.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T - # Don't run on litmus tests with complex synchronization - if ! scripts/simpletest.sh $litmus - then - echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU - exit 254 - fi +# Generate filenames +catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" +mapfile="Linux2${LKMM_HW_MAP_FILE}.map" +themefile="$T/${LKMM_HW_MAP_FILE}.theme" +herdoptions="-model $LKMM_HW_CAT_FILE" +hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` +hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` - # Generate the assembly code and run herd on it. - gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile - jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out - /usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +# Don't run on litmus tests with complex synchronization +if ! scripts/simpletest.sh $litmus +then + echo ' --- ' error: \"$litmus\" contains locking, RCU, or SRCU + exit 254 fi +# Generate the assembly code and run herd7 on it. +gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile +jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 + exit $? From 30f9cfefec39ea1c4e86568c60de6851bf9e4b69 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 14:06:27 -0700 Subject: [PATCH 0051/1202] tools/memory-model: Move from .AArch64.litmus.out to .litmus.AArch.out When the github scripts see ".litmus.out", they assume that there must be a corresponding C-language ".litmus" file. Won't they be disappointed when they instead see nothing, or, worse yet, the corresponding assembly-language litmus test? This commit therefore swaps the hardware tag with the "litmus" to avoid this sort of disappointment. This commit also adjusts the .gitignore file so as to avoid adding these new ".out" files to git. 
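(Illustration only, not part of the patch: a minimal shell sketch of the naming change, reusing the sed substitutions from these scripts; the sample test name and the AArch64 map file are assumed.)

	litmus=litmus-tests/S+poonceonces.litmus
	LKMM_HW_MAP_FILE=AArch64
	# Old scheme: the hardware tag lands before the "litmus" segment.
	old="`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out"
	# New scheme: the hardware tag lands after the "litmus" segment.
	new="`echo $litmus | sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`.out"
	echo "$old"	# litmus-tests/S+poonceonces.AArch64.litmus.out
	echo "$new"	# litmus-tests/S+poonceonces.litmus.AArch64.out
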
[ paulmck: Apply Akira Yokosawa feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/litmus-tests/.gitignore | 2 +- tools/memory-model/scripts/judgelitmus.sh | 4 ++-- tools/memory-model/scripts/runlitmus.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore index c492a1ddad91d..d65462d64816d 100644 --- a/tools/memory-model/litmus-tests/.gitignore +++ b/tools/memory-model/litmus-tests/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -*.litmus.out +*.out diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index fe9131f8eb969..9abda72fe013c 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -6,7 +6,7 @@ # test ran correctly. If the --hw argument is omitted, check against the # LKMM output, which is assumed to be in file.litmus.out. If this argument # is provided, this is assumed to be a hardware test, and the output is -# assumed to be in file.HW.litmus.out, where "HW" is the --hw argument. +# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument. # In addition, non-Sometimes verification results will be noted, but # forgiven. Furthermore, if there is no "Result:" comment but there is # an LKMM .litmus.out file, the observation in that file will be used @@ -37,7 +37,7 @@ then lkmmout= else litmusout="`echo $litmus | - sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'`.out" + sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'`.out" lkmmout=$litmus.out fi if test -f "$LKMM_DESTDIR/$litmusout" -a -r "$LKMM_DESTDIR/$litmusout" diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 2865a9661b078..c84124b32bee6 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -57,7 +57,7 @@ catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" mapfile="Linux2${LKMM_HW_MAP_FILE}.map" themefile="$T/${LKMM_HW_MAP_FILE}.theme" herdoptions="-model $LKMM_HW_CAT_FILE" -hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.'${LKMM_HW_MAP_FILE}'.litmus/'` +hwlitmus=`echo $litmus | sed -e 's/\.litmus$/.litmus.'${LKMM_HW_MAP_FILE}'/'` hwlitmusfile=`echo $hwlitmus | sed -e 's,^.*/,,'` # Don't run on litmus tests with complex synchronization From 3f75a557f5c70928f517b6563483d47fddf60987 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 21 Mar 2019 14:44:09 -0700 Subject: [PATCH 0052/1202] tools/memory-model: Keep assembly-language litmus tests This commit retains the assembly-language litmus tests generated from the C-language litmus tests, appending the hardware tag to the original C-language litmus test's filename. Thus, S+poonceonces.litmus.AArch64 contains the Armv8 assembly language corresponding to the C-language S+poonceonces.litmus test. This commit also updates the .gitignore to avoid committing these automatically generated assembly-language litmus tests. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/litmus-tests/.gitignore | 2 +- tools/memory-model/scripts/runlitmus.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore index d65462d64816d..19c379cf069d2 100644 --- a/tools/memory-model/litmus-tests/.gitignore +++ b/tools/memory-model/litmus-tests/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -*.out +*.litmus.* diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index c84124b32bee6..62b47c7e1ba93 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -69,7 +69,7 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile -jingle7 -theme $themefile $litmus > $T/$hwlitmusfile 2> $T/$hwlitmusfile.jingle7.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $T/$hwlitmusfile > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 5944cbfd4036d6d5c8661647f8fd0408763d568f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 22 Mar 2019 08:57:20 -0700 Subject: [PATCH 0053/1202] tools/memory-model: Allow herd to deduce CPU type Currently, the scripts specify the CPU's .cat file to herd. But this is pointless because herd will select a good and sufficient .cat file from the assembly-language litmus test itself. This commit therefore removes the -model argument to herd, allowing herd to figure the CPU family out itself. Note that the user can override herd's choice using the "--herdopts" argument to the scripts. Suggested-by: Luc Maranget Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 62b47c7e1ba93..afb196d7ef106 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -53,7 +53,6 @@ trap 'rm -rf $T' 0 2 mkdir $T # Generate filenames -catfile="`echo $LKMM_HW_MAP_FILE | tr '[A-Z]' '[a-z]'`.cat" mapfile="Linux2${LKMM_HW_MAP_FILE}.map" themefile="$T/${LKMM_HW_MAP_FILE}.theme" herdoptions="-model $LKMM_HW_CAT_FILE" @@ -70,6 +69,6 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -model $catfile $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 1b5fdb8ac3b0d9385e52c26680a369bd5c7c4146 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 25 Mar 2019 17:20:51 -0700 Subject: [PATCH 0054/1202] tools/memory-model: Make runlitmus.sh check for jingle errors It turns out that the jingle7 tool is currently a bit picky about the litmus tests it is willing to process. This commit therefore ensures that jingle7 failures are reported. Signed-off-by: Paul E. 
McKenney --- tools/memory-model/scripts/runlitmus.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index afb196d7ef106..5f2d29b460ff0 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -69,6 +69,11 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out +then + echo ' !!! ' jingle7 failed, no $hwlitmus generated + exit 253 +fi /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From b323b08527f56c2b9953bb612005a4ace701f91c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Apr 2019 12:34:56 -0700 Subject: [PATCH 0055/1202] tools/memory-model: Add -v flag to jingle7 runs Adding the -v flag to jingle7 invocations gives much useful information on why jingle7 didn't like a given litmus test. This commit therefore adds this flag and saves off any such information into a .err file. Suggested-by: Luc Maranget Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index 5f2d29b460ff0..dfdb1f00fcc03 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -68,10 +68,11 @@ fi # Generate the assembly code and run herd7 on it. gen_theme7 -n 10 -map $mapfile -call Linux.call > $themefile -jingle7 -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out +jingle7 -v -theme $themefile $litmus > $LKMM_DESTDIR/$hwlitmus 2> $T/$hwlitmusfile.jingle7.out if grep -q "Generated 0 tests" $T/$hwlitmusfile.jingle7.out then - echo ' !!! ' jingle7 failed, no $hwlitmus generated + echo ' !!! ' jingle7 failed, errors in $hwlitmus.err + cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err exit 253 fi /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 From 3dfc1933f249c223424c9b2c7da92aabc8845d31 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 22 Mar 2019 17:18:43 -0700 Subject: [PATCH 0056/1202] tools/memory-model: Implement --hw support for checkghlitmus.sh This commit enables the "--hw" argument for the checkghlitmus.sh script, causing it to convert any applicable C-language litmus tests to the specified flavor of assembly language, to verify these assembly-language litmus tests, and to check compatibility of the outcomes. Note that the conversion does not yet handle locking, RCU, SRCU, plain C-language memory accesses, or casts. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkghlitmus.sh | 9 ++++--- tools/memory-model/scripts/hwfnseg.sh | 20 +++++++++++++++ tools/memory-model/scripts/runlitmushist.sh | 27 +++++++++++++-------- 3 files changed, 42 insertions(+), 14 deletions(-) create mode 100755 tools/memory-model/scripts/hwfnseg.sh diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh index 6589fbb6f6538..2ea220d2564b9 100755 --- a/tools/memory-model/scripts/checkghlitmus.sh +++ b/tools/memory-model/scripts/checkghlitmus.sh @@ -10,6 +10,7 @@ # parseargs.sh scripts for arguments. . scripts/parseargs.sh +. 
scripts/hwfnseg.sh T=/tmp/checkghlitmus.sh.$$ trap 'rm -rf $T' 0 @@ -32,9 +33,9 @@ then ( cd "$LKMM_DESTDIR"; sed -e 's/^/mkdir -p /' | sh ) fi -# Create a list of the C-language litmus tests previously run. -( cd $LKMM_DESTDIR; find litmus -name '*.litmus.out' -print ) | - sed -e 's/\.out$//' | +# Create a list of the specified litmus tests previously run. +( cd $LKMM_DESTDIR; find litmus -name "*.litmus${hwfnseg}.out" -print ) | + sed -e "s/${hwfnseg}"'\.out$//' | xargs -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' | xargs -r grep -L "^P${LKMM_PROCS}"> $T/list-C-already @@ -44,7 +45,7 @@ find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short -# Form list of tests without corresponding .litmus.out files +# Form list of tests without corresponding .out files sort $T/list-C-already $T/list-C-result-short | uniq -u > $T/list-C-needed # Run any needed tests. diff --git a/tools/memory-model/scripts/hwfnseg.sh b/tools/memory-model/scripts/hwfnseg.sh new file mode 100755 index 0000000000000..580c3281181c5 --- /dev/null +++ b/tools/memory-model/scripts/hwfnseg.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Generate the hardware extension to the litmus-test filename, or the +# empty string if this is an LKMM run. The extension is placed in +# the shell variable hwfnseg. +# +# Usage: +# . hwfnseg.sh +# +# Copyright IBM Corporation, 2019 +# +# Author: Paul E. McKenney + +if test -z "$LKMM_HW_MAP_FILE" +then + hwfnseg= +else + hwfnseg=".$LKMM_HW_MAP_FILE" +fi diff --git a/tools/memory-model/scripts/runlitmushist.sh b/tools/memory-model/scripts/runlitmushist.sh index 852786fef179f..c6c2bdc67a502 100755 --- a/tools/memory-model/scripts/runlitmushist.sh +++ b/tools/memory-model/scripts/runlitmushist.sh @@ -15,6 +15,8 @@ # # Author: Paul E. McKenney +. scripts/hwfnseg.sh + T=/tmp/runlitmushist.sh.$$ trap 'rm -rf $T' 0 mkdir $T @@ -30,15 +32,12 @@ fi # Prefixes for per-CPU scripts for ((i=0;i<$LKMM_JOBS;i++)) do - echo dir="$LKMM_DESTDIR" > $T/$i.sh echo T=$T >> $T/$i.sh - echo herdoptions=\"$LKMM_HERD_OPTIONS\" >> $T/$i.sh cat << '___EOF___' >> $T/$i.sh runtest () { - echo ' ... ' /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 '>' $dir/$1.out '2>&1' - if /usr/bin/time $LKMM_TIMEOUT_CMD herd7 $herdoptions $1 > $dir/$1.out 2>&1 + if scripts/runlitmus.sh $1 then - if ! grep -q '^Observation ' $dir/$1.out + if ! grep -q '^Observation ' $LKMM_DESTDIR/$1$2.out then echo ' !!! Herd failed, no Observation:' $1 fi @@ -47,10 +46,16 @@ do if test "$exitcode" -eq 124 then exitmsg="timed out" + elif test "$exitcode" -eq 253 + then + exitmsg= else exitmsg="failed, exit code $exitcode" fi - echo ' !!! Herd' ${exitmsg}: $1 + if test -n "$exitmsg" + then + echo ' !!! 
Herd' ${exitmsg}: $1 + fi fi } ___EOF___ done awk -v q="'" -v b='\\' ' { print "echo `grep " q "^P[0-9]" b "+(" q " " $0 " | tail -1 | sed -e " q "s/^P" b "([0-9]" b "+" b ")(.*$/" b "1/" q "` " $0 -}' | bash | -sort -k1n | -awk -v ncpu=$LKMM_JOBS -v t=$T ' +}' | sh | sort -k1n | +awk -v dq='"' -v hwfnseg="$hwfnseg" -v ncpu="$LKMM_JOBS" -v t="$T" ' { - print "runtest " $2 >> t "/" NR % ncpu ".sh"; + print "if test -z " dq hwfnseg dq " || scripts/simpletest.sh " dq $2 dq + print "then" + print "\techo runtest " dq $2 dq " " hwfnseg " >> " t "/" NR % ncpu ".sh"; + print "fi" } END { From 07c06956fc2accc18f768d2bb869741517be5703 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 27 Mar 2019 11:47:14 -0700 Subject: [PATCH 0057/1202] tools/memory-model: Fix scripting --jobs argument The parseargs.sh regular expression for the --jobs argument incorrectly requires that the number of jobs be at least 10, that is, have at least two digits. This commit therefore adjusts this regular expression to allow single-digit numbers of jobs to be specified. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 5f016fc3f3af5..25a81ac0dfdf4 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -113,7 +113,7 @@ do LKMM_JOBS="`echo $njobs | sed -e 's/^\([0-9]\+\).*$/\1/'`" ;; --jobs|--job|-j) - checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]\+$' '^--' + checkarg --jobs "(number)" "$#" "$2" '^[1-9][0-9]*$' '^--' LKMM_JOBS="$2" shift ;; From 2774beb8d80a1afbb71841437118e6728fe5e16e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 8 Apr 2019 09:27:28 -0700 Subject: [PATCH 0058/1202] tools/memory-model: Make checkghlitmus.sh use mselect7 The checkghlitmus.sh script currently uses grep to ignore non-C-language litmus tests, which is a bit fragile. This commit therefore enlists the aid of "mselect7 -arch C", given Luc Maranget's recent modifications that allow mselect7 to operate in filter mode. This change requires herdtools 7.52-32-g1da3e0e50977 or later. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/checkghlitmus.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/checkghlitmus.sh b/tools/memory-model/scripts/checkghlitmus.sh index 2ea220d2564b9..cedd0290b73f8 100755 --- a/tools/memory-model/scripts/checkghlitmus.sh +++ b/tools/memory-model/scripts/checkghlitmus.sh @@ -41,7 +41,7 @@ fi # Create a list of C-language litmus tests with "Result:" commands and # no more than the specified number of processes. -find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C xargs < $T/list-C -r egrep -l '^ \* Result: (Never|Sometimes|Always|DEADLOCK)' > $T/list-C-result xargs < $T/list-C-result -r grep -L "^P${LKMM_PROCS}" > $T/list-C-result-short From 926aed148ad8ca9e8d5caa5c38c4726876e9df30 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 8 Apr 2019 10:02:23 -0700 Subject: [PATCH 0059/1202] tools/memory-model: Make history-check scripts use mselect7 The history-check scripts currently use grep to ignore non-C-language litmus tests, which is a bit fragile. This commit therefore enlists the aid of "mselect7 -arch C", given Luc Maranget's recent modifications that allow mselect7 to operate in filter mode. 
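For comparison, here is a sketch of the old and new selection pipelines as they appear in these scripts; $T is the scripts' scratch directory:

	# Old, fragile approach: key off a leading "C " line in each test.
	find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C-all
	# New approach: let mselect7 pick out the C-language tests directly.
	find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C-all
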
This change requires herdtools 7.52-32-g1da3e0e50977 or later. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/initlitmushist.sh | 2 +- tools/memory-model/scripts/newlitmushist.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/memory-model/scripts/initlitmushist.sh b/tools/memory-model/scripts/initlitmushist.sh index 956b6957484d8..31ea782955d3f 100755 --- a/tools/memory-model/scripts/initlitmushist.sh +++ b/tools/memory-model/scripts/initlitmushist.sh @@ -60,7 +60,7 @@ fi # Create a list of the C-language litmus tests with no more than the # specified number of processes (per the --procs argument). -find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C xargs < $T/list-C -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short scripts/runlitmushist.sh < $T/list-C-short diff --git a/tools/memory-model/scripts/newlitmushist.sh b/tools/memory-model/scripts/newlitmushist.sh index 3f4b06e299886..25235e2049cf0 100755 --- a/tools/memory-model/scripts/newlitmushist.sh +++ b/tools/memory-model/scripts/newlitmushist.sh @@ -43,7 +43,7 @@ fi # Form full list of litmus tests with no more than the specified # number of processes (per the --procs argument). -find litmus -name '*.litmus' -exec grep -l -m 1 "^C " {} \; > $T/list-C-all +find litmus -name '*.litmus' -print | mselect7 -arch C > $T/list-C-all xargs < $T/list-C-all -r grep -L "^P${LKMM_PROCS}" > $T/list-C-short # Form list of new tests. Note: This does not handle litmus-test deletion! From 139dbab9d465d1d138d1f3c9069703238cc26ce0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 09:51:57 -0700 Subject: [PATCH 0060/1202] tools/memory-model: Add "--" to parseargs.sh for additional arguments Currently, parseargs.sh expects to consume all the command-line arguments, which prevents the calling script from having any of its own arguments. This commit therefore causes parseargs.sh to stop consuming arguments when it encounters a "--" argument, leaving any remaining arguments for the calling script. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 25a81ac0dfdf4..7aa58755adfc0 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -83,7 +83,7 @@ do echo "Cannot create directory --destdir '$LKMM_DESTDIR'" usage fi - if test -d "$LKMM_DESTDIR" -a -w "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR" + if test -d "$LKMM_DESTDIR" -a -x "$LKMM_DESTDIR" then : else @@ -127,6 +127,10 @@ do LKMM_TIMEOUT="$2" shift ;; + --) + shift + break + ;; *) echo Unknown argument $1 usage From 55e771147bdf8811bd3132abbd648d7085835bd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 10:03:29 -0700 Subject: [PATCH 0061/1202] tools/memory-model: Repair parseargs.sh header comment Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/parseargs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/parseargs.sh b/tools/memory-model/scripts/parseargs.sh index 7aa58755adfc0..08ded59098607 100755 --- a/tools/memory-model/scripts/parseargs.sh +++ b/tools/memory-model/scripts/parseargs.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ # -# the corresponding .litmus.out file, and does not judge the result. 
+# Parse arguments common to the various scripts. # # . scripts/parseargs.sh # From 1b7daa7ff68b3b1823e1ec4e021be95a58c1645c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 May 2019 10:05:14 -0700 Subject: [PATCH 0062/1202] tools/memory-model: Add checktheselitmus.sh to run specified litmus tests This commit adds a checktheselitmus.sh script that runs the litmus tests specified on the command line. This is useful for verifying fixes to specific litmus tests. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/README | 8 ++++ .../memory-model/scripts/checktheselitmus.sh | 43 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100755 tools/memory-model/scripts/checktheselitmus.sh diff --git a/tools/memory-model/scripts/README b/tools/memory-model/scripts/README index 0e29a52044c1a..cc2c4e5be9ec1 100644 --- a/tools/memory-model/scripts/README +++ b/tools/memory-model/scripts/README @@ -27,6 +27,14 @@ checklitmushist.sh checklitmus.sh Check a single litmus test against its "Result:" expected result. + Not intended for manual use. + +checktheselitmus.sh + + Check the specified list of litmus tests against their "Result:" + expected results. This takes optional parseargs.sh arguments, + followed by "--" followed by pathnames starting from the current + directory. cmplitmushist.sh diff --git a/tools/memory-model/scripts/checktheselitmus.sh b/tools/memory-model/scripts/checktheselitmus.sh new file mode 100755 index 0000000000000..10eeb5ecea6de --- /dev/null +++ b/tools/memory-model/scripts/checktheselitmus.sh @@ -0,0 +1,43 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Invokes checklitmus.sh on its arguments to run the specified litmus +# test and pass judgment on the results. +# +# Usage: +# checktheselitmus.sh -- [ file1.litmus [ file2.litmus ... ] ] +# +# Run this in the directory containing the memory model, specifying the +# pathname of the litmus test to check. The usual parseargs.sh arguments +# can be specified prior to the "--". +# +# This script is intended for use with pathnames that start from the +# tools/memory-model directory. If some of the pathnames instead start at +# the root directory, they all must do so and the "--destdir /" parseargs.sh +# argument must be specified prior to the "--". Alternatively, some other +# "--destdir" argument can be supplied as long as the needed subdirectories +# are populated. +# +# Copyright IBM Corporation, 2018 +# +# Author: Paul E. McKenney + +. scripts/parseargs.sh + +ret=0 +for i in "$@" +do + if scripts/checklitmus.sh $i + then + : + else + ret=1 + fi +done +if test "$ret" -ne 0 +then + echo " ^^^ VERIFICATION MISMATCHES" 1>&2 +else + echo All litmus tests verified as was expected. 1>&2 +fi +exit $ret From 3e49e7196cfe1b70a5c3d955cabaef7ee691633b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 3 May 2019 07:34:20 -0700 Subject: [PATCH 0063/1202] tools/memory-model: Add data-race capabilities to judgelitmus.sh This commit adds functionality to judgelitmus.sh to allow it to handle both the "DATARACE" markers in the "Result:" comments in litmus tests and the "Flag data-race" markers in LKMM output. For C-language tests, if either marker is present, the other must also be present, at least for litmus tests having a "Result:" comment. If the LKMM output indicates a data race, then failures of the Always/Sometimes/Never portion of the "Result:" prediction are forgiven. 
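As a rough sketch, using the variable names from judgelitmus.sh, the two markers are detected as follows, after which the script insists that they agree:

	# LKMM output flags a data race...
	if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout"
	then
		datarace_modeled=1
	fi
	# ...and the litmus test predicts one in its "Result:" comment.
	if grep -q '^ \* Result: .* DATARACE' $litmus
	then
		datarace_predicted=1
	fi
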
The reason for forgiving "Result:" mispredictions is that data races can result in "interesting" compiler optimizations, so that all bets are off in the data-race case. [ paulmck: Apply Akira Yokosawa feedback. ] Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 40 ++++++++++++++++++----- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 9abda72fe013c..2700481d20f01 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -4,13 +4,19 @@ # Given a .litmus test and the corresponding litmus output file, check # the .litmus.out file against the "Result:" comment to judge whether the # test ran correctly. If the --hw argument is omitted, check against the -# LKMM output, which is assumed to be in file.litmus.out. If this argument -# is provided, this is assumed to be a hardware test, and the output is -# assumed to be in file.litmus.HW.out, where "HW" is the --hw argument. -# In addition, non-Sometimes verification results will be noted, but -# forgiven. Furthermore, if there is no "Result:" comment but there is -# an LKMM .litmus.out file, the observation in that file will be used -# to judge the assembly-language verification. +# LKMM output, which is assumed to be in file.litmus.out. If either a +# "DATARACE" marker in the "Result:" comment or a "Flag data-race" marker +# in the LKMM output is present, the other must also be as well, at least +# for litmus tests having a "Result:" comment. In this case, a failure of +# the Always/Sometimes/Never portion of the "Result:" prediction will be +# noted, but forgiven. +# +# If the --hw argument is provided, this is assumed to be a hardware +# test, and the output is assumed to be in file.litmus.HW.out, where +# "HW" is the --hw argument. In addition, non-Sometimes verification +# results will be noted, but forgiven. Furthermore, if there is no +# "Result:" comment but there is an LKMM .litmus.out file, the observation +# in that file will be used to judge the assembly-language verification. # # Usage: # judgelitmus.sh file.litmus @@ -47,9 +53,27 @@ else echo ' --- ' error: \"$LKMM_DESTDIR/$litmusout is not a readable file exit 255 fi +if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout" +then + datarace_modeled=1 +fi if grep -q '^ \* Result: ' $litmus then outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` + if grep -m1 '^ \* Result: .* DATARACE' $litmus + then + datarace_predicted=1 + fi + if test -n "$datarace_predicted" -a -z "$datarace_modeled" -a -z "$LKMM_HW_MAP_FILE" + then + echo '!!! Predicted data race not modeled' $litmus + exit 252 + elif test -z "$datarace_predicted" -a -n "$datarace_modeled" + then + # Note that hardware models currently don't model data races + echo '!!! Unexpected data race modeled' $litmus + exit 253 + fi elif test -n "$LKMM_HW_MAP_FILE" && grep -q '^Observation' $LKMM_DESTDIR/$lkmmout > /dev/null 2>&1 then outcome=`grep -m 1 '^Observation ' $LKMM_DESTDIR/$lkmmout | awk '{ print $3 }'` @@ -114,7 +138,7 @@ elif grep '^Observation' $LKMM_DESTDIR/$litmusout | grep -q $outcome || test "$o then ret=0 else - if test -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes + if test \( -n "$LKMM_HW_MAP_FILE" -a "$outcome" = Sometimes \) -o -n "$datarace_modeled" then flag="--- Forgiven" ret=0 From b11225c67539bcddd189e92fdc3ab8fa4a6311f4 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 6 Jun 2019 02:13:27 -0700 Subject: [PATCH 0064/1202] tools/memory-model: Make judgelitmus.sh handle scripted Result: tag The scripts that generate the litmus tests in the "auto" directory of the https://github.com/paulmckrcu/litmus archive place the "Result:" tag into a single-line ocaml comment, which judgelitmus.sh currently does not recognize. This commit therefore makes judgelitmus.sh recognize both the multiline comment format that it currently does and the automatically generated single-line format. Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/judgelitmus.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/memory-model/scripts/judgelitmus.sh b/tools/memory-model/scripts/judgelitmus.sh index 2700481d20f01..1ec5d89fcfbb2 100755 --- a/tools/memory-model/scripts/judgelitmus.sh +++ b/tools/memory-model/scripts/judgelitmus.sh @@ -57,10 +57,10 @@ if grep -q '^Flag data-race$' "$LKMM_DESTDIR/$litmusout" then datarace_modeled=1 fi -if grep -q '^ \* Result: ' $litmus +if grep -q '^[( ]\* Result: ' $litmus then - outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'` - if grep -m1 '^ \* Result: .* DATARACE' $litmus + outcome=`grep -m 1 '^[( ]\* Result: ' $litmus | awk '{ print $3 }'` + if grep -m1 '^[( ]\* Result: .* DATARACE' $litmus then datarace_predicted=1 fi From 1ff5a4bb68065fb55caad06c30dd6e4c9e67e9c1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 24 Jun 2019 22:30:32 -0700 Subject: [PATCH 0065/1202] tools/memory-model: Use "-unroll 0" to keep --hw runs finite Litmus tests involving atomic operations produce LL/SC loops on a number of architectures, and unrolling these loops can result in excessive verification times or even stack overflows. This commit therefore uses the "-unroll 0" herd7 argument to avoid unrolling, on the grounds that additional passes through an LL/SC loop should not change the verification. Note, however, that certain bugs in the mapping of the LL/SC loop to machine instructions may go undetected. On the other hand, herd7 might not be the best vehicle for finding such bugs in any case. (You do stress-test your architecture-specific code, don't you?) Suggested-by: Luc Maranget Signed-off-by: Paul E. McKenney --- tools/memory-model/scripts/runlitmus.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/memory-model/scripts/runlitmus.sh b/tools/memory-model/scripts/runlitmus.sh index dfdb1f00fcc03..94608d4b6502e 100755 --- a/tools/memory-model/scripts/runlitmus.sh +++ b/tools/memory-model/scripts/runlitmus.sh @@ -75,6 +75,6 @@ then cp $T/$hwlitmusfile.jingle7.out $LKMM_DESTDIR/$hwlitmus.err exit 253 fi -/usr/bin/time $LKMM_TIMEOUT_CMD herd7 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 +/usr/bin/time $LKMM_TIMEOUT_CMD herd7 -unroll 0 $LKMM_DESTDIR/$hwlitmus > $LKMM_DESTDIR/$hwlitmus.out 2>&1 exit $? From 78665f57c3faa09f123c4818101526e43ae9b6a4 Mon Sep 17 00:00:00 2001 From: Piyush Mehta Date: Mon, 13 Sep 2021 19:30:05 +0530 Subject: [PATCH 0066/1202] usb: chipidea: udc: make controller hardware endpoint primed Root cause: an endpoint may not be recognized as primed when the bus is under pressure, because the add-dTD tripwire semaphore (the ATDTW bit in the USBCMD register) can cause the controller to ignore a dTD that is added to a primed endpoint. This issue was observed with a Windows 10 host machine. Workaround: the software must implement a periodic cycle that checks, for each dTD, whether the endpoint is primed. 
It can do this by reading the corresponding bits in the ENDPTPRIME and ENDPTSTAT registers. If these bits are read at 0, the software needs to re-prime the endpoint by writing 1 to the corresponding bit in the ENDPTPRIME register. Added conditional revision check of 2.20[CI_REVISION_22]. Signed-off-by: Piyush Mehta Link: https://lore.kernel.org/r/20210913140005.955699-1-piyush.mehta@xilinx.com Signed-off-by: Peter Chen --- drivers/usb/chipidea/udc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8834ca6137219..f9ca5010f65be 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -49,6 +49,8 @@ ctrl_endpt_in_desc = { .wMaxPacketSize = cpu_to_le16(CTRL_PAYLOAD_MAX), }; +static int reprime_dtd(struct ci_hdrc *ci, struct ci_hw_ep *hwep, + struct td_node *node); /** * hw_ep_bit: calculates the bit number * @num: endpoint number @@ -599,6 +601,12 @@ static int _hardware_enqueue(struct ci_hw_ep *hwep, struct ci_hw_req *hwreq) prevlastnode->ptr->next = cpu_to_le32(next); wmb(); + + if (ci->rev == CI_REVISION_22) { + if (!hw_read(ci, OP_ENDPTSTAT, BIT(n))) + reprime_dtd(ci, hwep, prevlastnode); + } + if (hw_read(ci, OP_ENDPTPRIME, BIT(n))) goto done; do { From e4d0262e9a2a3022191b5dace931e54735bcb136 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 15 Jul 2021 21:06:06 +0200 Subject: [PATCH 0067/1202] extcon: usb-gpio: Use the right includes The USB GPIO extcon driver does not use any of the legacy includes or but exploits the fact that this brings in . Fix this up by using the right includes. Signed-off-by: Linus Walleij Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-usb-gpio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c index f06be6d4e2a9c..0cb440bdd5cbe 100644 --- a/drivers/extcon/extcon-usb-gpio.c +++ b/drivers/extcon/extcon-usb-gpio.c @@ -7,18 +7,17 @@ */ #include -#include #include #include #include #include #include #include -#include #include #include #include #include +#include #define USB_GPIO_DEBOUNCE_MS 20 /* ms */ From a864e1bf1fbbb4d17cb655bfeb6efdd817462e0f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 15 Jul 2021 17:26:57 +0200 Subject: [PATCH 0068/1202] extcon: max3355: Drop unused include This driver includes the legacy header but does not use it. Drop this include. Cc: Sergei Shtylyov Signed-off-by: Linus Walleij Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-max3355.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/extcon/extcon-max3355.c b/drivers/extcon/extcon-max3355.c index fa01926c09f16..d7795607f6935 100644 --- a/drivers/extcon/extcon-max3355.c +++ b/drivers/extcon/extcon-max3355.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include From 904ba1cf4e75d96088874e1d15c55efbaa426f87 Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Fri, 23 Jul 2021 15:28:08 +0200 Subject: [PATCH 0069/1202] ARM: dts: stm32: add STM32MP13 SoCs support Add initial support of STM32MP13 family. The STM32MP13 SoC diversity is composed by: -STM32MP131: -core: 1*CA7, 17*TIMERS, 5*LPTIMERS, DMA/MDMA/DMAMUX -storage: 3*SDMCC, 1*QSPI, FMC -com: USB (OHCI/EHCI, OTG), 5*I2C, 5*SPI/I2S, 8*U(S)ART -audio: 2*SAI -network: 1*ETH(GMAC) -STM32MP133: STM32MP131 + 2*CAN, ETH2(GMAC), ADC1 -STM32MP135: STM32MP133 + DCMIPP, LTDC A second diversity layer exists for security features: -STM32MP13xY, "Y" gives information: -Y = A/D means no cryp IP and no secure boot. 
-Y = C/F means cryp IP + secure boot. This commit adds basic peripheral. Signed-off-by: Alexandre Torgue Acked-by: Arnd Bergmann --- arch/arm/boot/dts/stm32mp131.dtsi | 283 +++++++++++++++++++++++++++++ arch/arm/boot/dts/stm32mp133.dtsi | 37 ++++ arch/arm/boot/dts/stm32mp135.dtsi | 12 ++ arch/arm/boot/dts/stm32mp13xc.dtsi | 17 ++ arch/arm/boot/dts/stm32mp13xf.dtsi | 17 ++ 5 files changed, 366 insertions(+) create mode 100644 arch/arm/boot/dts/stm32mp131.dtsi create mode 100644 arch/arm/boot/dts/stm32mp133.dtsi create mode 100644 arch/arm/boot/dts/stm32mp135.dtsi create mode 100644 arch/arm/boot/dts/stm32mp13xc.dtsi create mode 100644 arch/arm/boot/dts/stm32mp13xf.dtsi diff --git a/arch/arm/boot/dts/stm32mp131.dtsi b/arch/arm/boot/dts/stm32mp131.dtsi new file mode 100644 index 0000000000000..86126dc0d8980 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp131.dtsi @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ +#include + +/ { + #address-cells = <1>; + #size-cells = <1>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + compatible = "arm,cortex-a7"; + device_type = "cpu"; + reg = <0>; + }; + }; + + arm-pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = ; + interrupt-affinity = <&cpu0>; + interrupt-parent = <&intc>; + }; + + clocks { + clk_axi: clk-axi { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <266500000>; + }; + + clk_hse: clk-hse { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <24000000>; + }; + + clk_hsi: clk-hsi { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <64000000>; + }; + + clk_lsi: clk-lsi { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <32000>; + }; + + clk_pclk3: clk-pclk3 { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <104438965>; + }; + + clk_pclk4: clk-pclk4 { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <133250000>; + }; + + clk_pll4_p: clk-pll4_p { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + clk_pll4_r: clk-pll4_r { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <99000000>; + }; + }; + + intc: interrupt-controller@a0021000 { + compatible = "arm,cortex-a7-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0xa0021000 0x1000>, + <0xa0022000 0x2000>; + }; + + psci { + compatible = "arm,psci-1.0"; + method = "smc"; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = , + , + , + ; + interrupt-parent = <&intc>; + always-on; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&intc>; + ranges; + + uart4: serial@40010000 { + compatible = "st,stm32h7-uart"; + reg = <0x40010000 0x400>; + interrupts = ; + clocks = <&clk_hsi>; + status = "disabled"; + }; + + syscfg: syscon@50020000 { + compatible = "st,stm32mp157-syscfg", "syscon"; + reg = <0x50020000 0x400>; + clocks = <&clk_pclk3>; + }; + + sdmmc1: mmc@58005000 { + compatible = "arm,pl18x", "arm,primecell"; + arm,primecell-periphid = <0x00253180>; + reg = <0x58005000 0x1000>, <0x58006000 0x1000>; + interrupts = ; + interrupt-names = "cmd_irq"; + clocks = <&clk_pll4_p>; + clock-names = "apb_pclk"; + cap-sd-highspeed; + cap-mmc-highspeed; + max-frequency = <120000000>; + status = "disabled"; + }; + + iwdg2: watchdog@5a002000 { + compatible = 
"st,stm32mp1-iwdg"; + reg = <0x5a002000 0x400>; + clocks = <&clk_pclk4>, <&clk_lsi>; + clock-names = "pclk", "lsi"; + status = "disabled"; + }; + + bsec: efuse@5c005000 { + compatible = "st,stm32mp15-bsec"; + reg = <0x5c005000 0x400>; + #address-cells = <1>; + #size-cells = <1>; + + part_number_otp: part_number_otp@4 { + reg = <0x4 0x2>; + }; + ts_cal1: calib@5c { + reg = <0x5c 0x2>; + }; + ts_cal2: calib@5e { + reg = <0x5e 0x2>; + }; + }; + + /* + * Break node order to solve dependency probe issue between + * pinctrl and exti. + */ + pinctrl: pin-controller@50002000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,stm32mp135-pinctrl"; + ranges = <0 0x50002000 0x8400>; + pins-are-numbered; + + gpioa: gpio@50002000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x0 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOA"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 0 16>; + }; + + gpiob: gpio@50003000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x1000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOB"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 16 16>; + }; + + gpioc: gpio@50004000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x2000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOC"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 32 16>; + }; + + gpiod: gpio@50005000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x3000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOD"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 48 16>; + }; + + gpioe: gpio@50006000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x4000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOE"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 64 16>; + }; + + gpiof: gpio@50007000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x5000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOF"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 80 16>; + }; + + gpiog: gpio@50008000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x6000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOG"; + ngpios = <16>; + gpio-ranges = <&pinctrl 0 96 16>; + }; + + gpioh: gpio@50009000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x7000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOH"; + ngpios = <15>; + gpio-ranges = <&pinctrl 0 112 15>; + }; + + gpioi: gpio@5000a000 { + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x8000 0x400>; + clocks = <&clk_pclk4>; + st,bank-name = "GPIOI"; + ngpios = <8>; + gpio-ranges = <&pinctrl 0 128 8>; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32mp133.dtsi b/arch/arm/boot/dts/stm32mp133.dtsi new file mode 100644 index 0000000000000..0fb1386257cfa --- /dev/null +++ b/arch/arm/boot/dts/stm32mp133.dtsi @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. 
+ */ + +#include "stm32mp131.dtsi" + +/ { + soc { + m_can1: can@4400e000 { + compatible = "bosch,m_can"; + reg = <0x4400e000 0x400>, <0x44011000 0x1400>; + reg-names = "m_can", "message_ram"; + interrupts = , + ; + interrupt-names = "int0", "int1"; + clocks = <&clk_hse>, <&clk_pll4_r>; + clock-names = "hclk", "cclk"; + bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>; + status = "disabled"; + }; + + m_can2: can@4400f000 { + compatible = "bosch,m_can"; + reg = <0x4400f000 0x400>, <0x44011000 0x2800>; + reg-names = "m_can", "message_ram"; + interrupts = , + ; + interrupt-names = "int0", "int1"; + clocks = <&clk_hse>, <&clk_pll4_r>; + clock-names = "hclk", "cclk"; + bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>; + status = "disabled"; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32mp135.dtsi b/arch/arm/boot/dts/stm32mp135.dtsi new file mode 100644 index 0000000000000..abf2acd37b4ea --- /dev/null +++ b/arch/arm/boot/dts/stm32mp135.dtsi @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +#include "stm32mp133.dtsi" + +/ { + soc { + }; +}; diff --git a/arch/arm/boot/dts/stm32mp13xc.dtsi b/arch/arm/boot/dts/stm32mp13xc.dtsi new file mode 100644 index 0000000000000..fa6889e305913 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp13xc.dtsi @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +/ { + soc { + cryp: crypto@54002000 { + compatible = "st,stm32mp1-cryp"; + reg = <0x54002000 0x400>; + interrupts = ; + clocks = <&clk_axi>; + status = "disabled"; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32mp13xf.dtsi b/arch/arm/boot/dts/stm32mp13xf.dtsi new file mode 100644 index 0000000000000..fa6889e305913 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp13xf.dtsi @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +/ { + soc { + cryp: crypto@54002000 { + compatible = "st,stm32mp1-cryp"; + reg = <0x54002000 0x400>; + interrupts = ; + clocks = <&clk_axi>; + status = "disabled"; + }; + }; +}; From 5e257bcdd80138b37deeb4016fb4441bb07ed4be Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Fri, 23 Jul 2021 15:28:09 +0200 Subject: [PATCH 0070/1202] dt-bindings: stm32: document stm32mp135f-dk board Add new entry for stm32mp135f-dk board. 
Signed-off-by: Alexandre Torgue Acked-by: Arnd Bergmann Acked-by: Rob Herring --- Documentation/devicetree/bindings/arm/stm32/stm32.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml index 9a77ab74be99a..9ac7da01c6c3f 100644 --- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml +++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml @@ -55,6 +55,10 @@ properties: - enum: - st,stm32h750i-art-pi - const: st,stm32h750 + - items: + - enum: + - st,stm32mp135f-dk + - const: st,stm32mp135 - items: - enum: - shiratech,stm32mp157a-iot-box # IoT Box From b94f4c482427dcaad7d9cf8815f87e651f470bee Mon Sep 17 00:00:00 2001 From: Alexandre Torgue Date: Fri, 23 Jul 2021 15:28:10 +0200 Subject: [PATCH 0071/1202] ARM: dts: stm32: add initial support of stm32mp135f-dk board Add support of stm32mp135f discovery board (part number: STM32MP135F-DK). It embeds a STM32MP135F SOC with 512 MB of DDR3. Several connections are available on this board: 4*USB2.0, 1*USB2.0 typeC DRD, SDcard, 2*RJ45, HDMI, Combo Wifi/BT, ... Only SD card, uart4 (console) and watchdog IPs are enabled in this commit. Signed-off-by: Alexandre Torgue Acked-by: Arnd Bergmann --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/stm32mp13-pinctrl.dtsi | 64 ++++++++++++++++++++++++ arch/arm/boot/dts/stm32mp135f-dk.dts | 56 +++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 arch/arm/boot/dts/stm32mp13-pinctrl.dtsi create mode 100644 arch/arm/boot/dts/stm32mp135f-dk.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 7e0934180724d..f5b21007d0dfa 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1113,6 +1113,7 @@ dtb-$(CONFIG_ARCH_STM32) += \ stm32h743i-eval.dtb \ stm32h743i-disco.dtb \ stm32h750i-art-pi.dtb \ + stm32mp135f-dk.dtb \ stm32mp153c-dhcom-drc02.dtb \ stm32mp157a-avenger96.dtb \ stm32mp157a-dhcor-avenger96.dtb \ diff --git a/arch/arm/boot/dts/stm32mp13-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp13-pinctrl.dtsi new file mode 100644 index 0000000000000..069f95f2b6289 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp13-pinctrl.dtsi @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue + */ +#include + +&pinctrl { + sdmmc1_b4_pins_a: sdmmc1-b4-0 { + pins1 { + pinmux = , /* SDMMC1_D0 */ + , /* SDMMC1_D1 */ + , /* SDMMC1_D2 */ + , /* SDMMC1_D3 */ + ; /* SDMMC1_CMD */ + slew-rate = <1>; + drive-push-pull; + bias-disable; + }; + pins2 { + pinmux = ; /* SDMMC1_CK */ + slew-rate = <2>; + drive-push-pull; + bias-disable; + }; + }; + + sdmmc1_b4_od_pins_a: sdmmc1-b4-od-0 { + pins1 { + pinmux = , /* SDMMC1_D0 */ + , /* SDMMC1_D1 */ + , /* SDMMC1_D2 */ + ; /* SDMMC1_D3 */ + slew-rate = <1>; + drive-push-pull; + bias-disable; + }; + pins2 { + pinmux = ; /* SDMMC1_CK */ + slew-rate = <2>; + drive-push-pull; + bias-disable; + }; + pins3 { + pinmux = ; /* SDMMC1_CMD */ + slew-rate = <1>; + drive-open-drain; + bias-disable; + }; + }; + + uart4_pins_a: uart4-0 { + pins1 { + pinmux = ; /* UART4_TX */ + bias-disable; + drive-push-pull; + slew-rate = <0>; + }; + pins2 { + pinmux = ; /* UART4_RX */ + bias-disable; + }; + }; +}; diff --git a/arch/arm/boot/dts/stm32mp135f-dk.dts b/arch/arm/boot/dts/stm32mp135f-dk.dts new file mode 100644 index 0000000000000..7e96d9e362179 --- /dev/null +++ b/arch/arm/boot/dts/stm32mp135f-dk.dts @@ -0,0 +1,56 @@ 
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* + * Copyright (C) STMicroelectronics 2021 - All Rights Reserved + * Author: Alexandre Torgue for STMicroelectronics. + */ + +/dts-v1/; + +#include "stm32mp135.dtsi" +#include "stm32mp13xf.dtsi" +#include "stm32mp13-pinctrl.dtsi" + +/ { + model = "STMicroelectronics STM32MP135F-DK Discovery Board"; + compatible = "st,stm32mp135f-dk", "st,stm32mp135"; + + aliases { + serial0 = &uart4; + }; + + memory@c0000000 { + device_type = "memory"; + reg = <0xc0000000 0x20000000>; + }; + + vdd_sd: vdd-sd { + compatible = "regulator-fixed"; + regulator-name = "vdd_sd"; + regulator-min-microvolt = <2900000>; + regulator-max-microvolt = <2900000>; + regulator-always-on; + }; +}; + +&iwdg2 { + timeout-sec = <32>; + status = "okay"; +}; + +&sdmmc1 { + pinctrl-names = "default", "opendrain"; + pinctrl-0 = <&sdmmc1_b4_pins_a>; + pinctrl-1 = <&sdmmc1_b4_od_pins_a>; + broken-cd; + disable-wp; + st,neg-edge; + bus-width = <4>; + vmmc-supply = <&vdd_sd>; + status = "okay"; +}; + +&uart4 { + pinctrl-names = "default"; + pinctrl-0 = <&uart4_pins_a>; + status = "okay"; +}; From a7bb8fc30ad692b7c09c6900c93bea1067d40b61 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 9 Aug 2021 14:13:24 +0200 Subject: [PATCH 0072/1202] ARM: dts: stm32: Reduce DHCOR SPI NOR frequency to 50 MHz The SPI NOR is a bit further away from the SoC on DHCOR than on DHCOM, which causes additional signal delay. At 108 MHz, this delay triggers a sporadic issue where the first bit of RX data is not received by the QSPI controller. There are two options of addressing this problem, either by using the DLYB block to compensate the extra delay, or by reducing the QSPI bus clock frequency. The former requires calibration and that is overly complex, so opt for the second option. Fixes: 76045bc457104 ("ARM: dts: stm32: Add QSPI NOR on AV96") Signed-off-by: Marek Vasut Cc: Alexandre Torgue Cc: Patrice Chotard Cc: Patrick Delaunay Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 2b0ac605549d7..44ecc47085871 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -202,7 +202,7 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-rx-bus-width = <4>; - spi-max-frequency = <108000000>; + spi-max-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; }; From 3500810079160e7b3932320ad6db9eb3e8e68c3f Mon Sep 17 00:00:00 2001 From: Grzegorz Szymaszek Date: Thu, 26 Aug 2021 16:45:43 +0200 Subject: [PATCH 0073/1202] ARM: dts: stm32: set the DCMI pins on stm32mp157c-odyssey The Seeed Odyssey-STM32MP157C board has a 20-pin DVP camera output. The DCMI pins used on this output are defined in the pin state definition &pinctrl/dcmi-1, AKA &dcmi_pins_b (added in mainline commit 02814a41529a55dbfb9fbb2a3728e78e70646ea6). Set these pins as the default pinctrl of the DCMI peripheral in the board device tree. The pins are not used for any other purpose, so it seems safe to assume most users will not need to override (delete) what this patch provides. status defaults to "disabled", so the peripheral will not be unnecessarily started. And the users who actually intend to make use of a camera on the DVP port will have this little part of the configuration ready. 
Signed-off-by: Grzegorz Szymaszek Reviewed-by: Ahmad Fatoum Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp157c-odyssey.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/boot/dts/stm32mp157c-odyssey.dts b/arch/arm/boot/dts/stm32mp157c-odyssey.dts index be1dd5e9e7443..554f5d3bcdc31 100644 --- a/arch/arm/boot/dts/stm32mp157c-odyssey.dts +++ b/arch/arm/boot/dts/stm32mp157c-odyssey.dts @@ -22,6 +22,12 @@ }; }; +&dcmi { + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&dcmi_pins_b>; + pinctrl-1 = <&dcmi_sleep_pins_b>; +}; + &ethernet0 { status = "okay"; pinctrl-0 = <&ethernet0_rgmii_pins_a>; From a7bb8fc30ad692b7c09c6900c93bea1067d40b61 Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Fri, 10 Sep 2021 02:58:23 +0000 Subject: [PATCH 0074/1202] PCI: Add ACS quirk for Pericom PI7C9X2G switches The Pericom PI7C9X2G404/PI7C9X2G304/PI7C9X2G303 PCIe switches have an erratum for ACS P2P Request Redirect behaviour when used in the cut-through forwarding mode. The recommended work around for this issue is to use the switch in store and forward mode. The erratum results in packets being queued and not being delivered upstream, which can be observed as very poor downstream device performance and/or dropped device-generated data/interrupts. Add a fixup so that when enabling or resuming the downstream port we check if it has enabled ACS P2P Request Redirect, and if so, change the device (via the upstream port) to use the store and forward operating mode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=177471 Link: https://lore.kernel.org/r/20210910025823.196508-1-nathan@nathanrossi.com Tested-by: Alex Williamson Signed-off-by: Nathan Rossi Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6c957124f84d6..b6b4c803bdc94 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5796,3 +5796,59 @@ static void apex_pci_fixup_class(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a, PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class); + +/* + * Pericom PI7C9X2G404/PI7C9X2G304/PI7C9X2G303 switch erratum E5 - + * ACS P2P Request Redirect is not functional + * + * When ACS P2P Request Redirect is enabled and bandwidth is not balanced + * between upstream and downstream ports, packets are queued in an internal + * buffer until CPLD packet. The workaround is to use the switch in store and + * forward mode.
+ */ +#define PI7C9X2Gxxx_MODE_REG 0x74 +#define PI7C9X2Gxxx_STORE_FORWARD_MODE BIT(0) +static void pci_fixup_pericom_acs_store_forward(struct pci_dev *pdev) +{ + struct pci_dev *upstream; + u16 val; + + /* Downstream ports only */ + if (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM) + return; + + /* Check for ACS P2P Request Redirect use */ + if (!pdev->acs_cap) + return; + pci_read_config_word(pdev, pdev->acs_cap + PCI_ACS_CTRL, &val); + if (!(val & PCI_ACS_RR)) + return; + + upstream = pci_upstream_bridge(pdev); + if (!upstream) + return; + + pci_read_config_word(upstream, PI7C9X2Gxxx_MODE_REG, &val); + if (!(val & PI7C9X2Gxxx_STORE_FORWARD_MODE)) { + pci_info(upstream, "Setting PI7C9X2Gxxx store-forward mode to avoid ACS erratum\n"); + /* Enable store-forward mode */ + pci_write_config_word(upstream, PI7C9X2Gxxx_MODE_REG, val | + PI7C9X2Gxxx_STORE_FORWARD_MODE); + } +} +/* + * Apply fixup on enable and on resume, in order to apply the fix up whenever + * ACS configuration changes or switch mode is reset + */ +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2404, + pci_fixup_pericom_acs_store_forward); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2404, + pci_fixup_pericom_acs_store_forward); +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2304, + pci_fixup_pericom_acs_store_forward); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2304, + pci_fixup_pericom_acs_store_forward); +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2303, + pci_fixup_pericom_acs_store_forward); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303, + pci_fixup_pericom_acs_store_forward); From 4f33a76b6f71d32e5ffa82097a00b7a67b53f318 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sat, 11 Sep 2021 03:03:05 -0700 Subject: [PATCH 0075/1202] PCI: Do not enable AtomicOps on VFs Host crashes when pci_enable_atomic_ops_to_root() is called for VFs with virtual buses. The virtual buses added to SR-IOV have bus->self set to NULL and host crashes due to this. PID: 4481 TASK: ffff89c6941b0000 CPU: 53 COMMAND: "bash" ... #3 [ffff9a9481713808] oops_end at ffffffffb9025cd6 #4 [ffff9a9481713828] page_fault_oops at ffffffffb906e417 #5 [ffff9a9481713888] exc_page_fault at ffffffffb9a0ad14 #6 [ffff9a94817138b0] asm_exc_page_fault at ffffffffb9c00ace [exception RIP: pcie_capability_read_dword+28] RIP: ffffffffb952fd5c RSP: ffff9a9481713960 RFLAGS: 00010246 RAX: 0000000000000001 RBX: ffff89c6b1096000 RCX: 0000000000000000 RDX: ffff9a9481713990 RSI: 0000000000000024 RDI: 0000000000000000 RBP: 0000000000000080 R8: 0000000000000008 R9: ffff89c64341a2f8 R10: 0000000000000002 R11: 0000000000000000 R12: ffff89c648bab000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff89c648bab0c8 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff9a9481713988] pci_enable_atomic_ops_to_root at ffffffffb95359a6 #8 [ffff9a94817139c0] bnxt_qplib_determine_atomics at ffffffffc08c1a33 [bnxt_re] #9 [ffff9a94817139d0] bnxt_re_dev_init at ffffffffc08ba2d1 [bnxt_re] Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit in Device Control 2 is reserved for VFs. The PF value applies to all associated VFs. Return -EINVAL if pci_enable_atomic_ops_to_root() is called for a VF.
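For illustration, a sketch of the caller side (not code from this series; the driver context is hypothetical, while pci_enable_atomic_ops_to_root() and PCI_EXP_DEVCAP2_ATOMIC_COMP64 are existing kernel API):

#include <linux/pci.h>

/*
 * Hypothetical probe fragment: request 64-bit AtomicOp completion on
 * the path to the root port. With this fix, a VF gets a clean -EINVAL
 * here instead of a crash on the NULL bus->self of its virtual bus.
 */
static int example_enable_atomics(struct pci_dev *pdev)
{
	int rc;

	rc = pci_enable_atomic_ops_to_root(pdev,
					   PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	if (rc)
		return rc;	/* a VF, or no AtomicOp routing to the root */

	return 0;
}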
Link: https://lore.kernel.org/r/1631354585-16597-1-git-send-email-selvin.xavier@broadcom.com Fixes: 35f5ace5dea4 ("RDMA/bnxt_re: Enable global atomic ops if platform supports") Fixes: 430a23689dea ("PCI: Add pci_enable_atomic_ops_to_root()") Signed-off-by: Selvin Xavier Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Gospodarek --- drivers/pci/pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c63598c1cdd85..e55d944ff30f1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3719,6 +3719,14 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask) struct pci_dev *bridge; u32 cap, ctl2; + /* + * Per PCIe r5.0, sec 9.3.5.10, the AtomicOp Requester Enable bit + * in Device Control 2 is reserved in VFs and the PF value applies + * to all associated VFs. + */ + if (dev->is_virtfn) + return -EINVAL; + if (!pci_is_pcie(dev)) return -EINVAL; From 6bc07a20444994ade28c9781ad133df733ba5e80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 19 Aug 2021 17:25:52 +0200 Subject: [PATCH 0076/1202] dt-bindings: mfd: Add Broadcom's MISC block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom's MISC is an MFD hardware block used on some of their SoCs like bcm63xx and bcm4908. At this point only PCIe reset is fully understood and documented. More functions may be added later. Signed-off-by: Rafał Miłecki Acked-by: Florian Fainelli Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- .../devicetree/bindings/mfd/brcm,misc.yaml | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 Documentation/devicetree/bindings/mfd/brcm,misc.yaml diff --git a/Documentation/devicetree/bindings/mfd/brcm,misc.yaml b/Documentation/devicetree/bindings/mfd/brcm,misc.yaml new file mode 100644 index 0000000000000..cff7d772a7dbd --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/brcm,misc.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/brcm,misc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom's MISC block + +maintainers: + - Rafał Miłecki + +description: | + Broadcom's MISC is a hardware block used on some SoCs (e.g. bcm63xx and + bcm4908). It's used to implement some simple functions like a watchdog, PCIe + reset, UniMAC control and more. + +properties: + compatible: + items: + - const: brcm,misc + - const: simple-mfd + + reg: + description: MISC block registers + + ranges: true + + "#address-cells": + const: 1 + + "#size-cells": + const: 1 + +patternProperties: + '^reset-controller@[a-f0-9]+$': + $ref: ../reset/brcm,bcm4908-misc-pcie-reset.yaml + +additionalProperties: false + +required: + - reg + - '#address-cells' + - '#size-cells' + +examples: + - | + misc@ff802600 { + compatible = "brcm,misc", "simple-mfd"; + reg = <0xff802600 0xe4>; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x0 0xe4>; + + reset-controller@44 { + compatible = "brcm,bcm4908-misc-pcie-reset"; + reg = <0x44 0x4>; + #reset-cells = <1>; + }; + }; From b8deedf685acdc88f57b8fa666ef08eafd524f29 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 Sep 2021 09:17:51 +0200 Subject: [PATCH 0077/1202] devfreq: exynos-ppmu: use node names with hyphens Devicetree naming convention requires device node names to use hyphens instead of underscore, so Exynos5422 devfreq event name "ppmu-event3-dmc0_0" should be "ppmu-event3-dmc0-0". 
Newly introduced dtschema enforces this, however the driver still expects old name with an underscore. Add new events for Exynos5422 while still accepting old name for backwards compatibility. Signed-off-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/event/exynos-ppmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 17ed980d90998..541bd13ab61da 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -94,11 +94,16 @@ static struct __exynos_ppmu_events { PPMU_EVENT(d1-general), PPMU_EVENT(d1-rt), - /* For Exynos5422 SoC */ + /* For Exynos5422 SoC, deprecated (backwards compatible) */ PPMU_EVENT(dmc0_0), PPMU_EVENT(dmc0_1), PPMU_EVENT(dmc1_0), PPMU_EVENT(dmc1_1), + /* For Exynos5422 SoC */ + PPMU_EVENT(dmc0-0), + PPMU_EVENT(dmc0-1), + PPMU_EVENT(dmc1-0), + PPMU_EVENT(dmc1-1), }; static int __exynos_ppmu_find_ppmu_id(const char *edev_name) From 7f6490afc97fe0b21ec4b38226414286561a2994 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 Sep 2021 09:17:52 +0200 Subject: [PATCH 0078/1202] devfreq: exynos-ppmu: simplify parsing event-type from DT When parsing devicetree, the function of_get_devfreq_events(), for each device child node, iterates over array of possible events "ppmu_events" till it finds one matching by node name. When match is found the ppmu_events[i] points to element having both the name of the event and the counters ID. Each PPMU device child node might have an "event-name" property with the name of the event, however due to the design of devfreq it must be the same as the device node name. If it is not the same, the devfreq client won't be able to use it via devfreq_event_get_edev_by_phandle(). Since PPMU device child node name must be equal to the "event-name" property (event-name == ppmu_events[i].name), there is no need to find the counters ID by the "event-name". Instead use ppmu_events[i].id which must be equal to it. Signed-off-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Signed-off-by: Chanwoo Choi --- drivers/devfreq/event/exynos-ppmu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 541bd13ab61da..9b849d7811167 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -566,13 +566,10 @@ static int of_get_devfreq_events(struct device_node *np, * use default if not. */ if (info->ppmu_type == EXYNOS_TYPE_PPMU_V2) { - int id; /* Not all registers take the same value for * read+write data count. */ - id = __exynos_ppmu_find_ppmu_id(desc[j].name); - - switch (id) { + switch (ppmu_events[i].id) { case PPMU_PMNCNT0: case PPMU_PMNCNT1: case PPMU_PMNCNT2: From de80ebd88b9ea5923804dbe98df91d976a1cd45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 13 Sep 2021 10:00:21 +0200 Subject: [PATCH 0079/1202] dt-bindings: mfd: brcm,cru: Add clkset syscon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CRU has a shared register that is used e.g. to control USB 2.0 PHY block access. It's a single 32 b register. Document it as syscon so it can be used with a regmap. 
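As a consumer-side sketch (an illustration under assumptions, not code from this series): once the node is a syscon, a driver resolves it through a phandle and accesses the single 32-bit register via the regmap. The "brcm,syscon-clkset" property name matches the USB 2.0 PHY binding added later in this series; which bit controls what is an assumption here.

#include <linux/bits.h>
#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>

/*
 * Sketch: look up the clkset syscon from a consumer node and set an
 * assumed enable bit. Offset 0 is used because the syscon node maps
 * only this one 32-bit register.
 */
static int example_use_clkset(struct device_node *np)
{
	struct regmap *clkset;

	clkset = syscon_regmap_lookup_by_phandle(np, "brcm,syscon-clkset");
	if (IS_ERR(clkset))
		return PTR_ERR(clkset);

	return regmap_update_bits(clkset, 0, BIT(0), BIT(0));
}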
Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/brcm,cru.yaml | 8 ++++++++ Documentation/devicetree/bindings/mfd/syscon.yaml | 1 + 2 files changed, 9 insertions(+) diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml index fc1317ab32264..bf4e585d3c186 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml @@ -39,6 +39,9 @@ patternProperties: '^clock-controller@[a-f0-9]+$': $ref: ../clock/brcm,iproc-clocks.yaml + '^syscon@[a-f0-9]+$': + $ref: syscon.yaml + '^thermal@[a-f0-9]+$': $ref: ../thermal/brcm,ns-thermal.yaml @@ -73,6 +76,11 @@ examples: "iprocfast", "sata1", "sata2"; }; + syscon@180 { + compatible = "brcm,cru-clkset", "syscon"; + reg = <0x180 0x4>; + }; + pinctrl { compatible = "brcm,bcm4708-pinmux"; offset = <0x1c0>; diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml index abe3fd817e0b7..0dcffc273c2bb 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.yaml +++ b/Documentation/devicetree/bindings/mfd/syscon.yaml @@ -38,6 +38,7 @@ properties: - allwinner,sun8i-h3-system-controller - allwinner,sun8i-v3s-system-controller - allwinner,sun50i-a64-system-controller + - brcm,cru-clkset - hisilicon,dsa-subctrl - hisilicon,hi6220-sramctrl - hisilicon,pcie-sas-subctrl From dcc24e936da224d21536493626b22e79886c8543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 13 Sep 2021 10:00:23 +0200 Subject: [PATCH 0080/1202] dt-bindings: mfd: brcm,cru: Add USB 2.0 PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Northstar's USB 2.0 PHY is part of the CRU MFD. Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- .../devicetree/bindings/mfd/brcm,cru.yaml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml index bf4e585d3c186..262284ae80f1a 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml @@ -39,6 +39,9 @@ patternProperties: '^clock-controller@[a-f0-9]+$': $ref: ../clock/brcm,iproc-clocks.yaml + '^phy@[a-f0-9]+$': + $ref: ../phy/bcm-ns-usb2-phy.yaml + '^syscon@[a-f0-9]+$': $ref: syscon.yaml @@ -52,6 +55,7 @@ required: examples: - | + #include cru-bus@1800c100 { compatible = "brcm,ns-cru", "simple-mfd"; reg = <0x1800c100 0x1d0>; @@ -76,7 +80,16 @@ examples: "iprocfast", "sata1", "sata2"; }; - syscon@180 { + phy@164 { + compatible = "brcm,ns-usb2-phy"; + reg = <0x164 0x4>; + brcm,syscon-clkset = <&clkset>; + clocks = <&genpll BCM_NSP_GENPLL_USB_PHY_REF_CLK>; + clock-names = "phy-ref-clk"; + #phy-cells = <0>; + }; + + clkset: syscon@180 { compatible = "brcm,cru-clkset", "syscon"; reg = <0x180 0x4>; }; From 609506476b8eed6e2cdc867de87300c6bb2e45f8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 26 Aug 2021 13:32:50 +0100 Subject: [PATCH 0081/1202] mfd: ti_am335x_tscadc: Fix spelling mistake "atleast" -> "at least" There is a spelling mistake in a dev_err message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Lee Jones --- drivers/mfd/ti_am335x_tscadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 55adc379f94be..825bd5884a184 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -158,7 +158,7 @@ static int ti_tscadc_probe(struct platform_device *pdev) return -EINVAL; } if (total_channels == 0) { - dev_err(&pdev->dev, "Need atleast one channel.\n"); + dev_err(&pdev->dev, "Need at least one channel.\n"); return -EINVAL; } From fe02eed5d203fd9b26a8bd9b523a67fc99cbe95a Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 25 Aug 2021 08:57:35 -0400 Subject: [PATCH 0082/1202] mfd: intel_pmt: Only compile on x86 The intel_pmt driver shows up as a compile option for all arches but is 32-bit and 64-bit x86 specific. Add a CONFIG dependency on X86 for intel_pmt. Signed-off-by: Prarit Bhargava Reviewed-by: David E. Box Reviewed-by: Hans de Goede Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ca0edab91aeb6..bd090e766ca48 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -692,7 +692,7 @@ config MFD_INTEL_PMC_BXT config MFD_INTEL_PMT tristate "Intel Platform Monitoring Technology (PMT) support" - depends on PCI + depends on X86 && PCI select MFD_CORE help The Intel Platform Monitoring Technology (PMT) is an interface that From 0996f75fbdf8e121766f5ea6c9259f132b10bd00 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 29 Aug 2021 04:51:53 +0200 Subject: [PATCH 0083/1202] mfd: rk808: Add support for power off on RK817 RK817 has a power-off bit in SYS_CFG3. Add support for powering off the PMIC. Signed-off-by: Ondrej Jirman Signed-off-by: Lee Jones --- drivers/mfd/rk808.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c index 77ccd31ca1d91..b181fe4013303 100644 --- a/drivers/mfd/rk808.c +++ b/drivers/mfd/rk808.c @@ -543,6 +543,10 @@ static void rk808_pm_power_off(void) reg = RK808_DEVCTRL_REG, bit = DEV_OFF_RST; break; + case RK817_ID: + reg = RK817_SYS_CFG(3); + bit = DEV_OFF; + break; case RK818_ID: reg = RK818_DEVCTRL_REG; bit = DEV_OFF; From 677c9aed74fd8769b2c1327a71163f0ffc807b77 Mon Sep 17 00:00:00 2001 From: Carlos de Paula Date: Mon, 30 Aug 2021 16:53:45 -0300 Subject: [PATCH 0084/1202] mfd: da9063: Add support for latest EA silicon revision This update adds new regmap to support the latest EA silicon which will be selected based on the chip and variant information read from the device. 
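The selection keys on a variant code read from the chip at probe time, roughly as in the sketch below (the register and shift names are recalled from the da9063 headers and should be treated as assumptions, not as part of this patch):

#include <linux/regmap.h>
#include <linux/mfd/da9063/core.h>
#include <linux/mfd/da9063/registers.h>

/*
 * Sketch: read the variant code that the regmap-table switch keys on.
 * A new EA part would report PMIC_DA9063_EA (0x8) here and then be
 * given the same register tables as the DA variant, per the hunks
 * that follow.
 */
static int example_read_variant(struct da9063 *da9063)
{
	unsigned int val;
	int ret;

	ret = regmap_read(da9063->regmap, DA9063_REG_CHIP_VARIANT, &val);
	if (ret)
		return ret;

	da9063->variant_code = val >> DA9063_CHIP_VARIANT_SHIFT;
	return 0;
}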
Signed-off-by: Carlos de Paula Reviewed-by: Adam Thomson Signed-off-by: Lee Jones --- drivers/mfd/da9063-i2c.c | 2 ++ include/linux/mfd/da9063/core.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c index 4b7f707b7952c..343ed6e96d87e 100644 --- a/drivers/mfd/da9063-i2c.c +++ b/drivers/mfd/da9063-i2c.c @@ -391,6 +391,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c, &da9063_bb_da_volatile_table; break; case PMIC_DA9063_DA: + case PMIC_DA9063_EA: da9063_regmap_config.rd_table = &da9063_da_readable_table; da9063_regmap_config.wr_table = @@ -416,6 +417,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c, &da9063l_bb_da_volatile_table; break; case PMIC_DA9063_DA: + case PMIC_DA9063_EA: da9063_regmap_config.rd_table = &da9063l_da_readable_table; da9063_regmap_config.wr_table = diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index fa7a43f02f274..8db52324f4169 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -36,6 +36,7 @@ enum da9063_variant_codes { PMIC_DA9063_BB = 0x5, PMIC_DA9063_CA = 0x6, PMIC_DA9063_DA = 0x7, + PMIC_DA9063_EA = 0x8, }; /* Interrupts */ From f5ee204646814165ad329f21052db8c05c5d17f4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 30 Aug 2021 14:46:24 +0300 Subject: [PATCH 0085/1202] dt-bindings: mfd: qcom,tcsr: Document ipq6018 compatible Signed-off-by: Baruch Siach Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/qcom,tcsr.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt b/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt index e90519d566a30..c5f4f0ddfcc39 100644 --- a/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt +++ b/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt @@ -6,6 +6,7 @@ registers via syscon. Required properties: - compatible: Should contain: + "qcom,tcsr-ipq6018", "syscon", "simple-mfd" for IPQ6018 "qcom,tcsr-ipq8064", "syscon" for IPQ8064 "qcom,tcsr-apq8064", "syscon" for APQ8064 "qcom,tcsr-msm8660", "syscon" for MSM8660 From c0764872c1ff98308ddd3dfc5324b5c8a3a46ac7 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 15 Sep 2021 17:17:26 +0800 Subject: [PATCH 0086/1202] mfd: sprd: Add support for SC2730 PMIC SC2730 is a PMIC SoC integrated in UMS512. Signed-off-by: Chunyan Zhang Signed-off-by: Lee Jones --- drivers/mfd/sprd-sc27xx-spi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 6b7956604a0f0..57fb2445720fd 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -18,6 +18,9 @@ #define SPRD_PMIC_INT_RAW_STATUS 0x4 #define SPRD_PMIC_INT_EN 0x8 +#define SPRD_SC2730_IRQ_BASE 0x80 +#define SPRD_SC2730_IRQ_NUMS 10 +#define SPRD_SC2730_CHG_DET 0x1b9c #define SPRD_SC2731_IRQ_BASE 0x140 #define SPRD_SC2731_IRQ_NUMS 16 #define SPRD_SC2731_CHG_DET 0xedc @@ -52,6 +55,12 @@ struct sprd_pmic_data { * base address and irq number, we should save irq number and irq base * in the device data structure. 
*/ +static const struct sprd_pmic_data sc2730_data = { + .irq_base = SPRD_SC2730_IRQ_BASE, + .num_irqs = SPRD_SC2730_IRQ_NUMS, + .charger_det = SPRD_SC2730_CHG_DET, +}; + static const struct sprd_pmic_data sc2731_data = { .irq_base = SPRD_SC2731_IRQ_BASE, .num_irqs = SPRD_SC2731_IRQ_NUMS, @@ -232,6 +241,7 @@ static SIMPLE_DEV_PM_OPS(sprd_pmic_pm_ops, sprd_pmic_suspend, sprd_pmic_resume); static const struct of_device_id sprd_pmic_match[] = { { .compatible = "sprd,sc2731", .data = &sc2731_data }, + { .compatible = "sprd,sc2730", .data = &sc2730_data }, {}, }; MODULE_DEVICE_TABLE(of, sprd_pmic_match); From 95c99b961e26f7e123554d76c588d50db58840b6 Mon Sep 17 00:00:00 2001 From: Ramona Alexandra Nechita Date: Wed, 15 Sep 2021 13:40:16 +0300 Subject: [PATCH 0087/1202] mfd: Kconfig: Fix typo in PMIC_ADP5520 from AD5520 to ADP5520 The description mentioned AD5520, which is a different device. Fixed the typo. Signed-off-by: Ramona Alexandra Nechita Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index bd090e766ca48..251ffcb06cc4c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -93,7 +93,7 @@ config PMIC_ADP5520 bool "Analog Devices ADP5520/01 MFD PMIC Core Support" depends on I2C=y help - Say yes here to add support for Analog Devices AD5520 and ADP5501, + Say yes here to add support for Analog Devices ADP5520 and ADP5501, Multifunction Power Management IC. This includes the I2C driver and the core APIs _only_, you have to select individual components like LCD backlight, LEDs, GPIOs and Kepad From ff61d3b212ccc668c15b70e6f6252831ee8880bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 16 Sep 2021 19:05:34 +0200 Subject: [PATCH 0088/1202] mfd: cros_ec: Drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias. Having another MODULE_ALIAS causes the alias to be duplicated: $ modinfo cros_ec_dev.ko alias: platform:cros-ec-dev srcversion: F84A69D2156719A4F717A76 alias: platform:cros-ec-dev Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c index 8c08d1c55726d..81cee1a5daa6c 100644 --- a/drivers/mfd/cros_ec_dev.c +++ b/drivers/mfd/cros_ec_dev.c @@ -326,7 +326,6 @@ static void __exit cros_ec_dev_exit(void) module_init(cros_ec_dev_init); module_exit(cros_ec_dev_exit); -MODULE_ALIAS("platform:" DRV_NAME); MODULE_AUTHOR("Bill Richardson "); MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller"); MODULE_VERSION("1.0"); From eb29ba5688de88c6da649f59e4d875c6203b2b85 Mon Sep 17 00:00:00 2001 From: Fabio Aiuto Date: Thu, 16 Sep 2021 09:12:55 +0200 Subject: [PATCH 0089/1202] extcon: extcon-axp288: Use P-Unit semaphore lock for register accesses use low level P-Unit semaphore lock for axp288 register accesses directly and for more than one access at a time, to reduce the number of times this semaphore is locked and released which is an expensive operation. i2c-bus to the XPower is shared between the kernel and the SoCs P-Unit. The P-Unit has a semaphore which the kernel must lock for axp288 register accesses. When the P-Unit semaphore is locked CPU and GPU power states cannot change or the system will freeze. The P-Unit semaphore lock is already managed inside the regmap access logic, but for each access the semaphore is locked and released.
So use directly iosf_mbi_(un)block_punit_i2c_access(), we are safe in doing so because nested calls to the same semaphore are turned to nops. Suggested-by: Hans de Goede Reviewed-by: Hans de Goede Tested-by: Hans de Goede Signed-off-by: Fabio Aiuto Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 2 +- drivers/extcon/extcon-axp288.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index c69d40ae5619a..aab87c9b35c8a 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -23,7 +23,7 @@ config EXTCON_ADC_JACK config EXTCON_AXP288 tristate "X-Power AXP288 EXTCON support" - depends on MFD_AXP20X && USB_SUPPORT && X86 && ACPI + depends on MFD_AXP20X && USB_SUPPORT && X86 && ACPI && IOSF_MBI select USB_ROLE_SWITCH help Say Y here to enable support for USB peripheral detection diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index fdb31954cf2b6..7c6d5857ff25e 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -24,6 +24,7 @@ #include #include +#include /* Power source status register */ #define PS_STAT_VBUS_TRIGGER BIT(0) @@ -215,6 +216,10 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) unsigned int cable = info->previous_cable; bool vbus_attach = false; + ret = iosf_mbi_block_punit_i2c_access(); + if (ret < 0) + return ret; + vbus_attach = axp288_get_vbus_attach(info); if (!vbus_attach) goto no_vbus; @@ -253,6 +258,8 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) } no_vbus: + iosf_mbi_unblock_punit_i2c_access(); + extcon_set_state_sync(info->edev, info->previous_cable, false); if (info->previous_cable == EXTCON_CHG_USB_SDP) extcon_set_state_sync(info->edev, EXTCON_USB, false); @@ -275,6 +282,8 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) return 0; dev_det_ret: + iosf_mbi_unblock_punit_i2c_access(); + if (ret < 0) dev_err(info->dev, "failed to detect BC Mod\n"); @@ -305,13 +314,23 @@ static irqreturn_t axp288_extcon_isr(int irq, void *data) return IRQ_HANDLED; } -static void axp288_extcon_enable(struct axp288_extcon_info *info) +static int axp288_extcon_enable(struct axp288_extcon_info *info) { + int ret = 0; + + ret = iosf_mbi_block_punit_i2c_access(); + if (ret < 0) + return ret; + regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG, BC_GLOBAL_RUN, 0); /* Enable the charger detection logic */ regmap_update_bits(info->regmap, AXP288_BC_GLOBAL_REG, BC_GLOBAL_RUN, BC_GLOBAL_RUN); + + iosf_mbi_unblock_punit_i2c_access(); + + return ret; } static void axp288_put_role_sw(void *data) @@ -384,10 +403,16 @@ static int axp288_extcon_probe(struct platform_device *pdev) } } + ret = iosf_mbi_block_punit_i2c_access(); + if (ret < 0) + return ret; + info->vbus_attach = axp288_get_vbus_attach(info); axp288_extcon_log_rsi(info); + iosf_mbi_unblock_punit_i2c_access(); + /* Initialize extcon device */ info->edev = devm_extcon_dev_allocate(&pdev->dev, axp288_extcon_cables); @@ -441,7 +466,9 @@ static int axp288_extcon_probe(struct platform_device *pdev) } /* Start charger cable type detection */ - axp288_extcon_enable(info); + ret = axp288_extcon_enable(info); + if (ret < 0) + return ret; device_init_wakeup(dev, true); platform_set_drvdata(pdev, info); From a3587e2c0578101900d4abbbd72ab3dad6d5568a Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 22 Sep 2021 17:56:00 +0100 Subject: [PATCH 0090/1202] i3c: fix incorrect address slot lookup on 64-bit 
The address slot bitmap is an array of unsigned long's which are the same size as an int on 32-bit platforms but not 64-bit. Loading the bitmap into an int could result in the incorrect status being returned for a slot and slots being reported as the wrong status. Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Cc: Boris Brezillon Cc: Alexandre Belloni Signed-off-by: Jamie Iles Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210922165600.179394-1-quic_jiles@quicinc.com --- drivers/i3c/master.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index c3b4c677b4429..dfe18dcd008d4 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -343,7 +343,8 @@ struct bus_type i3c_bus_type = { static enum i3c_addr_slot_status i3c_bus_get_addr_slot_status(struct i3c_bus *bus, u16 addr) { - int status, bitpos = addr * 2; + unsigned long status; + int bitpos = addr * 2; if (addr > I2C_MAX_ADDR) return I3C_ADDR_SLOT_RSVD; From e7e7d02ad837642773b9b31cec0d42ae6c943398 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Tue, 14 Sep 2021 22:05:37 +0200 Subject: [PATCH 0091/1202] dt-bindings: mfd: logicvc: Add patternProperties for the display The LogiCVC multi-function device has a display part which is now described in its binding. Add a patternProperties match for it. Signed-off-by: Paul Kocialkowski Acked-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml b/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml index 8a1a6625c7825..9efd49c39bd2f 100644 --- a/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml +++ b/Documentation/devicetree/bindings/mfd/xylon,logicvc.yaml @@ -46,6 +46,9 @@ patternProperties: "^gpio@[0-9a-f]+$": $ref: /schemas/gpio/xylon,logicvc-gpio.yaml# + "^display@[0-9a-f]+$": + $ref: /schemas/display/xylon,logicvc-display.yaml# + required: - compatible - reg From 516e83636fce46d34aca49c9f0d481758c490aa7 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Jun 2021 13:04:42 +0200 Subject: [PATCH 0092/1202] mfd: sec-irq: Do not enforce (incorrect) interrupt trigger type Interrupt line can be configured on different hardware in different way, even inverted. Therefore driver should not enforce specific trigger type - edge falling - but instead rely on Devicetree to configure it. The Samsung PMIC drivers are used only on Devicetree boards. Additionally, the PMIC datasheets describe the interrupt line as active low with a requirement of acknowledge from the CPU therefore the edge falling is not correct. Marek Szyprowski reports that together with DTS change (proper level in DTS) it fixes RTC alarm failure that he observed from time to time on TM2e board. 
Signed-off-by: Krzysztof Kozlowski Tested-by: Marek Szyprowski Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210602110445.33536-1-krzysztof.kozlowski@canonical.com --- drivers/mfd/sec-irq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c index e473c2fb42d5e..f5f59fdc72fec 100644 --- a/drivers/mfd/sec-irq.c +++ b/drivers/mfd/sec-irq.c @@ -479,8 +479,7 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic) } ret = devm_regmap_add_irq_chip(sec_pmic->dev, sec_pmic->regmap_pmic, - sec_pmic->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + sec_pmic->irq, IRQF_ONESHOT, 0, sec_irq_chip, &sec_pmic->irq_data); if (ret != 0) { dev_err(sec_pmic->dev, "Failed to register IRQ chip: %d\n", ret); From 5a6c0a689e028ca1c15a5c614d55077d1c808705 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Jun 2021 13:04:43 +0200 Subject: [PATCH 0093/1202] mfd: max77686: Do not enforce (incorrect) interrupt trigger type Interrupt line can be configured on different hardware in different way, even inverted. Therefore driver should not enforce specific trigger type - edge falling - but instead rely on Devicetree to configure it. The Maxim 77686 datasheet describes the interrupt line as active low with a requirement of acknowledge from the CPU therefore the edge falling is not correct. The interrupt line is shared between PMIC and RTC driver, so using level sensitive interrupt is here especially important to avoid races. With an edge configuration in case if first PMIC signals interrupt followed shortly after by the RTC, the interrupt might not be yet cleared/acked thus the second one would not be noticed. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210602110445.33536-2-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/clock/maxim,max77686.txt | 4 ++-- Documentation/devicetree/bindings/mfd/max77686.txt | 2 +- Documentation/devicetree/bindings/regulator/max77686.txt | 2 +- drivers/mfd/max77686.c | 3 +-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/maxim,max77686.txt b/Documentation/devicetree/bindings/clock/maxim,max77686.txt index 3472b461ca935..c10849efb4440 100644 --- a/Documentation/devicetree/bindings/clock/maxim,max77686.txt +++ b/Documentation/devicetree/bindings/clock/maxim,max77686.txt @@ -49,7 +49,7 @@ Example: max77686: max77686@9 { compatible = "maxim,max77686"; interrupt-parent = <&wakeup_eint>; - interrupts = <26 0>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; reg = <0x09>; #clock-cells = <1>; @@ -74,7 +74,7 @@ Example: max77802: max77802@9 { compatible = "maxim,max77802"; interrupt-parent = <&wakeup_eint>; - interrupts = <26 0>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; reg = <0x09>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/mfd/max77686.txt b/Documentation/devicetree/bindings/mfd/max77686.txt index 42968b7144e00..4447d074894a2 100644 --- a/Documentation/devicetree/bindings/mfd/max77686.txt +++ b/Documentation/devicetree/bindings/mfd/max77686.txt @@ -21,6 +21,6 @@ Example: max77686: pmic@9 { compatible = "maxim,max77686"; interrupt-parent = <&wakeup_eint>; - interrupts = <26 0>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; reg = <0x09>; }; diff --git a/Documentation/devicetree/bindings/regulator/max77686.txt b/Documentation/devicetree/bindings/regulator/max77686.txt index e9f7578ca09a2..ff3d2dec8c4ba 100644 --- 
a/Documentation/devicetree/bindings/regulator/max77686.txt +++ b/Documentation/devicetree/bindings/regulator/max77686.txt @@ -43,7 +43,7 @@ Example: max77686: pmic@9 { compatible = "maxim,max77686"; interrupt-parent = <&wakeup_eint>; - interrupts = <26 IRQ_TYPE_NONE>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; reg = <0x09>; voltage-regulators { diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index 2ad554b921d96..f9e12ab2bc758 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -209,8 +209,7 @@ static int max77686_i2c_probe(struct i2c_client *i2c) ret = devm_regmap_add_irq_chip(&i2c->dev, max77686->regmap, max77686->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT | - IRQF_SHARED, 0, irq_chip, + IRQF_ONESHOT | IRQF_SHARED, 0, irq_chip, &max77686->irq_data); if (ret < 0) { dev_err(&i2c->dev, "failed to add PMIC irq chip: %d\n", ret); From 15e24f5ab1f44adb0cb041d3d74259610393360b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Jun 2021 13:04:44 +0200 Subject: [PATCH 0094/1202] mfd: max77693: Do not enforce (incorrect) interrupt trigger type Interrupt line can be configured on different hardware in different way, even inverted. Therefore driver should not enforce specific trigger type - edge falling - but instead rely on Devicetree to configure it. The Maxim 77693 datasheet describes the interrupt line as active low with a requirement of acknowledge from the CPU therefore the edge falling is not correct. The interrupt line is shared between PMIC and RTC driver, so using level sensitive interrupt is here especially important to avoid races. With an edge configuration in case if first PMIC signals interrupt followed shortly after by the RTC, the interrupt might not be yet cleared/acked thus the second one would not be noticed. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210602110445.33536-3-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/mfd/max77693.txt | 2 +- drivers/mfd/max77693.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/max77693.txt b/Documentation/devicetree/bindings/mfd/max77693.txt index 0ced96e16c16d..1032df14498bd 100644 --- a/Documentation/devicetree/bindings/mfd/max77693.txt +++ b/Documentation/devicetree/bindings/mfd/max77693.txt @@ -139,7 +139,7 @@ Example: compatible = "maxim,max77693"; reg = <0x66>; interrupt-parent = <&gpx1>; - interrupts = <5 2>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; regulators { esafeout@1 { diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index 596ed85cab3b5..4e6244e175593 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -222,8 +222,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, } ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, - IRQF_ONESHOT | IRQF_SHARED | - IRQF_TRIGGER_FALLING, 0, + IRQF_ONESHOT | IRQF_SHARED, 0, &max77693_led_irq_chip, &max77693->irq_data_led); if (ret) { @@ -232,8 +231,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, } ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, - IRQF_ONESHOT | IRQF_SHARED | - IRQF_TRIGGER_FALLING, 0, + IRQF_ONESHOT | IRQF_SHARED, 0, &max77693_topsys_irq_chip, &max77693->irq_data_topsys); if (ret) { @@ -242,8 +240,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, } ret = regmap_add_irq_chip(max77693->regmap, max77693->irq, - IRQF_ONESHOT | IRQF_SHARED | - IRQF_TRIGGER_FALLING, 0, + IRQF_ONESHOT | IRQF_SHARED, 0, &max77693_charger_irq_chip, &max77693->irq_data_chg); if (ret) { @@ -252,8 +249,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, } ret = regmap_add_irq_chip(max77693->regmap_muic, max77693->irq, - IRQF_ONESHOT | IRQF_SHARED | - IRQF_TRIGGER_FALLING, 0, + IRQF_ONESHOT | IRQF_SHARED, 0, &max77693_muic_irq_chip, &max77693->irq_data_muic); if (ret) { From 6e0b813c6bbba94a6856be83c795f5f772c4ca2f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Jun 2021 13:04:45 +0200 Subject: [PATCH 0095/1202] mfd: max14577: Do not enforce (incorrect) interrupt trigger type Interrupt line can be configured on different hardware in different way, even inverted. Therefore driver should not enforce specific trigger type - edge falling - but instead rely on Devicetree to configure it. The Maxim 14577/77836 datasheets describe the interrupt line as active low with a requirement of acknowledge from the CPU therefore the edge falling is not correct. The interrupt line is shared between PMIC and charger driver, so using level sensitive interrupt is here especially important to avoid races. With an edge configuration in case if first PMIC signals interrupt followed shortly after by the RTC, the interrupt might not be yet cleared/acked thus the second one would not be noticed. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210602110445.33536-4-krzysztof.kozlowski@canonical.com --- Documentation/devicetree/bindings/mfd/max14577.txt | 4 ++-- drivers/mfd/max14577.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/max14577.txt b/Documentation/devicetree/bindings/mfd/max14577.txt index 92070b3467569..be11943a0560f 100644 --- a/Documentation/devicetree/bindings/mfd/max14577.txt +++ b/Documentation/devicetree/bindings/mfd/max14577.txt @@ -71,7 +71,7 @@ max14577@25 { compatible = "maxim,max14577"; reg = <0x25>; interrupt-parent = <&gpx1>; - interrupts = <5 IRQ_TYPE_NONE>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; muic: max14577-muic { compatible = "maxim,max14577-muic"; @@ -106,7 +106,7 @@ max77836@25 { compatible = "maxim,max77836"; reg = <0x25>; interrupt-parent = <&gpx1>; - interrupts = <5 IRQ_TYPE_NONE>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; muic: max77836-muic { compatible = "maxim,max77836-muic"; diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c index be185e9d5f16b..6c487fa14e9c8 100644 --- a/drivers/mfd/max14577.c +++ b/drivers/mfd/max14577.c @@ -332,7 +332,7 @@ static int max77836_init(struct max14577 *max14577) } ret = regmap_add_irq_chip(max14577->regmap_pmic, max14577->irq, - IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED, + IRQF_ONESHOT | IRQF_SHARED, 0, &max77836_pmic_irq_chip, &max14577->irq_data_pmic); if (ret != 0) { @@ -418,14 +418,14 @@ static int max14577_i2c_probe(struct i2c_client *i2c, irq_chip = &max77836_muic_irq_chip; mfd_devs = max77836_devs; mfd_devs_size = ARRAY_SIZE(max77836_devs); - irq_flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED; + irq_flags = IRQF_ONESHOT | IRQF_SHARED; break; case MAXIM_DEVICE_TYPE_MAX14577: default: irq_chip = &max14577_irq_chip; mfd_devs = max14577_devs; mfd_devs_size = ARRAY_SIZE(max14577_devs); - irq_flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT; + irq_flags = IRQF_ONESHOT; break; } From d162d84b27c85a63cb5df260681157a58f88645e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 27 Sep 2021 17:06:12 -0700 Subject: [PATCH 0096/1202] nvmem: Fix shift-out-of-bound (UBSAN) with byte size cells If a cell has 'nbits' equal to a multiple of BITS_PER_BYTE the logic *p &= GENMASK((cell->nbits%BITS_PER_BYTE) - 1, 0); will become undefined behavior because nbits modulo BITS_PER_BYTE is 0, and we subtract one from that making a large number that is then shifted more than the number of bits that fit into an unsigned long. 
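A standalone sketch of the failing expression (GENMASK() redefined locally for a 64-bit build so it compiles outside the kernel tree; values chosen to mirror a cell whose nbits is a whole number of bytes):

#include <stdio.h>

#define BITS_PER_BYTE 8
/* local 64-bit stand-in for the kernel's GENMASK() */
#define GENMASK(h, l) \
	((~0UL - (1UL << (l)) + 1) & (~0UL >> (63 - (h))))

int main(void)
{
	volatile int nbits = 32;	/* nbits % BITS_PER_BYTE == 0 */
	unsigned char p = 0xff;

	/*
	 * Unguarded, this would evaluate GENMASK(-1, 0) and shift ~0UL
	 * right by 64 bits -- the undefined behaviour UBSAN flags in
	 * the report below. The fix masks only when leftover bits exist.
	 */
	if (nbits % BITS_PER_BYTE)
		p &= GENMASK((nbits % BITS_PER_BYTE) - 1, 0);

	printf("p = %#x\n", p);
	return 0;
}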
UBSAN reports this problem: UBSAN: shift-out-of-bounds in drivers/nvmem/core.c:1386:8 shift exponent 64 is too large for 64-bit type 'unsigned long' CPU: 6 PID: 7 Comm: kworker/u16:0 Not tainted 5.15.0-rc3+ #9 Hardware name: Google Lazor (rev3+) with KB Backlight (DT) Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace+0x0/0x170 show_stack+0x24/0x30 dump_stack_lvl+0x64/0x7c dump_stack+0x18/0x38 ubsan_epilogue+0x10/0x54 __ubsan_handle_shift_out_of_bounds+0x180/0x194 __nvmem_cell_read+0x1ec/0x21c nvmem_cell_read+0x58/0x94 nvmem_cell_read_variable_common+0x4c/0xb0 nvmem_cell_read_variable_le_u32+0x40/0x100 a6xx_gpu_init+0x170/0x2f4 adreno_bind+0x174/0x284 component_bind_all+0xf0/0x264 msm_drm_bind+0x1d8/0x7a0 try_to_bring_up_master+0x164/0x1ac __component_add+0xbc/0x13c component_add+0x20/0x2c dp_display_probe+0x340/0x384 platform_probe+0xc0/0x100 really_probe+0x110/0x304 __driver_probe_device+0xb8/0x120 driver_probe_device+0x4c/0xfc __device_attach_driver+0xb0/0x128 bus_for_each_drv+0x90/0xdc __device_attach+0xc8/0x174 device_initial_probe+0x20/0x2c bus_probe_device+0x40/0xa4 deferred_probe_work_func+0x7c/0xb8 process_one_work+0x128/0x21c process_scheduled_works+0x40/0x54 worker_thread+0x1ec/0x2a8 kthread+0x138/0x158 ret_from_fork+0x10/0x20 Fix it by making sure there are any bits to mask out. Cc: Douglas Anderson Fixes: 69aba7948cbe ("nvmem: Add a simple NVMEM framework for consumers") Cc: stable@vger.kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Srinivas Kandagatla --- drivers/nvmem/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 3d87fadaa160d..8976da38b375a 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -1383,7 +1383,8 @@ static void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, void *buf) *p-- = 0; /* clear msb bits if any leftover in the last byte */ - *p &= GENMASK((cell->nbits%BITS_PER_BYTE) - 1, 0); + if (cell->nbits % BITS_PER_BYTE) + *p &= GENMASK((cell->nbits % BITS_PER_BYTE) - 1, 0); } static int __nvmem_cell_read(struct nvmem_device *nvmem, From 176412f8674be8ba9a45bb61086e224e2118324a Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Sat, 18 Sep 2021 13:46:29 +0800 Subject: [PATCH 0097/1202] pinctrl: intel: Kconfig: Add configuration menu to Intel pin control Adding a configuration menu to hold many Intel pin control drivers helps to make the display more concise. Acked-by: Andy Shevchenko Signed-off-by: Cai Huoqing Signed-off-by: Mika Westerberg --- drivers/pinctrl/intel/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/Kconfig b/drivers/pinctrl/intel/Kconfig index fb1495bd77c43..e5ec8b8956da1 100644 --- a/drivers/pinctrl/intel/Kconfig +++ b/drivers/pinctrl/intel/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Intel pin control drivers - -if (X86 || COMPILE_TEST) +menu "Intel pinctrl drivers" + depends on X86 || COMPILE_TEST config PINCTRL_BAYTRAIL bool "Intel Baytrail GPIO pin control" @@ -168,4 +168,4 @@ config PINCTRL_TIGERLAKE This pinctrl driver provides an interface that allows configuring of Intel Tiger Lake PCH pins and using them as GPIOs. 
-endif +endmenu From b71c724c480c12f8ea2762b6bcd6c497f87bb5d2 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Wed, 22 Sep 2021 17:57:18 +0100 Subject: [PATCH 0098/1202] i2c: / ACPI: fix resource leak in reconfiguration device addition acpi_i2c_find_adapter_by_handle() calls bus_find_device() which takes a reference on the adapter which is never released which will result in a reference count leak and render the adapter unremovable. Make sure to put the adapter after creating the client in the same manner that we do for OF. Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications") Signed-off-by: Jamie Iles Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index aaeeacc121215..546cc935e035a 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -454,6 +454,7 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, break; i2c_acpi_register_device(adapter, adev, &info); + put_device(&adapter->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) From c13b574018a85cd696d2f3489c22d61ed29c9226 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Sat, 25 Sep 2021 05:45:39 +0000 Subject: [PATCH 0100/1202] extcon: usbc-tusb320: Add support for mode setting and reset Reset the chip and set its mode to default (maintain mode set by PORT pin) during probe to make sure it comes up in the default state.
Signed-off-by: Yassine Oudjana Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-usbc-tusb320.c | 85 ++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 805af73b41521..1ed1dfe542069 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -19,15 +19,32 @@ #define TUSB320_REG9_ATTACHED_STATE_MASK 0x3 #define TUSB320_REG9_CABLE_DIRECTION BIT(5) #define TUSB320_REG9_INTERRUPT_STATUS BIT(4) -#define TUSB320_ATTACHED_STATE_NONE 0x0 -#define TUSB320_ATTACHED_STATE_DFP 0x1 -#define TUSB320_ATTACHED_STATE_UFP 0x2 -#define TUSB320_ATTACHED_STATE_ACC 0x3 + +#define TUSB320_REGA 0xa +#define TUSB320_REGA_I2C_SOFT_RESET BIT(3) +#define TUSB320_REGA_MODE_SELECT_SHIFT 4 +#define TUSB320_REGA_MODE_SELECT_MASK 0x3 + +enum tusb320_attached_state { + TUSB320_ATTACHED_STATE_NONE, + TUSB320_ATTACHED_STATE_DFP, + TUSB320_ATTACHED_STATE_UFP, + TUSB320_ATTACHED_STATE_ACC, +}; + +enum tusb320_mode { + TUSB320_MODE_PORT, + TUSB320_MODE_UFP, + TUSB320_MODE_DFP, + TUSB320_MODE_DRP, +}; struct tusb320_priv { struct device *dev; struct regmap *regmap; struct extcon_dev *edev; + + enum tusb320_attached_state state; }; static const char * const tusb_attached_states[] = { @@ -62,6 +79,53 @@ static int tusb320_check_signature(struct tusb320_priv *priv) return 0; } +static int tusb320_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode) +{ + int ret; + + /* Mode cannot be changed while cable is attached */ + if (priv->state != TUSB320_ATTACHED_STATE_NONE) + return -EBUSY; + + /* Write mode */ + ret = regmap_write_bits(priv->regmap, TUSB320_REGA, + TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT, + mode << TUSB320_REGA_MODE_SELECT_SHIFT); + if (ret) { + dev_err(priv->dev, "failed to write mode: %d\n", ret); + return ret; + } + + return 0; +} + +static int tusb320_reset(struct tusb320_priv *priv) +{ + int ret; + + /* Set mode to default (follow PORT pin) */ + ret = tusb320_set_mode(priv, TUSB320_MODE_PORT); + if (ret && ret != -EBUSY) { + dev_err(priv->dev, + "failed to set mode to PORT: %d\n", ret); + return ret; + } + + /* Perform soft reset */ + ret = regmap_write_bits(priv->regmap, TUSB320_REGA, + TUSB320_REGA_I2C_SOFT_RESET, 1); + if (ret) { + dev_err(priv->dev, + "failed to write soft reset bit: %d\n", ret); + return ret; + } + + /* Wait for chip to go through reset */ + msleep(95); + + return 0; +} + static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) { struct tusb320_priv *priv = dev_id; @@ -96,6 +160,8 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) extcon_sync(priv->edev, EXTCON_USB); extcon_sync(priv->edev, EXTCON_USB_HOST); + priv->state = state; + regmap_write(priv->regmap, TUSB320_REG9, reg); return IRQ_HANDLED; @@ -145,6 +211,17 @@ static int tusb320_extcon_probe(struct i2c_client *client, /* update initial state */ tusb320_irq_handler(client->irq, priv); + /* Reset chip to its default state */ + ret = tusb320_reset(priv); + if (ret) + dev_warn(priv->dev, "failed to reset chip: %d\n", ret); + else + /* + * State and polarity might change after a reset, so update + * them again and make sure the interrupt status bit is cleared. 
+ */ + tusb320_irq_handler(client->irq, priv); + ret = devm_request_threaded_irq(priv->dev, client->irq, NULL, tusb320_irq_handler, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, From c7a1f997a34c1fdc6ca5a7299d1c0621ccc90ab9 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Sat, 25 Sep 2021 05:45:54 +0000 Subject: [PATCH 0101/1202] extcon: usbc-tusb320: Add support for TUSB320L TUSB320L is a newer chip with additional features, and it has additional steps in its mode changing sequence: - Disable CC state machine, - Write to mode register, - Wait for 5 ms, - Re-enable CC state machine. It also has an additional register that a revision number can be read from. Add support for the mode changing sequence, and read the revision number during probe and print it as info. Signed-off-by: Yassine Oudjana Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-usbc-tusb320.c | 82 +++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 1ed1dfe542069..6ba3d89b106d0 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -21,10 +21,13 @@ #define TUSB320_REG9_INTERRUPT_STATUS BIT(4) #define TUSB320_REGA 0xa +#define TUSB320L_REGA_DISABLE_TERM BIT(0) #define TUSB320_REGA_I2C_SOFT_RESET BIT(3) #define TUSB320_REGA_MODE_SELECT_SHIFT 4 #define TUSB320_REGA_MODE_SELECT_MASK 0x3 +#define TUSB320L_REGA0_REVISION 0xa0 + enum tusb320_attached_state { TUSB320_ATTACHED_STATE_NONE, TUSB320_ATTACHED_STATE_DFP, @@ -39,11 +42,18 @@ enum tusb320_mode { TUSB320_MODE_DRP, }; +struct tusb320_priv; + +struct tusb320_ops { + int (*set_mode)(struct tusb320_priv *priv, enum tusb320_mode mode); + int (*get_revision)(struct tusb320_priv *priv, unsigned int *revision); +}; + struct tusb320_priv { struct device *dev; struct regmap *regmap; struct extcon_dev *edev; - + struct tusb320_ops *ops; enum tusb320_attached_state state; }; @@ -99,12 +109,46 @@ static int tusb320_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode) return 0; } +static int tusb320l_set_mode(struct tusb320_priv *priv, enum tusb320_mode mode) +{ + int ret; + + /* Disable CC state machine */ + ret = regmap_write_bits(priv->regmap, TUSB320_REGA, + TUSB320L_REGA_DISABLE_TERM, 1); + if (ret) { + dev_err(priv->dev, + "failed to disable CC state machine: %d\n", ret); + return ret; + } + + /* Write mode */ + ret = regmap_write_bits(priv->regmap, TUSB320_REGA, + TUSB320_REGA_MODE_SELECT_MASK << TUSB320_REGA_MODE_SELECT_SHIFT, + mode << TUSB320_REGA_MODE_SELECT_SHIFT); + if (ret) { + dev_err(priv->dev, "failed to write mode: %d\n", ret); + goto err; + } + + msleep(5); +err: + /* Re-enable CC state machine */ + ret = regmap_write_bits(priv->regmap, TUSB320_REGA, + TUSB320L_REGA_DISABLE_TERM, 0); + if (ret) + dev_err(priv->dev, + "failed to re-enable CC state machine: %d\n", ret); + + return ret; +} + static int tusb320_reset(struct tusb320_priv *priv) { int ret; /* Set mode to default (follow PORT pin) */ - ret = tusb320_set_mode(priv, TUSB320_MODE_PORT); + ret = priv->ops->set_mode(priv, TUSB320_MODE_PORT); if (ret && ret != -EBUSY) { dev_err(priv->dev, "failed to set mode to PORT: %d\n", ret); @@ -126,6 +170,20 @@ static int tusb320_reset(struct tusb320_priv *priv) return 0; } +static int tusb320l_get_revision(struct tusb320_priv *priv, unsigned int *revision) +{ + return regmap_read(priv->regmap, TUSB320L_REGA0_REVISION, revision); +} + +static struct tusb320_ops tusb320_ops = { + .set_mode = tusb320_set_mode, +}; 
+ +static struct tusb320_ops tusb320l_ops = { + .set_mode = tusb320l_set_mode, + .get_revision = tusb320l_get_revision, +}; + static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) { struct tusb320_priv *priv = dev_id; @@ -176,6 +234,8 @@ static int tusb320_extcon_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct tusb320_priv *priv; + const void *match_data; + unsigned int revision; int ret; priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL); @@ -191,12 +251,27 @@ static int tusb320_extcon_probe(struct i2c_client *client, if (ret) return ret; + match_data = device_get_match_data(&client->dev); + if (!match_data) + return -EINVAL; + + priv->ops = (struct tusb320_ops*)match_data; + priv->edev = devm_extcon_dev_allocate(priv->dev, tusb320_extcon_cable); if (IS_ERR(priv->edev)) { dev_err(priv->dev, "failed to allocate extcon device\n"); return PTR_ERR(priv->edev); } + if (priv->ops->get_revision) { + ret = priv->ops->get_revision(priv, &revision); + if (ret) + dev_warn(priv->dev, + "failed to read revision register: %d\n", ret); + else + dev_info(priv->dev, "chip revision %d\n", revision); + } + ret = devm_extcon_dev_register(priv->dev, priv->edev); if (ret < 0) { dev_err(priv->dev, "failed to register extcon device\n"); @@ -231,7 +306,8 @@ static int tusb320_extcon_probe(struct i2c_client *client, } static const struct of_device_id tusb320_extcon_dt_match[] = { - { .compatible = "ti,tusb320", }, + { .compatible = "ti,tusb320", .data = &tusb320_ops, }, + { .compatible = "ti,tusb320l", .data = &tusb320l_ops, }, { } }; MODULE_DEVICE_TABLE(of, tusb320_extcon_dt_match); From f83d7033d4ec1edd02c0289e71dc77bf3f41fee6 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Sat, 25 Sep 2021 05:46:09 +0000 Subject: [PATCH 0102/1202] dt-bindings: extcon: usbc-tusb320: Add TUSB320L compatible string Add a compatible string for TUSB320L. Signed-off-by: Yassine Oudjana Acked-by: Rob Herring Acked-by: Chanwoo Choi Signed-off-by: Chanwoo Choi --- .../devicetree/bindings/extcon/extcon-usbc-tusb320.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/extcon/extcon-usbc-tusb320.yaml b/Documentation/devicetree/bindings/extcon/extcon-usbc-tusb320.yaml index 9875b4d5c356e..71a9f2e5d0dc1 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-usbc-tusb320.yaml +++ b/Documentation/devicetree/bindings/extcon/extcon-usbc-tusb320.yaml @@ -11,7 +11,9 @@ maintainers: properties: compatible: - const: ti,tusb320 + enum: + - ti,tusb320 + - ti,tusb320l reg: maxItems: 1 From 6b28c7d0781e9554cd50e5e7d4b48aea0c2bdf93 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 28 Sep 2021 23:42:45 -0500 Subject: [PATCH 0103/1202] PM / devfreq: Strengthen check for freq_table Since commit ea572f816032 ("PM / devfreq: Change return type of devfreq_set_freq_table()"), all devfreq devices are expected to have a valid freq_table. The devfreq core unconditionally dereferences freq_table in the sysfs code and in get_freq_range(). Therefore, we need to ensure that freq_table is both non-null and non-empty (length is > 0). If either check fails, replace the table using set_freq_table() or return the error. 
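To make the truth table explicit, here is a sketch of the strengthened check (field names as in struct devfreq_dev_profile; the example_ wrapper is invented, not kernel code):

#include <linux/devfreq.h>

static bool example_freq_table_usable(const struct devfreq_dev_profile *p)
{
	/*
	 * The old "&&" condition rebuilt the table only when BOTH fields
	 * were unset, so a profile with max_state > 0 but a NULL
	 * freq_table slipped through and was later dereferenced.
	 * Usable means non-NULL and non-empty at the same time.
	 */
	return p->freq_table && p->max_state > 0;
}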
Signed-off-by: Samuel Holland Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 85faa7a5c7d12..06333d4303829 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -827,7 +827,7 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_dev; } - if (!devfreq->profile->max_state && !devfreq->profile->freq_table) { + if (!devfreq->profile->max_state || !devfreq->profile->freq_table) { mutex_unlock(&devfreq->lock); err = set_freq_table(devfreq); if (err < 0) From f130d08a8d79fd419c339d95eb28ddff72a73f52 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 21 Sep 2021 08:37:54 -0300 Subject: [PATCH 0104/1202] usb: chipidea: ci_hdrc_imx: Also search for 'phys' phandle When passing 'phys' in the devicetree to describe the USB PHY phandle (which is the recommended way according to Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt) the following NULL pointer dereference is observed on i.MX7 and i.MX8MM: [ 1.489344] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 [ 1.498170] Mem abort info: [ 1.500966] ESR = 0x96000044 [ 1.504030] EC = 0x25: DABT (current EL), IL = 32 bits [ 1.509356] SET = 0, FnV = 0 [ 1.512416] EA = 0, S1PTW = 0 [ 1.515569] FSC = 0x04: level 0 translation fault [ 1.520458] Data abort info: [ 1.523349] ISV = 0, ISS = 0x00000044 [ 1.527196] CM = 0, WnR = 1 [ 1.530176] [0000000000000098] user address but active_mm is swapper [ 1.536544] Internal error: Oops: 96000044 [#1] PREEMPT SMP [ 1.542125] Modules linked in: [ 1.545190] CPU: 3 PID: 7 Comm: kworker/u8:0 Not tainted 5.14.0-dirty #3 [ 1.551901] Hardware name: Kontron i.MX8MM N801X S (DT) [ 1.557133] Workqueue: events_unbound deferred_probe_work_func [ 1.562984] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) [ 1.568998] pc : imx7d_charger_detection+0x3f0/0x510 [ 1.573973] lr : imx7d_charger_detection+0x22c/0x510 This happens because the charger functions check for the phy presence inside the imx_usbmisc_data structure (data->usb_phy), but the chipidea core populates the usb_phy passed via 'phys' inside 'struct ci_hdrc' (ci->usb_phy) instead. This causes the NULL pointer dereference inside imx7d_charger_detection(). Fix it by also searching for 'phys' in case 'fsl,usbphy' is not found. Tested on a imx7s-warp board. 
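The lookup order the fix establishes can be sketched as follows (illustrative only; the example_ helper is invented and error handling is trimmed relative to the real diff below):

#include <linux/err.h>
#include <linux/usb/phy.h>

static struct usb_phy *example_get_phy(struct device *dev)
{
	struct usb_phy *phy;

	/* Prefer the legacy i.MX-specific phandle... */
	phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0);

	/*
	 * ...and only when that property is absent (-ENODEV), fall back
	 * to the generic "phys" phandle the binding recommends.
	 */
	if (IS_ERR(phy) && PTR_ERR(phy) == -ENODEV)
		phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);

	return phy;
}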
Cc: stable@vger.kernel.org
Fixes: 746f316b753a ("usb: chipidea: introduce imx7d USB charger detection")
Reported-by: Heiko Thiery
Signed-off-by: Fabio Estevam
Tested-by: Frieder Schrempf
Reviewed-by: Frieder Schrempf
Link: https://lore.kernel.org/r/20210921113754.767631-1-festevam@gmail.com
Signed-off-by: Peter Chen
---
 drivers/usb/chipidea/ci_hdrc_imx.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 8b7bc10b6e8b4..f1d100671ee6a 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -420,11 +420,16 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 	data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0);
 	if (IS_ERR(data->phy)) {
 		ret = PTR_ERR(data->phy);
-		/* Return -EINVAL if no usbphy is available */
-		if (ret == -ENODEV)
-			data->phy = NULL;
-		else
-			goto err_clk;
+		if (ret == -ENODEV) {
+			data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
+			if (IS_ERR(data->phy)) {
+				ret = PTR_ERR(data->phy);
+				if (ret == -ENODEV)
+					data->phy = NULL;
+				else
+					goto err_clk;
+			}
+		}
 	}
 
 	pdata.usb_phy = data->phy;

From 0650e2b10afb3351e5f1b83fccef3c38c3db7bee Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Wed, 22 Sep 2021 17:17:36 +0100
Subject: [PATCH 0105/1202] gpio: 74x164: Add SPI device ID table

Currently autoloading for SPI devices does not use the DT ID table; it
uses SPI modaliases. Supporting OF modaliases is going to be difficult
if not impractical; an attempt was made but has been reverted. Ensure
that module autoloading works for this driver by adding an SPI device
ID table.

Fixes: 96c8395e2166 ("spi: Revert modalias changes")
Signed-off-by: Mark Brown
Reviewed-by: Linus Walleij
Signed-off-by: Bartosz Golaszewski
---
 drivers/gpio/gpio-74x164.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c
index 05637d5851526..4a55cdf089d62 100644
--- a/drivers/gpio/gpio-74x164.c
+++ b/drivers/gpio/gpio-74x164.c
@@ -174,6 +174,13 @@ static int gen_74x164_remove(struct spi_device *spi)
 	return 0;
 }
 
+static const struct spi_device_id gen_74x164_spi_ids[] = {
+	{ .name = "74hc595" },
+	{ .name = "74lvc594" },
+	{},
+};
+MODULE_DEVICE_TABLE(spi, gen_74x164_spi_ids);
+
 static const struct of_device_id gen_74x164_dt_ids[] = {
 	{ .compatible = "fairchild,74hc595" },
 	{ .compatible = "nxp,74lvc594" },
@@ -188,6 +195,7 @@ static struct spi_driver gen_74x164_driver = {
 	},
 	.probe = gen_74x164_probe,
 	.remove = gen_74x164_remove,
+	.id_table = gen_74x164_spi_ids,
 };
 module_spi_driver(gen_74x164_driver);

From 1649b83766944e320a6f087abf1e8a221f1cd8b0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 24 Sep 2021 01:46:40 +0300
Subject: [PATCH 0106/1202] gpio: pca953x: Improve bias setting

The commit 15add06841a3 ("gpio: pca953x: add ->set_config implementation")
introduced support for bias setting. However, the implementation was
half-baked and brought potential issues:
- toggling the bias by disabling it first leaves the pin floating for a
  while;
- once enabled, the bias can't be disabled.

Fix all of this by adding support for bias disabling and moving the
disabling part under the corresponding conditional. While at it, add
support for the default setting, since it's cheap to add.
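The reordering can be seen in isolation in this sketch (invented example_ names; the driver's real register names and locking are in the diff below): select the pull direction first, then touch the enable bit exactly once, so the pin never floats in between.

#include <linux/regmap.h>

static int example_set_bias(struct regmap *map, unsigned int sel_reg,
			    unsigned int en_reg, unsigned int bit,
			    bool pull_up, bool enable)
{
	int ret;

	/* Pick the direction while the previous setting still drives the pin. */
	ret = regmap_write_bits(map, sel_reg, bit, pull_up ? bit : 0);
	if (ret)
		return ret;

	/*
	 * One write to the enable register.  The old code disabled here
	 * unconditionally before re-enabling, leaving a floating window.
	 */
	return regmap_write_bits(map, en_reg, bit, enable ? bit : 0);
}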
Fixes: 15add06841a3 ("gpio: pca953x: add ->set_config implementation") Cc: Thomas Petazzoni Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 8ebf369b3ba0f..d2fe76f3f34fd 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -559,21 +559,21 @@ static int pca953x_gpio_set_pull_up_down(struct pca953x_chip *chip, mutex_lock(&chip->i2c_lock); - /* Disable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); - if (ret) - goto exit; - /* Configure pull-up/pull-down */ if (config == PIN_CONFIG_BIAS_PULL_UP) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, bit); else if (config == PIN_CONFIG_BIAS_PULL_DOWN) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, 0); + else + ret = 0; if (ret) goto exit; - /* Enable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); + /* Disable/Enable pull-up/pull-down */ + if (config == PIN_CONFIG_BIAS_DISABLE) + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); + else + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); exit: mutex_unlock(&chip->i2c_lock); @@ -587,7 +587,9 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset, switch (pinconf_to_config_param(config)) { case PIN_CONFIG_BIAS_PULL_UP: + case PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: case PIN_CONFIG_BIAS_PULL_DOWN: + case PIN_CONFIG_BIAS_DISABLE: return pca953x_gpio_set_pull_up_down(chip, offset, config); default: return -ENOTSUPP; From e825696df7165fd451b35267ba41425673ee8dcf Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:57 -0700 Subject: [PATCH 0107/1202] bitops: protect find_first_{,zero}_bit properly find_first_bit() and find_first_zero_bit() are not protected with ifdefs as other functions in find.h. It causes build errors on some platforms if CONFIG_GENERIC_FIND_FIRST_BIT is enabled. 
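The #ifndef guards being added follow the usual bitops override convention: an architecture with a faster routine defines the function name as a macro pointing at itself, and the generic inline compiles out. Roughly (a sketch, not any real architecture's code; assumes the usual <linux/bitops.h> context):

#define find_first_bit find_first_bit
static inline
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
	unsigned long idx;

	/* Plain word-by-word scan standing in for an arch-tuned body. */
	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
		if (addr[idx])
			return min(idx * BITS_PER_LONG + __ffs(addr[idx]),
				   size);
	}

	return size;
}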
Signed-off-by: Yury Norov Fixes: 2cc7b6a44ac2 ("lib: add fast path for find_first_*_bit() and find_last_bit()") Reported-by: kernel test robot Tested-by: Wolfram Sang --- include/asm-generic/bitops/find.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 0d132ee2a2913..835f959a25f25 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -97,6 +97,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, #ifdef CONFIG_GENERIC_FIND_FIRST_BIT +#ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -116,7 +117,9 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) return _find_first_bit(addr, size); } +#endif +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -136,6 +139,8 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) return _find_first_zero_bit(addr, size); } +#endif + #else /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifndef find_first_bit From 583fba98ff9ccfb7016f5338751bd80a32231215 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:58 -0700 Subject: [PATCH 0108/1202] bitops: move find_bit_*_le functions from le.h to find.h It's convenient to have all find_bit declarations in one place. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/asm-generic/bitops/find.h | 69 +++++++++++++++++++++++++++++++ include/asm-generic/bitops/le.h | 64 ---------------------------- 2 files changed, 69 insertions(+), 64 deletions(-) diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 835f959a25f25..91b1b23f2b0c6 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -190,4 +190,73 @@ extern unsigned long find_next_clump8(unsigned long *clump, #define find_first_clump8(clump, bits, size) \ find_next_clump8((clump), (bits), (size), 0) +#if defined(__LITTLE_ENDIAN) + +static inline unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr, size, offset); +} + +static inline unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +static inline unsigned long find_first_zero_bit_le(const void *addr, + unsigned long size) +{ + return find_first_zero_bit(addr, size); +} + +#elif defined(__BIG_ENDIAN) + +#ifndef find_next_zero_bit_le +static inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); +} +#endif + +#ifndef find_next_bit_le +static inline +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val = *(const unsigned long *)addr; + + if (unlikely(offset >= size)) + return size; + + val = swab(val) & GENMASK(size - 1, offset); + return val ? 
__ffs(val) : size; + } + + return _find_next_bit(addr, NULL, size, offset, 0UL, 1); +} +#endif + +#ifndef find_first_zero_bit_le +#define find_first_zero_bit_le(addr, size) \ + find_next_zero_bit_le((addr), (size), 0) +#endif + +#else +#error "Please fix " +#endif + #endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index 5a28629cbf4d4..d51beff603755 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -2,83 +2,19 @@ #ifndef _ASM_GENERIC_BITOPS_LE_H_ #define _ASM_GENERIC_BITOPS_LE_H_ -#include #include #include -#include #if defined(__LITTLE_ENDIAN) #define BITOP_LE_SWIZZLE 0 -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_zero_bit(addr, size, offset); -} - -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_bit(addr, size, offset); -} - -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) -{ - return find_first_zero_bit(addr, size); -} - #elif defined(__BIG_ENDIAN) #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) -#ifndef find_next_zero_bit_le -static inline -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); -} -#endif - -#ifndef find_next_bit_le -static inline -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); -} #endif -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - -#else -#error "Please fix " -#endif static inline int test_bit_le(int nr, const void *addr) { From a7c7d06a49d697ddfefc8f33b40a86ff9011dfaa Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:16:59 -0700 Subject: [PATCH 0109/1202] include: move find.h from asm_generic to linux find_bit API and bitmap API are closely related, but inclusion paths are different - include/asm-generic and include/linux, correspondingly. In the past it made a lot of troubles due to circular dependencies and/or undefined symbols. Fix this by moving find.h under include/linux. 
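In practice the move makes find.h a private detail of bitmap.h: users keep writing #include <linux/bitmap.h> and get the find_*_bit() family for free, while a direct include trips the guard. Spelled out (the header name is inferred from the __LINUX_BITMAP_H guard macro visible in the diff):

#ifndef __LINUX_BITMAP_H
#error only <linux/bitmap.h> can be included directly
#endif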
Signed-off-by: Yury Norov Tested-by: Wolfram Sang Acked-by: Geert Uytterhoeven --- MAINTAINERS | 2 +- arch/alpha/include/asm/bitops.h | 2 -- arch/arc/include/asm/bitops.h | 1 - arch/arm/include/asm/bitops.h | 1 - arch/arm64/include/asm/bitops.h | 1 - arch/csky/include/asm/bitops.h | 1 - arch/h8300/include/asm/bitops.h | 1 - arch/hexagon/include/asm/bitops.h | 1 - arch/ia64/include/asm/bitops.h | 2 -- arch/m68k/include/asm/bitops.h | 2 -- arch/mips/include/asm/bitops.h | 1 - arch/openrisc/include/asm/bitops.h | 1 - arch/parisc/include/asm/bitops.h | 2 -- arch/powerpc/include/asm/bitops.h | 2 -- arch/riscv/include/asm/bitops.h | 1 - arch/s390/include/asm/bitops.h | 1 - arch/sh/include/asm/bitops.h | 1 - arch/sparc/include/asm/bitops_32.h | 1 - arch/sparc/include/asm/bitops_64.h | 2 -- arch/x86/include/asm/bitops.h | 2 -- arch/xtensa/include/asm/bitops.h | 1 - include/asm-generic/bitops.h | 1 - include/linux/bitmap.h | 1 + include/{asm-generic/bitops => linux}/find.h | 12 +++++++++--- 24 files changed, 11 insertions(+), 32 deletions(-) rename include/{asm-generic/bitops => linux}/find.h (97%) diff --git a/MAINTAINERS b/MAINTAINERS index 62257ffca56a8..1499b22b111b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3290,8 +3290,8 @@ M: Yury Norov R: Andy Shevchenko R: Rasmus Villemoes S: Maintained -F: include/asm-generic/bitops/find.h F: include/linux/bitmap.h +F: include/linux/find.h F: lib/bitmap.c F: lib/find_bit.c F: lib/find_bit_benchmark.c diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 5adca78830b5e..e1d8483a45f2e 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -430,8 +430,6 @@ static inline unsigned int __arch_hweight8(unsigned int w) #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ /* diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index a7daaf64ae344..bdb7e190a294e 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -189,7 +189,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include -#include #include #include diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index c92e42a5c8f75..8e94fe7ab5ebc 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -264,7 +264,6 @@ static inline int find_next_bit_le(const void *p, int size, int offset) #endif -#include #include /* diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h index 81a3e519b07db..9b3c787132d22 100644 --- a/arch/arm64/include/asm/bitops.h +++ b/arch/arm64/include/asm/bitops.h @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h index 91818787d8609..9604f47bd8503 100644 --- a/arch/csky/include/asm/bitops.h +++ b/arch/csky/include/asm/bitops.h @@ -59,7 +59,6 @@ static __always_inline unsigned long __fls(unsigned long x) #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h index c867a80cab5b5..4489e3d6edd3d 100644 --- a/arch/h8300/include/asm/bitops.h +++ b/arch/h8300/include/asm/bitops.h @@ -168,7 +168,6 @@ static inline unsigned long __ffs(unsigned long word) return result; } -#include #include #include #include diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 71429f756af0f..75d6ba3643b8d 100644 --- a/arch/hexagon/include/asm/bitops.h +++ 
b/arch/hexagon/include/asm/bitops.h @@ -271,7 +271,6 @@ static inline unsigned long __fls(unsigned long word) } #include -#include #include #include diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 2f24ee6459d20..577be93c0818c 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -441,8 +441,6 @@ static __inline__ unsigned long __arch_hweight64(unsigned long x) #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ #include diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 7b414099e5fc2..f551a21602949 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -529,6 +529,4 @@ static inline int __fls(int x) #include #endif /* __KERNEL__ */ -#include - #endif /* _M68K_BITOPS_H */ diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index dc2a6234dd3c7..c09d57f907f7e 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -446,7 +446,6 @@ static inline int ffs(int word) } #include -#include #ifdef __KERNEL__ diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h index 7f1ca35213d8c..d773ed938acb4 100644 --- a/arch/openrisc/include/asm/bitops.h +++ b/arch/openrisc/include/asm/bitops.h @@ -30,7 +30,6 @@ #include #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index aa4e883431c1a..c7a9997ac9cbb 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -208,8 +208,6 @@ static __inline__ int fls(unsigned int x) #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ #include diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 11847b6a244e4..abc8f2c7c570e 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -255,8 +255,6 @@ unsigned long __arch_hweight64(__u64 w); #include #endif -#include - /* wrappers that deal with KASAN instrumentation */ #include #include diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 396a3303c5374..3540b690944be 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index fd149480b6e2b..f7cefdde7c24f 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -387,7 +387,6 @@ static inline int fls(unsigned int word) #endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ #include -#include #include #include #include diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index 3b6c7b5b7ec9f..10ceb0d6b5a99 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -68,6 +68,5 @@ static inline unsigned long __ffs(unsigned long word) #include #include -#include #endif /* __ASM_SH_BITOPS_H */ diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h index 0ceff3b915a89..889afa9f990ff 100644 --- a/arch/sparc/include/asm/bitops_32.h +++ b/arch/sparc/include/asm/bitops_32.h @@ -100,7 +100,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) #include #include #include -#include #include #include diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index ca7ea5913494f..005a8ae858f16 100644 --- 
a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -52,8 +52,6 @@ unsigned int __arch_hweight8(unsigned int w);
 #include
 #endif /* __KERNEL__ */
 
-#include
-
 #ifdef __KERNEL__
 
 #include
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 0367efdc5b7a8..a288ecd230ab0 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -380,8 +380,6 @@ static __always_inline int fls64(__u64 x)
 #include
 #endif
 
-#include
-
 #include
 #include
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index 3f71d364ba909..cd225896c40f4 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -205,7 +205,6 @@ BIT_OPS(change, "xor", )
 #undef BIT_OP
 #undef TEST_AND_BIT_OP
 
-#include
 #include
 #include
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index df9b5bc3d2827..a47b8a71d6fe1 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -20,7 +20,6 @@
 #include
 #include
 #include
-#include
 
 #ifndef _LINUX_BITOPS_H
 #error only can be included directly
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 37f36dad18bd4..c88b2321ba14d 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/include/asm-generic/bitops/find.h b/include/linux/find.h
similarity index 97%
rename from include/asm-generic/bitops/find.h
rename to include/linux/find.h
index 91b1b23f2b0c6..c5410c243e046 100644
--- a/include/asm-generic/bitops/find.h
+++ b/include/linux/find.h
@@ -1,6 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
+#ifndef __LINUX_FIND_H_
+#define __LINUX_FIND_H_
+
+#ifndef __LINUX_BITMAP_H
+#error only can be included directly
+#endif
+
+#include
 
 extern unsigned long _find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
 		unsigned long start, unsigned long invert, unsigned long le);
@@ -259,4 +265,4 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 #error "Please fix "
 #endif
 
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
+#endif /*__LINUX_FIND_H_ */

From 8b444c98bb90d6c39a235fdaada28b1b74ea0b9d Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Sat, 14 Aug 2021 14:17:00 -0700
Subject: [PATCH 0110/1202] arch: remove GENERIC_FIND_FIRST_BIT entirely

In the 5.12 cycle we enabled the GENERIC_FIND_FIRST_BIT config option
for ARM64 and MIPS. It increased performance and shrunk .text size, and
so far I didn't receive any negative feedback on the change.

https://lore.kernel.org/linux-arch/20210225135700.1381396-1-yury.norov@gmail.com/

Now I think it's a good time to switch all architectures to use
find_{first,last}_bit() unconditionally, and so remove the
corresponding config option.

The patch doesn't introduce functional changes for arc, arm, arm64,
mips, m68k, s390 and x86; for other architectures I expect improvement
both in performance and .text size.
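With the option gone, every architecture that does not override the name gets one and the same generic inline, and a compile-time-constant size still folds down to a mask plus __ffs(). Its shape, pieced together from the find.h excerpts above (a sketch, not quoted verbatim):

static inline
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
	if (small_const_nbits(size)) {
		/* size is a constant <= BITS_PER_LONG: one mask, one __ffs() */
		unsigned long val = *addr & GENMASK(size - 1, 0);

		return val ? __ffs(val) : size;
	}

	/* otherwise take the out-of-line library implementation */
	return _find_first_bit(addr, size);
}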
Signed-off-by: Yury Norov Tested-by: Alexander Lobakin (mips) Reviewed-by: Alexander Lobakin (mips) Reviewed-by: Andy Shevchenko Acked-by: Will Deacon Tested-by: Wolfram Sang --- arch/arc/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/x86/um/Kconfig | 1 - include/linux/find.h | 13 ------------- lib/Kconfig | 3 --- 8 files changed, 22 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 3a5a80f302e1d..248389278e8f4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -20,7 +20,6 @@ config ARC select COMMON_CLK select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) - select GENERIC_FIND_FIRST_BIT # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954be..abbd7b1e549fa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -119,7 +119,6 @@ config ARM64 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP - select GENERIC_FIND_FIRST_BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI select GENERIC_IRQ_PROBE diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 771ca53af06d2..0cf31a6cbee17 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -32,7 +32,6 @@ config MIPS select GENERIC_ATOMIC64 if !64BIT select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_IOMAP select GENERIC_IRQ_PROBE diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b86de61b8caa2..b0bb3aaec5f20 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -127,7 +127,6 @@ config S390 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ab83c22d274e7..8a2c61f732ee6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -134,7 +134,6 @@ config X86 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 95d26a69088b1..40d6a06e41c81 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,7 +8,6 @@ endmenu config UML_X86 def_bool y - select GENERIC_FIND_FIRST_BIT config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/include/linux/find.h b/include/linux/find.h index c5410c243e046..ea57f7f38c497 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -101,8 +101,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, } #endif -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - #ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region @@ -147,17 +145,6 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) } #endif -#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ - -#ifndef find_first_bit -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) -#endif -#ifndef find_first_zero_bit -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) -#endif - -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ - #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory 
region

diff --git a/lib/Kconfig b/lib/Kconfig
index 5e7165e6a346c..6a6ae5312fa05 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -65,9 +65,6 @@ config GENERIC_STRNLEN_USER
 config GENERIC_NET_UTILS
 	bool
 
-config GENERIC_FIND_FIRST_BIT
-	bool
-
 source "lib/math/Kconfig"
 
 config NO_GENERIC_PCI_IOPORT_MAP

From f0a9b5ae3d373ee9160185fdcb488cc8e580b77f Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Sat, 14 Aug 2021 14:17:01 -0700
Subject: [PATCH 0111/1202] lib: add find_first_and_bit()

Currently find_first_and_bit() is an alias to find_next_and_bit().
However, it is widely used in cpumask, so it is worth optimizing. This
patch adds its own implementation for find_first_and_bit().

On x86_64 find_bit_benchmark says:

Before (#define find_first_and_bit(...) find_next_and_bit(..., 0)):
Start testing find_bit() with random-filled bitmap
[ 140.291468] find_first_and_bit: 46890919 ns, 32671 iterations
Start testing find_bit() with sparse bitmap
[ 140.295028] find_first_and_bit: 7103 ns, 1 iterations

After:
Start testing find_bit() with random-filled bitmap
[ 162.574907] find_first_and_bit: 25045813 ns, 32846 iterations
Start testing find_bit() with sparse bitmap
[ 162.578458] find_first_and_bit: 4900 ns, 1 iterations

(Thanks to Alexey Klimov for thorough testing.)

Signed-off-by: Yury Norov
Tested-by: Wolfram Sang
Tested-by: Alexey Klimov
---
 include/linux/find.h     | 27 +++++++++++++++++++++++++++
 lib/find_bit.c           | 21 +++++++++++++++++++++
 lib/find_bit_benchmark.c | 21 +++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/include/linux/find.h b/include/linux/find.h
index ea57f7f38c497..6048f8c974185 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -12,6 +12,8 @@ extern unsigned long _find_next_bit(const unsigned long *addr1,
 		const unsigned long *addr2, unsigned long nbits,
 		unsigned long start, unsigned long invert, unsigned long le);
 extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+					 const unsigned long *addr2, unsigned long size);
 extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
 extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
 
@@ -123,6 +125,31 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 }
 #endif
 
+#ifndef find_first_and_bit
+/**
+ * find_first_and_bit - find the first set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_first_and_bit(const unsigned long *addr1,
+				 const unsigned long *addr2,
+				 unsigned long size)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0);
+
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_first_and_bit(addr1, addr2, size);
+}
+#endif
+
 #ifndef find_first_zero_bit
 /**
  * find_first_zero_bit - find the first cleared bit in a memory region
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 0f8e2e369b1d2..1b8e4b2a9cba8 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -89,6 +89,27 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
 EXPORT_SYMBOL(_find_first_bit);
 #endif
 
+#ifndef find_first_and_bit
+/*
+ * Find the first set bit in two memory regions.
+ */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +EXPORT_SYMBOL(_find_first_and_bit); +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5637c5711db94..db904b57d4b87 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -49,6 +49,25 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) return 0; } +static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len) +{ + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata; + unsigned long i, cnt; + ktime_t time; + + bitmap_copy(cp, bitmap, BITMAP_LEN); + + time = ktime_get(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_and_bit(cp, bitmap2, len); + __clear_bit(i, cp); + } + time = ktime_get() - time; + pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt); + + return 0; +} + static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; @@ -129,6 +148,7 @@ static int __init find_bit_test(void) * traverse only part of bitmap to avoid soft lockup. */ test_find_first_bit(bitmap, BITMAP_LEN / 10); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); @@ -145,6 +165,7 @@ static int __init find_bit_test(void) test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* From 730b4f23ea71abe35193648620e4519cbc8dae22 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:02 -0700 Subject: [PATCH 0112/1202] cpumask: use find_first_and_bit() Now we have an efficient implementation for find_first_and_bit(), so switch cpumask to use it where appropriate. 
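A typical caller-side effect of the switch (sketch; the example_ wrapper is invented): picking the first CPU that is both in a task's affinity mask and online now costs a single forward scan instead of a 'next' search primed with -1.

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/sched.h>

static int example_pick_cpu(struct task_struct *p)
{
	unsigned int cpu = cpumask_first_and(p->cpus_ptr, cpu_online_mask);

	/* >= nr_cpu_ids means no CPU satisfies both masks */
	return cpu < nr_cpu_ids ? (int)cpu : -ENODEV;
}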
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/cpumask.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5d4d07a9e1ed1..c7be6c5b75236 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -123,6 +123,12 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return 0; +} + static inline unsigned int cpumask_last(const struct cpumask *srcp) { return 0; @@ -167,7 +173,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) static inline int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { - return cpumask_next_and(-1, src1p, src2p); + return cpumask_first_and(src1p, src2p); } static inline int cpumask_any_distribute(const struct cpumask *srcp) @@ -195,6 +201,19 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + */ +static inline +unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); +} + /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer @@ -585,15 +604,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any(srcp) cpumask_first(srcp) -/** - * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 - * @src1p: the first input - * @src2p: the second input - * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). - */ -#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) - /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask From 523f4c8e86c3f200d94a872f11da11ae9fbd734d Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:03 -0700 Subject: [PATCH 0113/1202] all: replace find_next{,_zero}_bit with find_first{,_zero}_bit where appropriate find_first{,_zero}_bit is a more effective analogue of 'next' version if start == 0. This patch replaces 'next' with 'first' where things look trivial. 
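Every hunk below is an instance of the same mechanical rewrite; the two spellings are equivalent by definition, and the 'first' form becomes eligible for the optimized fast path. A self-checking sketch (the example_ helper is invented):

#include <linux/bitmap.h>
#include <linux/bug.h>

static void example_first_vs_next(const unsigned long *addr,
				  unsigned long size)
{
	unsigned long first = find_first_bit(addr, size);	/* new */
	unsigned long next0 = find_next_bit(addr, size, 0);	/* old */

	/* With start == 0 the 'next' form degenerates to 'first'. */
	WARN_ON_ONCE(first != next0);
}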
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- arch/powerpc/platforms/pasemi/dma_lib.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/dma/ti/edma.c | 2 +- drivers/iio/adc/ad7124.c | 2 +- drivers/infiniband/hw/irdma/hw.c | 16 ++++++++-------- drivers/media/cec/core/cec-core.c | 2 +- drivers/media/mc/mc-devnode.c | 2 +- drivers/pci/controller/dwc/pci-dra7xx.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- drivers/soc/ti/k3-ringacc.c | 4 ++-- drivers/tty/n_tty.c | 2 +- drivers/virt/acrn/ioreq.c | 3 +-- fs/f2fs/segment.c | 8 ++++---- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/dlm/dlmdomain.c | 4 ++-- fs/ocfs2/dlm/dlmmaster.c | 18 +++++++++--------- fs/ocfs2/dlm/dlmrecovery.c | 2 +- fs/ocfs2/dlm/dlmthread.c | 2 +- lib/genalloc.c | 2 +- net/ncsi/ncsi-manage.c | 4 ++-- 21 files changed, 47 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index 270fa3c0d3724..26427311fc725 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -375,7 +375,7 @@ int pasemi_dma_alloc_flag(void) int bit; retry: - bit = find_next_bit(flags_free, MAX_FLAGS, 0); + bit = find_first_bit(flags_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, flags_free)) @@ -440,7 +440,7 @@ int pasemi_dma_alloc_fun(void) int bit; retry: - bit = find_next_bit(fun_free, MAX_FLAGS, 0); + bit = find_first_bit(fun_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, fun_free)) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf63..59459960ec3a0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2019,7 +2019,7 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, while ((slotidx > 0) && (ofs >= ms->npages)) { slotidx--; ms = slots->memslots + slotidx; - ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); + ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); } return ms->base_gfn + ofs; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index bd4a41afbbfc9..5864c9b46cb90 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -196,7 +196,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu) return per_cpu_ptr(sess->cpu_queues, bit); } else if (cpu != 0) { /* Search from 0 to cpu */ - bit = find_next_bit(sess->cpu_queues_bm, cpu, 0); + bit = find_first_bit(sess->cpu_queues_bm, cpu); if (bit < cpu) return per_cpu_ptr(sess->cpu_queues, bit); } diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 35d81bd857f11..caa4050ecc021 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -1681,7 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); emr = val; - for (i = find_next_bit(&emr, 32, 0); i < 32; + for (i = find_first_bit(&emr, 32); i < 32; i = find_next_bit(&emr, 32, i + 1)) { int k = (j << 5) + i; diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index e45c600fccc0b..bc2cfa5f95922 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -347,7 +347,7 @@ static int ad7124_find_free_config_slot(struct ad7124_state *st) { unsigned int free_cfg_slot; - free_cfg_slot = find_next_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS, 0); + free_cfg_slot = find_first_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS); if (free_cfg_slot == 
AD7124_MAX_CONFIGS) return -1; diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7de525a5ccf8c..eea6bf9360d3e 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1704,14 +1704,14 @@ static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf) */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { - iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds, - iwdev->rf->max_pd, 0); - iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps, - iwdev->rf->max_qp, 0); - iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs, - iwdev->rf->max_cq, 0); - iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs, - iwdev->rf->max_mr, 0); + iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, + iwdev->rf->max_pd); + iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, + iwdev->rf->max_qp); + iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, + iwdev->rf->max_cq); + iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, + iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c index 551689d371a71..7322e7cd9753e 100644 --- a/drivers/media/cec/core/cec-core.c +++ b/drivers/media/cec/core/cec-core.c @@ -106,7 +106,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); - minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0); + minor = find_first_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES); if (minor == CEC_NUM_DEVICES) { mutex_unlock(&cec_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c index f11382afe23bf..680fbb3a93402 100644 --- a/drivers/media/mc/mc-devnode.c +++ b/drivers/media/mc/mc-devnode.c @@ -217,7 +217,7 @@ int __must_check media_devnode_register(struct media_device *mdev, /* Part 1: Find a free minor number */ mutex_lock(&media_devnode_lock); - minor = find_next_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES, 0); + minor = find_first_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES); if (minor == MEDIA_NUM_DEVICES) { mutex_unlock(&media_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c index fbbb78f6885e7..ab4b133e1ccdd 100644 --- a/drivers/pci/controller/dwc/pci-dra7xx.c +++ b/drivers/pci/controller/dwc/pci-dra7xx.c @@ -211,7 +211,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index) if (!val) return 0; - pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0); + pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL); while (pos != MAX_MSI_IRQS_PER_CTRL) { generic_handle_domain_irq(pp->irq_domain, (index * MAX_MSI_IRQS_PER_CTRL) + pos); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 78ce38d7251c5..52eb2e721315a 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -17933,8 +17933,8 @@ lpfc_sli4_alloc_xri(struct lpfc_hba *phba) * the driver starts at 0 each time. 
*/ spin_lock_irq(&phba->hbalock); - xri = find_next_zero_bit(phba->sli4_hba.xri_bmask, - phba->sli4_hba.max_cfg_param.max_xri, 0); + xri = find_first_zero_bit(phba->sli4_hba.xri_bmask, + phba->sli4_hba.max_cfg_param.max_xri); if (xri >= phba->sli4_hba.max_cfg_param.max_xri) { spin_unlock_irq(&phba->hbalock); return NO_XRI; @@ -19611,7 +19611,7 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) max_rpi = phba->sli4_hba.max_cfg_param.max_rpi; rpi_limit = phba->sli4_hba.next_rpi; - rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0); + rpi = find_first_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit); if (rpi >= rpi_limit) rpi = LPFC_RPI_ALLOC_ERROR; else { @@ -20254,8 +20254,8 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) * have been tested so that we can detect when we should * change the priority level. */ - next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, - LPFC_SLI4_FCF_TBL_INDX_MAX, 0); + next_fcf_index = find_first_bit(phba->fcf.fcf_rr_bmask, + LPFC_SLI4_FCF_TBL_INDX_MAX); } diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c index 312ba0f98ad79..573be88f81910 100644 --- a/drivers/soc/ti/k3-ringacc.c +++ b/drivers/soc/ti/k3-ringacc.c @@ -358,8 +358,8 @@ struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc, goto out; if (flags & K3_RINGACC_RING_USE_PROXY) { - proxy_id = find_next_zero_bit(ringacc->proxy_inuse, - ringacc->num_proxies, 0); + proxy_id = find_first_zero_bit(ringacc->proxy_inuse, + ringacc->num_proxies); if (proxy_id == ringacc->num_proxies) goto error; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 0ec93f1a61f5d..0965793dfe4f5 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1975,7 +1975,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, more = n - (size - tail); if (eol == N_TTY_BUF_SIZE && more) { /* scan wrapped without finding set bit */ - eol = find_next_bit(ldata->read_flags, more, 0); + eol = find_first_bit(ldata->read_flags, more); found = eol != more; } else found = eol != size; diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c index 80b2e3f0e2764..5ff1c53740c00 100644 --- a/drivers/virt/acrn/ioreq.c +++ b/drivers/virt/acrn/ioreq.c @@ -246,8 +246,7 @@ void acrn_ioreq_request_clear(struct acrn_vm *vm) spin_lock_bh(&vm->ioreq_clients_lock); client = vm->default_client; if (client) { - vcpu = find_next_bit(client->ioreqs_map, - ACRN_IO_REQUEST_MAX, 0); + vcpu = find_first_bit(client->ioreqs_map, ACRN_IO_REQUEST_MAX); while (vcpu < ACRN_IO_REQUEST_MAX) { acrn_ioreq_complete_request(client, vcpu, NULL); vcpu = find_next_bit(client->ioreqs_map, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a135d22474154..5831e66cb1474 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2538,8 +2538,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { if (dir == ALLOC_RIGHT) { - secno = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } else { go_left = 1; @@ -2554,8 +2554,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, left_start--; continue; } - left_start = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + left_start = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); break; } diff --git a/fs/ocfs2/cluster/heartbeat.c 
b/fs/ocfs2/cluster/heartbeat.c index f89ffcbd585ff..a17be1618bf70 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work) o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); /* lowest node as master node to make negotiate decision. */ - master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); + master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES); if (master_node == o2nm_this_node()) { if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 9f90fc9551e1a..c4eccd499db8a 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) int status, ret = 0, i; char *p; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); @@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) struct o2nm_node *node; int ret = 0, status, count, i; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9b88219febb52..227da5b1b6ab1 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -861,7 +861,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, * to see if there are any nodes that still need to be * considered. these will not appear in the mle nodemap * but they might own this lockres. wait on them. */ - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -912,7 +912,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, dlm_wait_for_recovery(dlm); spin_lock(&dlm->spinlock); - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -1079,7 +1079,7 @@ static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm, sleep = 1; /* have all nodes responded? */ if (voting_done && !*blocked) { - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (dlm->node_num <= bit) { /* my node number is lowest. * now tell other nodes that I am @@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, } else { mlog(ML_ERROR, "node down! %d\n", node); if (blocked) { - int lowest = find_next_bit(mle->maybe_map, - O2NM_MAX_NODES, 0); + int lowest = find_first_bit(mle->maybe_map, + O2NM_MAX_NODES); /* act like it was never there */ clear_bit(node, mle->maybe_map); @@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, "MLE for it! (%.*s)\n", assert->node_idx, namelen, name); } else { - int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0); + int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES) { /* not necessarily an error, though less likely. 
* could be master just re-asserting. */ @@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm, } if (!nonlocal) { - node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES); if (node_ref >= O2NM_MAX_NODES) return 0; } @@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm, BUG_ON(mle->type != DLM_MLE_BLOCK); spin_lock(&mle->spinlock); - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit != dead_node) { mlog(0, "mle found, but dead node %u would not have been " "master\n", dead_node); @@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) spin_lock(&dlm->master_lock); BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); - BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES)); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 0e7aad1b11cc4..e24e327524f85 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { int bit; - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES || bit < 0) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); else diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index c350bd4df7701..eedf07ca23ca4 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) return 0; /* Another node has this resource with this node as the master */ - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + bit = find_first_bit(res->refmap, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) return 0; diff --git a/lib/genalloc.c b/lib/genalloc.c index 9a57257988c77..00fc50d0a640e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool) list_del(&chunk->next_chunk); end_bit = chunk_size(chunk) >> order; - bit = find_next_bit(chunk->bits, end_bit, 0); + bit = find_first_bit(chunk->bits, end_bit); BUG_ON(bit < end_bit); vfree(chunk); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 7121ce2a47c0b..78814417d753f 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); - index = find_next_bit(bitmap, ncf->n_vids, 0); + index = find_first_bit(bitmap, ncf->n_vids); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); return -1; @@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return -1; } - index = find_next_zero_bit(bitmap, ncf->n_vids, 0); + index = find_first_zero_bit(bitmap, ncf->n_vids); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", From fa9611dc9a301ff2d8ecdfe6a0a01547c70f56c2 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:04 -0700 Subject: [PATCH 0114/1202] tools: sync tools/bitmap with mother linux Remove tools/include/asm-generic/bitops/find.h and copy include/linux/bitmap.h to tools. 
find_*_le() functions are not copied because not needed in tools. Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- MAINTAINERS | 2 +- tools/include/asm-generic/bitops.h | 1 - tools/include/linux/bitmap.h | 7 +- .../{asm-generic/bitops => linux}/find.h | 81 +++++++++++++++++-- tools/lib/find_bit.c | 20 +++++ 5 files changed, 100 insertions(+), 11 deletions(-) rename tools/include/{asm-generic/bitops => linux}/find.h (63%) diff --git a/MAINTAINERS b/MAINTAINERS index 1499b22b111b5..66e67ff778221 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3296,8 +3296,8 @@ F: lib/bitmap.c F: lib/find_bit.c F: lib/find_bit_benchmark.c F: lib/test_bitmap.c -F: tools/include/asm-generic/bitops/find.h F: tools/include/linux/bitmap.h +F: tools/include/linux/find.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 5d2ab38965cca..9ab313e93555a 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -18,7 +18,6 @@ #include #include #include -#include #ifndef _TOOLS_LINUX_BITOPS_H_ #error only can be included directly diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 95611df1d26e5..ea97804d04d4a 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -1,9 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_BITOPS_H -#define _PERF_BITOPS_H +#ifndef _TOOLS_LINUX_BITMAP_H +#define _TOOLS_LINUX_BITMAP_H #include #include +#include #include #include @@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -#endif /* _PERF_BITOPS_H */ +#endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/linux/find.h similarity index 63% rename from tools/include/asm-generic/bitops/find.h rename to tools/include/linux/find.h index 6481fd11012a9..47e2bd6c51745 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/linux/find.h @@ -1,11 +1,19 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ -#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ +#ifndef _TOOLS_LINUX_FIND_H_ +#define _TOOLS_LINUX_FIND_H_ + +#ifndef _TOOLS_LINUX_BITMAP_H +#error tools: only can be included directly +#endif + +#include extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -96,7 +104,6 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, #endif #ifndef find_first_bit - /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -116,11 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) return _find_first_bit(addr, size); } +#endif + +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * 
Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); -#endif /* find_first_bit */ + return val ? __ffs(val) : size; + } -#ifndef find_first_zero_bit + return _find_first_and_bit(addr1, addr2, size); +} +#endif +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -142,4 +172,43 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) } #endif -#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. + */ +extern unsigned long find_next_clump8(unsigned long *clump, + const unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_clump8(clump, bits, size) \ + find_next_clump8((clump), (bits), (size), 0) + + +#endif /*__LINUX_FIND_H_ */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 109aa7ffcf973..ba4b8d94e0048 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) } #endif +#ifndef find_first_and_bit +/* + * Find the first set bit in two memory regions. + */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. From 4e258d05437be76f9bbfa23af1970ddbc9a5fd39 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:05 -0700 Subject: [PATCH 0115/1202] cpumask: replace cpumask_next_* with cpumask_first_* where appropriate cpumask_first() is a more effective analogue of 'next' version if n == -1 (which means start == 0). This patch replaces 'next' with 'first' where things look trivial. There's no cpumask_first_zero() function, so create it. 
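For illustration only (this snippet is not part of the patch; the calls are the ones converted in the hunks below), the pattern reads:

	/* Before: search from the beginning with the 'next' API */
	cpu = cpumask_next_and(-1, &tmp, cpu_online_mask);
	cpu = cpumask_next_zero(-1, &portal_cpus);

	/* After: the 'first' API states the intent directly */
	cpu = cpumask_first_and(&tmp, cpu_online_mask);
	cpu = cpumask_first_zero(&portal_cpus);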
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- arch/powerpc/include/asm/cputhreads.h | 2 +- block/blk-mq.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/soc/fsl/qbman/bman_portal.c | 2 +- drivers/soc/fsl/qbman/qman_portal.c | 2 +- include/linux/cpumask.h | 16 ++++++++++++++++ kernel/time/clocksource.c | 4 ++-- 7 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index b167186aaee4a..44286df21d2ae 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -52,7 +52,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) for (i = 0; i < NR_CPUS; i += threads_per_core) { cpumask_shift_left(&tmp, &threads_core_mask, i); if (cpumask_intersects(threads, &tmp)) { - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); + cpu = cpumask_first_and(&tmp, cpu_online_mask); if (cpu < nr_cpu_ids) cpumask_set_cpu(cpu, &res); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 108a352051be5..d4f5a05f4470e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2536,7 +2536,7 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu, struct blk_mq_hw_ctx *hctx) { - if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu) + if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) return false; if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) return false; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 79bd2585ec6b2..a3b16790846cc 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2085,7 +2085,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi) stragglers = num_cpu >= vi->curr_queue_pairs ? num_cpu % vi->curr_queue_pairs : 0; - cpu = cpumask_next(-1, cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); for (i = 0; i < vi->curr_queue_pairs; i++) { group_size = stride + (i < stragglers ? 
1 : 0); diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c index acda8a5637c52..4d7b9caee1c47 100644 --- a/drivers/soc/fsl/qbman/bman_portal.c +++ b/drivers/soc/fsl/qbman/bman_portal.c @@ -155,7 +155,7 @@ static int bman_portal_probe(struct platform_device *pdev) } spin_lock(&bman_lock); - cpu = cpumask_next_zero(-1, &portal_cpus); + cpu = cpumask_first_zero(&portal_cpus); if (cpu >= nr_cpu_ids) { __bman_portals_probed = 1; /* unassigned portal, skip init */ diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index 96f74a1dc6035..e23b60618c1a1 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -248,7 +248,7 @@ static int qman_portal_probe(struct platform_device *pdev) pcfg->pools = qm_get_pools_sdqcr(); spin_lock(&qman_lock); - cpu = cpumask_next_zero(-1, &portal_cpus); + cpu = cpumask_first_zero(&portal_cpus); if (cpu >= nr_cpu_ids) { __qman_portals_probed = 1; /* unassigned portal, skip init */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c7be6c5b75236..862856fe00562 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -123,6 +123,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return 0; +} + static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -201,6 +206,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_first_zero - get the first unset cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if all cpus are set. + */ +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + /** * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 * @src1p: the first input diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b8a14d2fb5ba6..f29d1a524fa5c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -257,7 +257,7 @@ static void clocksource_verify_choose_cpus(void) return; /* Make sure to select at least one CPU other than the current CPU. */ - cpu = cpumask_next(-1, cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); if (cpu == smp_processor_id()) cpu = cpumask_next(cpu, cpu_online_mask); if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) @@ -279,7 +279,7 @@ static void clocksource_verify_choose_cpus(void) cpu = prandom_u32() % nr_cpu_ids; cpu = cpumask_next(cpu - 1, cpu_online_mask); if (cpu >= nr_cpu_ids) - cpu = cpumask_next(-1, cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); if (!WARN_ON_ONCE(cpu >= nr_cpu_ids)) cpumask_set_cpu(cpu, &cpus_chosen); } From 1408638497db751272744967e00c4c17da516794 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:06 -0700 Subject: [PATCH 0116/1202] include/linux: move for_each_bit() macros from bitops.h to find.h for_each_bit() macros depend on find_bit() machinery, and so the proper place for them is the find.h header. 
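For reference, a typical caller of these macros looks like this (illustrative snippet, not part of the patch):

	unsigned long map = 0xf0;
	unsigned int bit;

	/* find.h supplies the underlying find_first_bit()/find_next_bit() */
	for_each_set_bit(bit, &map, BITS_PER_LONG)
		pr_info("bit %u set\n", bit);	/* prints bits 4..7 */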
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/bitops.h | 34 ---------------------------------- include/linux/find.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5e62e2383b7f1..7aaed501f7686 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -/* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -#define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_first_zero_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/* same as for_each_clear_bit() but use bit as value to start with */ -#define for_each_clear_bit_from(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/** - * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits - * @start: bit offset to start search and to store the current iteration offset - * @clump: location to store copy of current 8-bit clump - * @bits: bitmap address to base the search on - * @size: bitmap size in number of bits - */ -#define for_each_set_clump8(start, clump, bits, size) \ - for ((start) = find_first_clump8(&(clump), (bits), (size)); \ - (start) < (size); \ - (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) - static inline int get_bitmask_order(unsigned int count) { int order; diff --git a/include/linux/find.h b/include/linux/find.h index 6048f8c974185..4500e8ab93e24 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -279,4 +279,38 @@ unsigned long find_next_bit_le(const void *addr, unsigned #error "Please fix " #endif +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +#define for_each_clear_bit(bit, addr, size) \ + for ((bit) = find_first_zero_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/* same as for_each_clear_bit() but use bit as value to start with */ +#define for_each_clear_bit_from(bit, addr, size) \ + for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) + +/** + * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits + * @start: bit offset to start search and to store the current iteration offset + * @clump: location to store copy of current 8-bit clump + * @bits: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); \ + (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), 
(size), (start) + 8)) + #endif /*__LINUX_FIND_H_ */ From f79c0edd6d505e4d9e83f3d700085c99fa0d4746 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:07 -0700 Subject: [PATCH 0117/1202] find: micro-optimize for_each_{set,clear}_bit() The macros iterate through all set/clear bits in a bitmap. They search for the first bit using find_first_bit(), and for the remaining bits using find_next_bit(). Since find_next_bit() is called shortly after find_first_bit(), we can save a few lines of I-cache by not using find_first_bit(). Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- include/linux/find.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/find.h b/include/linux/find.h index 4500e8ab93e24..ae9ed52b52b8b 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -280,7 +280,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned #endif #define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ + for ((bit) = find_next_bit((addr), (size), 0); \ (bit) < (size); \ (bit) = find_next_bit((addr), (size), (bit) + 1)) @@ -291,7 +291,7 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_bit((addr), (size), (bit) + 1)) #define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_first_zero_bit((addr), (size)); \ + for ((bit) = find_next_zero_bit((addr), (size), 0); \ (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) From 747a53f547cb6b5b0fc1fcf440c3d867cb0c8942 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:08 -0700 Subject: [PATCH 0118/1202] Replace for_each_*_bit_from() with for_each_*_bit() where appropriate A couple of kernel functions call for_each_*_bit_from() with start bit equal to 0. Replace them with for_each_*_bit(). No functional changes, but this might improve readability.
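The pattern being removed looks like this (sketch; handle_bit() is a hypothetical callback):

	/* Before: the *_from variant needs a manually zeroed cursor */
	unsigned int bit = 0;

	for_each_set_bit_from(bit, bitmap, nbits)
		handle_bit(bit);

	/* After: the plain iterator always starts at bit 0 */
	unsigned int bit;

	for_each_set_bit(bit, bitmap, nbits)
		handle_bit(bit);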
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- arch/x86/kernel/apic/vector.c | 4 ++-- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++-- drivers/hwmon/ltc2992.c | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c132daabe6152..3e6f6b448f6aa 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -760,9 +760,9 @@ void __init lapic_update_legacy_vectors(void) void __init lapic_assign_system_vectors(void) { - unsigned int i, vector = 0; + unsigned int i, vector; - for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + for_each_set_bit(vector, system_vectors, NR_VECTORS) irq_matrix_assign_system(vector_matrix, vector, false); if (nr_legacy_irqs() > 1) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index cc5b07f863463..789acae37f553 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1047,7 +1047,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) { - unsigned int i = 0; + unsigned int i; dev_err(gpu->dev, "recover hung GPU!\n"); @@ -1060,7 +1060,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) /* complete all events, the GPU won't do it after the reset */ spin_lock(&gpu->event_spinlock); - for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS) + for_each_set_bit(i, gpu->event_bitmap, ETNA_NR_EVENTS) complete(&gpu->event_free); bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS); spin_unlock(&gpu->event_spinlock); diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 2a4bed0ab226b..7352d2b3c756d 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -248,8 +248,7 @@ static int ltc2992_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask gpio_status = reg; - gpio_nr = 0; - for_each_set_bit_from(gpio_nr, mask, LTC2992_GPIO_NR) { + for_each_set_bit(gpio_nr, mask, LTC2992_GPIO_NR) { if (test_bit(LTC2992_GPIO_BIT(gpio_nr), &gpio_status)) set_bit(gpio_nr, bits); } From e0ce85629ef22aaf8b5d65bc462bcaeff8e41320 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:10 -0700 Subject: [PATCH 0119/1202] mm/percpu: micro-optimize pcpu_is_populated() bitmap_next_clear_region() calls find_next_zero_bit() and find_next_bit() sequentially to find a range of clear bits. In case of pcpu_is_populated() there's a chance to return earlier if bitmap has all bits set. 
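Schematically, the early return boils down to a single search (hypothetical helper, mirroring the hunk below):

	/* Returns true if [start, end) of @map contains no clear bit. */
	static bool range_fully_populated(const unsigned long *map,
					  unsigned int start, unsigned int end)
	{
		return find_next_zero_bit(map, end, start) >= end;
	}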
Signed-off-by: Yury Norov Tested-by: Wolfram Sang Acked-by: Dennis Zhou --- mm/percpu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index e0a986818903d..1cf0bb904b1d0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1070,17 +1070,18 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off, static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits, int *next_off) { - unsigned int page_start, page_end, rs, re; + unsigned int start, end; - page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); - page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); + start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); + end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); - rs = page_start; - bitmap_next_clear_region(chunk->populated, &rs, &re, page_end); - if (rs >= page_end) + start = find_next_zero_bit(chunk->populated, end, start); + if (start >= end) return true; - *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; + end = find_next_bit(chunk->populated, end, start + 1); + + *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; return false; } From 20a31ee8ae3a1a3d3742afae3a18381a850d6e7b Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:11 -0700 Subject: [PATCH 0120/1202] bitmap: unify find_bit operations bitmap_for_each_{set,clear}_region() are similar to the for_each_bit() macros in include/linux/find.h, but their interface and implementation are different. This patch adds for_each_bitrange() macros and drops the unused bitmap_*_region() API for the sake of unification. Signed-off-by: Yury Norov Tested-by: Wolfram Sang Acked-by: Dennis Zhou Acked-by: Ulf Hansson # For MMC --- drivers/mmc/host/renesas_sdhi_core.c | 2 +- include/linux/bitmap.h | 33 ---------------- include/linux/find.h | 56 ++++++++++++++++++++++++++++ mm/percpu.c | 20 ++++------ 4 files changed, 65 insertions(+), 46 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index a4407f391f66a..9c99fb5769112 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -628,7 +628,7 @@ static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host) * is at least SH_MOBILE_SDHI_MIN_TAP_ROW probes long then use the * center index as the tap, otherwise bail out. 
*/ - bitmap_for_each_set_region(bitmap, rs, re, 0, taps_size) { + for_each_set_bitrange(rs, re, bitmap, taps_size) { if (re - rs > tap_cnt) { tap_end = re; tap_start = rs; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c88b2321ba14d..e57dca0653436 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -55,12 +55,6 @@ struct device; * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above - * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region - * bitmap_next_set_region(map, &start, &end, nbits) Find next set region - * bitmap_for_each_clear_region(map, rs, re, start, end) - * Iterate over all clear regions - * bitmap_for_each_set_region(map, rs, re, start, end) - * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest @@ -465,14 +459,6 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } -static inline void bitmap_next_clear_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) -{ - *rs = find_next_zero_bit(bitmap, end, *rs); - *re = find_next_bit(bitmap, end, *rs + 1); -} - static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -481,25 +467,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, *re = find_next_zero_bit(bitmap, end, *rs + 1); } -/* - * Bitmap region iterators. Iterates over the bitmap between [@start, @end). - * @rs and @re should be integer variables and will be set to start and end - * index of the current clear or set region. - */ -#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) - -#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end))) - /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. 
* @n: u64 value diff --git a/include/linux/find.h b/include/linux/find.h index ae9ed52b52b8b..5bb6db213bcb8 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -301,6 +301,62 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) +/** + * for_each_set_bitrange - iterate over all set bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit) + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_bitrange(b, e, addr, size) \ + for ((b) = find_next_bit((addr), (size), 0), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_bit((addr), (size), (e) + 1), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1)) + +/** + * for_each_set_bitrange_from - iterate over all set bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit); must be initialized + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_bit((addr), (size), (b)), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_bit((addr), (size), (e) + 1), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first unset bit) + * @e: bit offset of end of current bitrange (first set bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), 0), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit); must be initialized + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + /** * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits * @start: bit offset to start search and to store the current iteration offset diff --git a/mm/percpu.c b/mm/percpu.c index 1cf0bb904b1d0..ff4e7a89eb612 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -779,7 +779,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) { struct pcpu_block_md *block = chunk->md_blocks + index; unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index); - unsigned int rs, re, start; /* region start, region end */ + unsigned int start, end; /* region start, region end */ /* promote scan_hint to contig_hint */ if (block->scan_hint) { @@ -795,9 +795,8 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) block->right_free = 0; /* iterate over free areas and update the contig hints */ - 
bitmap_for_each_clear_region(alloc_map, rs, re, start, - PCPU_BITMAP_BLOCK_BITS) - pcpu_block_update(block, rs, re); + for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS) + pcpu_block_update(block, start, end); } /** @@ -1852,13 +1851,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, /* populate if not all pages are already there */ if (!is_atomic) { - unsigned int page_start, page_end, rs, re; + unsigned int page_end, rs, re; - page_start = PFN_DOWN(off); + rs = PFN_DOWN(off); page_end = PFN_UP(off + size); - bitmap_for_each_clear_region(chunk->populated, rs, re, - page_start, page_end) { + for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) { WARN_ON(chunk->immutable); ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); @@ -2014,8 +2012,7 @@ static void pcpu_balance_free(bool empty_only) list_for_each_entry_safe(chunk, next, &to_free, list) { unsigned int rs, re; - bitmap_for_each_set_region(chunk->populated, rs, re, 0, - chunk->nr_pages) { + for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) { pcpu_depopulate_chunk(chunk, rs, re); spin_lock_irq(&pcpu_lock); pcpu_chunk_depopulated(chunk, rs, re); @@ -2085,8 +2082,7 @@ static void pcpu_balance_populated(void) continue; /* @chunk can't go away while pcpu_alloc_mutex is held */ - bitmap_for_each_clear_region(chunk->populated, rs, re, 0, - chunk->nr_pages) { + for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) { int nr = min_t(int, re - rs, nr_to_pop); spin_unlock_irq(&pcpu_lock); From 0ac8d21ff6c4c1f102c559cb6f1476a8424ba13a Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:12 -0700 Subject: [PATCH 0121/1202] lib: bitmap: add performance test for bitmap_print_to_pagebuf Functional tests for bitmap_print_to_pagebuf() are provided in lib/test_printf.c. This patch adds a performance test for the case of a fully set bitmap.
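The measurement uses the usual ktime idiom; the core of the new test boils down to (sketch):

	ktime_t time;

	time = ktime_get();
	ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8);
	time = ktime_get() - time;	/* elapsed nanoseconds */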
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- lib/test_bitmap.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index d33fa5a61b958..0c82f07f74fcb 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -446,6 +446,42 @@ static void __init test_bitmap_parselist(void) } } +static void __init test_bitmap_printlist(void) +{ + unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL); + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + char expected[256]; + int ret, slen; + ktime_t time; + + if (!buf || !bmap) + goto out; + + memset(bmap, -1, PAGE_SIZE); + slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1); + if (slen < 0) + goto out; + + time = ktime_get(); + ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8); + time = ktime_get() - time; + + if (ret != slen + 1) { + pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen); + goto out; + } + + if (strncmp(buf, expected, slen)) { + pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected); + goto out; + } + + pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time); +out: + kfree(buf); + kfree(bmap); +} + static const unsigned long parse_test[] __initconst = { BITMAP_FROM_U64(0), BITMAP_FROM_U64(1), @@ -818,6 +854,7 @@ static void __init selftest(void) test_bitmap_arr32(); test_bitmap_parse(); test_bitmap_parselist(); + test_bitmap_printlist(); test_mem_optimisations(); test_for_each_set_clump8(); test_bitmap_cut(); From 785cb064e2f87b6cd2157554f50193dba0642d75 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sat, 14 Aug 2021 14:17:13 -0700 Subject: [PATCH 0122/1202] vsprintf: rework bitmap_list_string bitmap_list_string() is very inefficient when printing bitmaps with long ranges of set bits because it calls find_next_bit() for each bit in the bitmap. We can do better by detecting ranges of set bits. In my environment, the before/after times are 943008/31008 ns.
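The reworked function walks runs of set bits instead of single bits. As a standalone sketch (printing via pr_cont() for brevity; the real code emits into a buffer):

	int rbot, rtop;

	/* Each iteration yields one run of consecutive set bits [rbot, rtop). */
	for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) {
		if (rtop == rbot + 1)
			pr_cont("%d,", rbot);			/* lone bit */
		else
			pr_cont("%d-%d,", rbot, rtop - 1);	/* range */
	}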
Signed-off-by: Yury Norov Tested-by: Wolfram Sang --- lib/vsprintf.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7ad44f2c8f57..4415bea601fdf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1241,20 +1241,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; bool first = true; + int rbot, rtop; if (check_pointer(&buf, end, bitmap, spec)) return buf; - rbot = cur = find_first_bit(bitmap, nr_bits); - while (cur < nr_bits) { - rtop = cur; - cur = find_next_bit(bitmap, nr_bits, cur + 1); - if (cur < nr_bits && cur <= rtop + 1) - continue; - + for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) { if (!first) { if (buf < end) *buf = ','; @@ -1263,15 +1256,12 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, first = false; buf = number(buf, end, rbot, default_dec_spec); - if (rbot < rtop) { - if (buf < end) - *buf = '-'; - buf++; - - buf = number(buf, end, rtop, default_dec_spec); - } + if (rtop == rbot + 1) + continue; - rbot = cur; + if (buf < end) + *buf = '-'; + buf = number(++buf, end, rtop - 1, default_dec_spec); } return buf; } From 4d74190d30c82e387621768a223ae90d7a334c1b Mon Sep 17 00:00:00 2001 From: Orlando Chamberlain Date: Sat, 2 Oct 2021 11:16:30 +0000 Subject: [PATCH 0123/1202] mfd: intel-lpss: Add support for MacBookPro16,2 ICL-N UART Added 8086:38a8 to the intel_lpss_pci driver. It is an Intel Ice Lake PCH-N UART controller present on the MacBookPro16,2. Signed-off-by: Orlando Chamberlain Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20211002111449.12674-1-redecorating@protonmail.com --- drivers/mfd/intel-lpss-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index c54d19fb184ca..a872b4485eacf 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -253,6 +253,8 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x34ea), (kernel_ulong_t)&bxt_i2c_info }, { PCI_VDEVICE(INTEL, 0x34eb), (kernel_ulong_t)&bxt_i2c_info }, { PCI_VDEVICE(INTEL, 0x34fb), (kernel_ulong_t)&spt_info }, + /* ICL-N */ + { PCI_VDEVICE(INTEL, 0x38a8), (kernel_ulong_t)&bxt_uart_info }, /* TGL-H */ { PCI_VDEVICE(INTEL, 0x43a7), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x43a8), (kernel_ulong_t)&bxt_uart_info }, From 2e7aa399dffce2a281b50e92df2b16c625d635e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:34 +0100 Subject: [PATCH 0124/1202] irqdomain: Make of_phandle_args_to_fwspec() generally available of_phandle_args_to_fwspec() can be generally useful to code extracting a DT of_phandle and using an irq_fwspec with the hierarchical irqdomain API. Make it visible to the rest of the kernel, including modules.
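A typical caller (illustrative snippet; 'args' would come from e.g. of_irq_parse_one()) converts the parsed phandle arguments and maps them through the hierarchical API:

	struct of_phandle_args args;
	struct irq_fwspec fwspec;
	unsigned int virq;

	/* translate DT phandle arguments into a generic fwspec ... */
	of_phandle_args_to_fwspec(args.np, args.args, args.args_count, &fwspec);
	/* ... and create the Linux interrupt mapping from it */
	virq = irq_create_fwspec_mapping(&fwspec);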
Link: https://lore.kernel.org/r/20210929163847.2807812-2-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi --- include/linux/irqdomain.h | 4 ++++ kernel/irq/irqdomain.c | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 23e4ee5235769..cfd442316f395 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -64,6 +64,10 @@ struct irq_fwspec { u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; +/* Conversion function from of_phandle_args fields to fwspec */ +void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec); + /* * Should several domains have the same device node, but serve * different purposes (for example one domain is for PCI/MSI, and the diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 19e83e9b723ce..5a698c1f6cc68 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -744,9 +744,8 @@ static int irq_domain_translate(struct irq_domain *d, return 0; } -static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, - unsigned int count, - struct irq_fwspec *fwspec) +void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec) { int i; @@ -756,6 +755,7 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, for (i = 0; i < count; i++) fwspec->param[i] = args[i]; } +EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { From 5a58ff59f2447f400fae25998747ba2647a64b2f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:35 +0100 Subject: [PATCH 0125/1202] of/irq: Allow matching of an interrupt-map local to an interrupt controller of_irq_parse_raw() has a baked assumption that if a node has an interrupt-controller property, it cannot possibly also have an interrupt-map property (the latter being ignored). This seems to be an odd behaviour, and there is no reason why we should avoid supporting this use case. This is specially useful when a PCI root port acts as an interrupt controller for PCI endpoints, such as this: pcie0: pcie@690000000 { [...] port00: pci@0,0 { device_type = "pci"; [...] #address-cells = <3>; interrupt-controller; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &port00 0 0 0 0>, <0 0 0 2 &port00 0 0 0 1>, <0 0 0 3 &port00 0 0 0 2>, <0 0 0 4 &port00 0 0 0 3>; }; }; Handle it by detecting that we have an interrupt-map early in the parsing, and special case the situation where the phandle in the interrupt map refers to the current node (which is the interesting case here). 
Link: https://lore.kernel.org/r/20210929163847.2807812-3-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- drivers/of/irq.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 352e14b007e78..32be5a03951fa 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -156,10 +156,14 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) /* Now start the actual "proper" walk of the interrupt tree */ while (ipar != NULL) { - /* Now check if cursor is an interrupt-controller and if it is - * then we are done + /* + * Now check if cursor is an interrupt-controller and + * if it is then we are done, unless there is an + * interrupt-map which takes precedence. */ - if (of_property_read_bool(ipar, "interrupt-controller")) { + imap = of_get_property(ipar, "interrupt-map", &imaplen); + if (imap == NULL && + of_property_read_bool(ipar, "interrupt-controller")) { pr_debug(" -> got it !\n"); return 0; } @@ -173,8 +177,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) goto fail; } - /* Now look for an interrupt-map */ - imap = of_get_property(ipar, "interrupt-map", &imaplen); /* No interrupt map, check for an interrupt parent */ if (imap == NULL) { pr_debug(" -> no map, getting parent\n"); @@ -255,6 +257,11 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) out_irq->args_count = intsize = newintsize; addrsize = newaddrsize; + if (ipar == newpar) { + pr_debug("%pOF interrupt-map entry to self\n", ipar); + return 0; + } + skiplevel: /* Iterate again with new parent */ out_irq->np = newpar; From 432c404c03cb349c8deac5ce38be061d51ed3566 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:36 +0100 Subject: [PATCH 0126/1202] PCI: of: Allow matching of an interrupt-map local to a PCI device Just as we now allow an interrupt map to be parsed when part of an interrupt controller, there is no reason to ignore an interrupt map that would be part of a PCI device node such as a root port, since we already allow interrupt specifiers. Allow the matching of such a property when local to the node of a PCI device, which allows the device itself to use the interrupt map for its own purpose. Link: https://lore.kernel.org/r/20210929163847.2807812-4-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Acked-by: Bjorn Helgaas --- drivers/pci/of.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/of.c b/drivers/pci/of.c index d84381ce82b52..0b1237cff239a 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -423,7 +423,7 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev, */ static int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { - struct device_node *dn, *ppnode; + struct device_node *dn, *ppnode = NULL; struct pci_dev *ppdev; __be32 laddr[3]; u8 pin; @@ -452,8 +452,14 @@ static int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args * if (pin == 0) return -ENODEV; + /* Local interrupt-map in the device node? Use it! 
*/ + if (of_get_property(dn, "interrupt-map", NULL)) { + pin = pci_swizzle_interrupt_pin(pdev, pin); + ppnode = dn; + } + /* Now we walk up the PCI tree */ - for (;;) { + while (!ppnode) { /* Get the pci_dev of our parent */ ppdev = pdev->bus->self; From 66dbd6888e178a8e2210bbf8a30092d03f905c5f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 11 Sep 2020 16:49:00 -0700 Subject: [PATCH 0127/1202] lib: zstd: Add kernel-specific API This patch: - Moves `include/linux/zstd.h` -> `include/linux/zstd_lib.h` - Updates modified zstd headers to yearless copyright - Adds a new API in `include/linux/zstd.h` that is functionally equivalent to the in-use subset of the current API. Functions are renamed to avoid symbol collisions with zstd, to make it clear it is not the upstream zstd API, and to follow the kernel style guide. - Updates all callers to use the new API. There are no functional changes in this patch. Since there are no functional change, I felt it was okay to update all the callers in a single patch. Once the API is approved, the callers are mechanically changed. This patch is preparing for the 3rd patch in this series, which updates zstd to version 1.4.10. Since the upstream zstd API is no longer exposed to callers, the update can happen transparently. Signed-off-by: Nick Terrell --- crypto/zstd.c | 28 +- fs/btrfs/zstd.c | 68 +- fs/f2fs/compress.c | 56 +- fs/f2fs/super.c | 2 +- fs/pstore/platform.c | 2 +- fs/squashfs/zstd_wrapper.c | 16 +- include/linux/zstd.h | 1243 ++++++++---------------------------- include/linux/zstd_lib.h | 1157 +++++++++++++++++++++++++++++++++ lib/decompress_unzstd.c | 42 +- lib/zstd/compress.c | 123 ++-- lib/zstd/decompress.c | 112 ++-- 11 files changed, 1691 insertions(+), 1158 deletions(-) create mode 100644 include/linux/zstd_lib.h diff --git a/crypto/zstd.c b/crypto/zstd.c index 1a3309f066f7e..154a969c83a82 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -18,22 +18,22 @@ #define ZSTD_DEF_LEVEL 3 struct zstd_ctx { - ZSTD_CCtx *cctx; - ZSTD_DCtx *dctx; + zstd_cctx *cctx; + zstd_dctx *dctx; void *cwksp; void *dwksp; }; -static ZSTD_parameters zstd_params(void) +static zstd_parameters zstd_params(void) { - return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0); + return zstd_get_params(ZSTD_DEF_LEVEL, 0); } static int zstd_comp_init(struct zstd_ctx *ctx) { int ret = 0; - const ZSTD_parameters params = zstd_params(); - const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams); + const zstd_parameters params = zstd_params(); + const size_t wksp_size = zstd_cctx_workspace_bound(¶ms.cParams); ctx->cwksp = vzalloc(wksp_size); if (!ctx->cwksp) { @@ -41,7 +41,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx) goto out; } - ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size); + ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size); if (!ctx->cctx) { ret = -EINVAL; goto out_free; @@ -56,7 +56,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx) static int zstd_decomp_init(struct zstd_ctx *ctx) { int ret = 0; - const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); + const size_t wksp_size = zstd_dctx_workspace_bound(); ctx->dwksp = vzalloc(wksp_size); if (!ctx->dwksp) { @@ -64,7 +64,7 @@ static int zstd_decomp_init(struct zstd_ctx *ctx) goto out; } - ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size); + ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size); if (!ctx->dctx) { ret = -EINVAL; goto out_free; @@ -152,10 +152,10 @@ static int __zstd_compress(const u8 *src, unsigned int slen, { size_t out_len; struct zstd_ctx *zctx = ctx; - const ZSTD_parameters params = zstd_params(); + 
const zstd_parameters params = zstd_params(); - out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params); - if (ZSTD_isError(out_len)) + out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, ¶ms); + if (zstd_is_error(out_len)) return -EINVAL; *dlen = out_len; return 0; @@ -182,8 +182,8 @@ static int __zstd_decompress(const u8 *src, unsigned int slen, size_t out_len; struct zstd_ctx *zctx = ctx; - out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen); - if (ZSTD_isError(out_len)) + out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen); + if (zstd_is_error(out_len)) return -EINVAL; *dlen = out_len; return 0; diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 56dce9f00988c..38193b4a57dae 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -28,10 +28,10 @@ /* 307s to avoid pathologically clashing with transaction commit */ #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ) -static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level, +static zstd_parameters zstd_get_btrfs_parameters(unsigned int level, size_t src_len) { - ZSTD_parameters params = ZSTD_getParams(level, src_len, 0); + zstd_parameters params = zstd_get_params(level, src_len); if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; @@ -48,8 +48,8 @@ struct workspace { unsigned long last_used; /* jiffies */ struct list_head list; struct list_head lru_list; - ZSTD_inBuffer in_buf; - ZSTD_outBuffer out_buf; + zstd_in_buffer in_buf; + zstd_out_buffer out_buf; }; /* @@ -155,12 +155,12 @@ static void zstd_calc_ws_mem_sizes(void) unsigned int level; for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) { - ZSTD_parameters params = + zstd_parameters params = zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT); size_t level_size = max_t(size_t, - ZSTD_CStreamWorkspaceBound(params.cParams), - ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); + zstd_cstream_workspace_bound(¶ms.cParams), + zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT)); max_size = max_t(size_t, max_size, level_size); zstd_ws_mem_sizes[level - 1] = max_size; @@ -371,7 +371,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_CStream *stream; + zstd_cstream *stream; int ret = 0; int nr_pages = 0; struct page *in_page = NULL; /* The current page to read */ @@ -381,7 +381,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long len = *total_out; const unsigned long nr_dest_pages = *out_pages; unsigned long max_out = nr_dest_pages * PAGE_SIZE; - ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level, + zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level, len); *out_pages = 0; @@ -389,10 +389,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = 0; /* Initialize the stream */ - stream = ZSTD_initCStream(params, len, workspace->mem, + stream = zstd_init_cstream(¶ms, len, workspace->mem, workspace->size); if (!stream) { - pr_warn("BTRFS: ZSTD_initCStream failed\n"); + pr_warn("BTRFS: zstd_init_cstream failed\n"); ret = -EIO; goto out; } @@ -418,11 +418,11 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, while (1) { size_t ret2; - ret2 = ZSTD_compressStream(stream, &workspace->out_buf, + ret2 = zstd_compress_stream(stream, &workspace->out_buf, &workspace->in_buf); - 
if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_compressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_compress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto out; } @@ -485,10 +485,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, while (1) { size_t ret2; - ret2 = ZSTD_endStream(stream, &workspace->out_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_endStream returned %d\n", - ZSTD_getErrorCode(ret2)); + ret2 = zstd_end_stream(stream, &workspace->out_buf); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_end_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto out; } @@ -541,17 +541,17 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) struct workspace *workspace = list_entry(ws, struct workspace, list); struct page **pages_in = cb->compressed_pages; size_t srclen = cb->compressed_len; - ZSTD_DStream *stream; + zstd_dstream *stream; int ret = 0; unsigned long page_in_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long total_out = 0; - stream = ZSTD_initDStream( + stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { - pr_debug("BTRFS: ZSTD_initDStream failed\n"); + pr_debug("BTRFS: zstd_init_dstream failed\n"); ret = -EIO; goto done; } @@ -567,11 +567,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) while (1) { size_t ret2; - ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_decompress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto done; } @@ -615,16 +615,16 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_DStream *stream; + zstd_dstream *stream; int ret = 0; size_t ret2; unsigned long total_out = 0; unsigned long pg_offset = 0; - stream = ZSTD_initDStream( + stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { - pr_warn("BTRFS: ZSTD_initDStream failed\n"); + pr_warn("BTRFS: zstd_init_dstream failed\n"); ret = -EIO; goto finish; } @@ -648,15 +648,15 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, /* Check if the frame is over and we still need more input */ if (ret2 == 0) { - pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); + pr_debug("BTRFS: zstd_decompress_stream ended early\n"); ret = -EIO; goto finish; } - ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_decompress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto finish; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1bf9ad4c2207..f56bb2501d1b6 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -335,8 +335,8 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { static int zstd_init_compress_ctx(struct compress_ctx *cc) { - ZSTD_parameters params; - ZSTD_CStream *stream; + 
zstd_parameters params; + zstd_cstream *stream; void *workspace; unsigned int workspace_size; unsigned char level = F2FS_I(cc->inode)->i_compress_flag >> @@ -345,17 +345,17 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) if (!level) level = F2FS_ZSTD_DEFAULT_CLEVEL; - params = ZSTD_getParams(level, cc->rlen, 0); - workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen); + workspace_size = zstd_cstream_workspace_bound(¶ms.cParams); workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), workspace_size, GFP_NOFS); if (!workspace) return -ENOMEM; - stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + stream = zstd_init_cstream(¶ms, 0, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n", + printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, __func__); kvfree(workspace); @@ -378,9 +378,9 @@ static void zstd_destroy_compress_ctx(struct compress_ctx *cc) static int zstd_compress_pages(struct compress_ctx *cc) { - ZSTD_CStream *stream = cc->private2; - ZSTD_inBuffer inbuf; - ZSTD_outBuffer outbuf; + zstd_cstream *stream = cc->private2; + zstd_in_buffer inbuf; + zstd_out_buffer outbuf; int src_size = cc->rlen; int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE; int ret; @@ -393,19 +393,19 @@ static int zstd_compress_pages(struct compress_ctx *cc) outbuf.dst = cc->cbuf->cdata; outbuf.size = dst_size; - ret = ZSTD_compressStream(stream, &outbuf, &inbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + ret = zstd_compress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } - ret = ZSTD_endStream(stream, &outbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n", + ret = zstd_end_stream(stream, &outbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } @@ -422,22 +422,22 @@ static int zstd_compress_pages(struct compress_ctx *cc) static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) { - ZSTD_DStream *stream; + zstd_dstream *stream; void *workspace; unsigned int workspace_size; unsigned int max_window_size = MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size); - workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size); + workspace_size = zstd_dstream_workspace_bound(max_window_size); workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), workspace_size, GFP_NOFS); if (!workspace) return -ENOMEM; - stream = ZSTD_initDStream(max_window_size, workspace, workspace_size); + stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n", + printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n", KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, __func__); kvfree(workspace); @@ -459,9 +459,9 @@ static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) static int zstd_decompress_pages(struct decompress_io_ctx *dic) { - ZSTD_DStream *stream = dic->private2; - ZSTD_inBuffer inbuf; 
- ZSTD_outBuffer outbuf; + zstd_dstream *stream = dic->private2; + zstd_in_buffer inbuf; + zstd_out_buffer outbuf; int ret; inbuf.pos = 0; @@ -472,11 +472,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) outbuf.dst = dic->rbuf; outbuf.size = dic->rlen; - ret = ZSTD_decompressStream(stream, &outbuf, &inbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + ret = zstd_decompress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n", KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 78ebc306ee2b5..97e5a051c320c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -592,7 +592,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) if (kstrtouint(str + 1, 10, &level)) return -EINVAL; - if (!level || level > ZSTD_maxCLevel()) { + if (!level || level > zstd_max_clevel()) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index b9614db48b1de..f243cb5e6a4fb 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -218,7 +218,7 @@ static int zbufsize_842(size_t size) #if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS) static int zbufsize_zstd(size_t size) { - return ZSTD_compressBound(size); + return zstd_compress_bound(size); } #endif diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index 0015cf8b55820..c40445dbf38c7 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -34,7 +34,7 @@ static void *zstd_init(struct squashfs_sb_info *msblk, void *buff) goto failed; wksp->window_size = max_t(size_t, msblk->block_size, SQUASHFS_METADATA_SIZE); - wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size); + wksp->mem_size = zstd_dstream_workspace_bound(wksp->window_size); wksp->mem = vmalloc(wksp->mem_size); if (wksp->mem == NULL) goto failed; @@ -63,15 +63,15 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, struct squashfs_page_actor *output) { struct workspace *wksp = strm; - ZSTD_DStream *stream; + zstd_dstream *stream; size_t total_out = 0; int error = 0; - ZSTD_inBuffer in_buf = { NULL, 0, 0 }; - ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + zstd_in_buffer in_buf = { NULL, 0, 0 }; + zstd_out_buffer out_buf = { NULL, 0, 0 }; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); - stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size); + stream = zstd_init_dstream(wksp->window_size, wksp->mem, wksp->mem_size); if (!stream) { ERROR("Failed to initialize zstd decompressor\n"); @@ -116,14 +116,14 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, } total_out -= out_buf.pos; - zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf); + zstd_err = zstd_decompress_stream(stream, &out_buf, &in_buf); total_out += out_buf.pos; /* add the additional data produced */ if (zstd_err == 0) break; - if (ZSTD_isError(zstd_err)) { + if (zstd_is_error(zstd_err)) { ERROR("zstd decompression error: %d\n", - (int)ZSTD_getErrorCode(zstd_err)); + (int)zstd_get_error_code(zstd_err)); error = -EIO; break; } diff --git a/include/linux/zstd.h b/include/linux/zstd.h index e87f78c9b19cc..9fbc7729b0a0d 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,138 +1,96 
@@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd) and + * the GPLv2 (found in the COPYING file in the root directory of + * https://github.com/facebook/zstd). You may select, at your option, one of the + * above-listed licenses. */ -#ifndef ZSTD_H -#define ZSTD_H +#ifndef LINUX_ZSTD_H +#define LINUX_ZSTD_H -/* ====== Dependency ======*/ -#include <linux/types.h> /* size_t */ +/** + * This is a kernel-style API that wraps the upstream zstd API, which cannot be + * used directly because the symbols aren't exported. It exposes the minimal + * functionality which is currently required by users of zstd in the kernel. + * Expose extra functions from lib/zstd/zstd.h as needed. + */ +/* ====== Dependency ====== */ +#include <linux/types.h> +#include <linux/zstd_lib.h> -/*-***************************************************************************** - * Introduction +/* ====== Helper Functions ====== */ +/** + * zstd_compress_bound() - maximum compressed size in worst case scenario + * @src_size: The size of the data to compress. * - * zstd, short for Zstandard, is a fast lossless compression algorithm, - * targeting real-time compression scenarios at zlib-level and better - * compression ratios. The zstd compression library provides in-memory - * compression and decompression functions. The library supports compression - * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled - * ultra, should be used with caution, as they require more memory. - * Compression can be done in: - * - a single step, reusing a context (described as Explicit memory management) - * - unbounded multiple steps (described as Streaming compression) - * The compression ratio achievable on small data can be highly improved using - * compression with a dictionary in: - * - a single step (described as Simple dictionary API) - * - a single step, reusing a dictionary (described as Fast dictionary API) - ******************************************************************************/ - -/*====== Helper functions ======*/ + * Return: The maximum compressed size in the worst case scenario. + */ +size_t zstd_compress_bound(size_t src_size); /** - * enum ZSTD_ErrorCode - zstd error codes + * zstd_is_error() - tells if a size_t function result is an error code + * @code: The function result to check for error. * - * Functions that return size_t can be checked for errors using ZSTD_isError() - * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + * Return: Non-zero iff the code is an error.
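For reference, the wrappers keep upstream zstd's convention that most functions return a size_t which is either a byte count or an encoded error. A minimal sketch of the expected call pattern, using zstd_is_error() above together with zstd_get_error_code() declared just below; the helper name and the -EIO mapping are illustrative, not part of the patch:

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/zstd.h>

/* Illustrative helper: map a zstd size_t result onto 0 or -EIO. */
static int example_zstd_check(size_t ret)
{
        if (zstd_is_error(ret)) {
                pr_err("zstd: operation failed, error code %d\n",
                       (int)zstd_get_error_code(ret));
                return -EIO;
        }
        return 0;       /* ret is a valid byte count */
}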
+ */ +unsigned int zstd_is_error(size_t code); + +/** + * enum zstd_error_code - zstd error codes */ -typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode -} ZSTD_ErrorCode; +typedef ZSTD_ErrorCode zstd_error_code; /** - * ZSTD_maxCLevel() - maximum compression level available + * zstd_get_error_code() - translates an error function result to an error code + * @code: The function result for which zstd_is_error(code) is true. * - * Return: Maximum compression level available. + * Return: A unique error code for this error. */ -int ZSTD_maxCLevel(void); +zstd_error_code zstd_get_error_code(size_t code); + /** - * ZSTD_compressBound() - maximum compressed size in worst case scenario - * @srcSize: The size of the data to compress. + * zstd_get_error_name() - translates an error function result to a string + * @code: The function result for which zstd_is_error(code) is true. * - * Return: The maximum compressed size in the worst case scenario. + * Return: An error string corresponding to the error code. */ -size_t ZSTD_compressBound(size_t srcSize); +const char *zstd_get_error_name(size_t code); + /** - * ZSTD_isError() - tells if a size_t function result is an error code - * @code: The function result to check for error. + * zstd_min_clevel() - minimum allowed compression level * - * Return: Non-zero iff the code is an error. + * Return: The minimum allowed compression level. */ -static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) -{ - return code > (size_t)-ZSTD_error_maxCode; -} +int zstd_min_clevel(void); + /** - * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode - * @functionResult: The result of a function for which ZSTD_isError() is true. + * zstd_max_clevel() - maximum allowed compression level * - * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 - * if the functionResult isn't an error. + * Return: The maximum allowed compression level. */ -static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( - size_t functionResult) -{ - if (!ZSTD_isError(functionResult)) - return (ZSTD_ErrorCode)0; - return (ZSTD_ErrorCode)(0 - functionResult); -} +int zstd_max_clevel(void); + +/* ====== Parameter Selection ====== */ /** - * enum ZSTD_strategy - zstd compression search strategy + * enum zstd_strategy - zstd compression search strategy * - * From faster to stronger. + * From faster to stronger. See zstd_lib.h. */ -typedef enum { - ZSTD_fast, - ZSTD_dfast, - ZSTD_greedy, - ZSTD_lazy, - ZSTD_lazy2, - ZSTD_btlazy2, - ZSTD_btopt, - ZSTD_btopt2 -} ZSTD_strategy; +typedef ZSTD_strategy zstd_strategy; /** - * struct ZSTD_compressionParameters - zstd compression parameters + * struct zstd_compression_parameters - zstd compression parameters * @windowLog: Log of the largest match distance. 
Larger means more * compression, and more memory needed during decompression. - * @chainLog: Fully searched segment. Larger means more compression, slower, - * and more memory (useless for fast). + * @chainLog: Fully searched segment. Larger means more compression, + * slower, and more memory (useless for fast). * @hashLog: Dispatch table. Larger means more compression, * slower, and more memory. * @searchLog: Number of searches. Larger means more compression and slower. @@ -141,1017 +99,342 @@ typedef enum { * @targetLength: Acceptable match size for optimal parser (only). Larger means * more compression, and slower. * @strategy: The zstd compression strategy. + * + * See zstd_lib.h. */ -typedef struct { - unsigned int windowLog; - unsigned int chainLog; - unsigned int hashLog; - unsigned int searchLog; - unsigned int searchLength; - unsigned int targetLength; - ZSTD_strategy strategy; -} ZSTD_compressionParameters; +typedef ZSTD_compressionParameters zstd_compression_parameters; /** - * struct ZSTD_frameParameters - zstd frame parameters - * @contentSizeFlag: Controls whether content size will be present in the frame - * header (when known). - * @checksumFlag: Controls whether a 32-bit checksum is generated at the end - * of the frame for error detection. - * @noDictIDFlag: Controls whether dictID will be saved into the frame header - * when using dictionary compression. + * struct zstd_frame_parameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the + * frame header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the + * end of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame + * header when using dictionary compression. * - * The default value is all fields set to 0. + * The default value is all fields set to 0. See zstd_lib.h. */ -typedef struct { - unsigned int contentSizeFlag; - unsigned int checksumFlag; - unsigned int noDictIDFlag; -} ZSTD_frameParameters; +typedef ZSTD_frameParameters zstd_frame_parameters; /** - * struct ZSTD_parameters - zstd parameters + * struct zstd_parameters - zstd parameters * @cParams: The compression parameters. * @fParams: The frame parameters. */ -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +typedef ZSTD_parameters zstd_parameters; /** - * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * zstd_get_params() - returns zstd_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. * - * Return: The selected ZSTD_compressionParameters. + * Return: The selected zstd_parameters. */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size); -/** - * ZSTD_getParams() - returns ZSTD_parameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. 
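Callers are expected to pick a compression level and let zstd_get_params() derive the detailed parameters from that level and the estimated input size. A hypothetical sketch that clamps the requested level with zstd_min_clevel()/zstd_max_clevel() from above (the function name is illustrative):

#include <linux/zstd.h>

/* Illustrative: derive zstd_parameters for a requested level and size. */
static zstd_parameters example_pick_params(int level, size_t src_size)
{
        if (level < zstd_min_clevel())
                level = zstd_min_clevel();
        if (level > zstd_max_clevel())
                level = zstd_max_clevel();
        return zstd_get_params(level, src_size);
}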
- * - * The same as ZSTD_getCParams() except also selects the default frame - * parameters (all zero). - * - * Return: The selected ZSTD_parameters. - */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +/* ====== Single-pass Compression ====== */ -/*-************************************* - * Explicit memory management - **************************************/ +typedef ZSTD_CCtx zstd_cctx; /** - * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx - * @cParams: The compression parameters to be used for compression. + * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx + * @parameters: The compression parameters to be used. * * If multiple compression parameters might be used, the caller must call - * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * zstd_cctx_workspace_bound() for each set of parameters and use the maximum * size. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCCtx(). + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). */ -size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); /** - * struct ZSTD_CCtx - the zstd compression context - * - * When compressing many times it is recommended to allocate a context just once - * and reuse it for each successive compression operation. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -/** - * ZSTD_initCCtx() - initialize a zstd compression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A compression context emplaced into workspace. - */ -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_compressCCtx() - compress src into dst - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, ZSTD_parameters params); - -/** - * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx - * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDCtx(). - */ -size_t ZSTD_DCtxWorkspaceBound(void); - -/** - * struct ZSTD_DCtx - the zstd decompression context - * - * When decompressing many times it is recommended to allocate a context just - * once and reuse it for each successive decompression operation. - */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -/** - * ZSTD_initDCtx() - initialize a zstd decompression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to - * determine how large the workspace must be. 
- * - * Return: A decompression context emplaced into workspace. - */ -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_decompressDCtx() - decompress zstd compressed src into dst - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - -/*-************************ - * Simple dictionary API - **************************/ - -/** - * ZSTD_compress_usingDict() - compress src into dst using a dictionary - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @dict: The dictionary to use for compression. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a predefined dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params); - -/** - * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @dict: The dictionary to use for decompression. The same dictionary - * must've been used to compress the data. - * @dictSize: The size of the dictionary. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize); - -/*-************************** - * Fast dictionary API - ***************************/ - -/** - * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict - * @cParams: The compression parameters to be used for compression. + * zstd_init_cctx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to + * determine how large the workspace must be. 
* - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCDict(). - */ -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CDict - a digested dictionary to be used for compression + * Return: A zstd compression context or NULL on error. */ -typedef struct ZSTD_CDict_s ZSTD_CDict; +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); /** - * ZSTD_initCDict() - initialize a digested dictionary for compression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_CDict so it must outlive the returned ZSTD_CDict. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * @workspace: The workspace. It must outlive the returned ZSTD_CDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_CDictWorkspaceBound(params.cParams). + * zstd_compress_cctx() - compress src into dst with the initialized parameters + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * zstd_compress_bound(src_size) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @parameters: The compression parameters to be used. * - * When compressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_CDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_CDict. - * - * Return: The digested dictionary emplaced into workspace. + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). */ -ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize, - ZSTD_parameters params, void *workspace, size_t workspaceSize); +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters); -/** - * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(cParams) where - * cParams are the compression parameters used to initialize the - * cdict. - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @cdict: The digested dictionary to use for compression. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a digested dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const ZSTD_CDict *cdict); +/* ====== Single-pass Decompression ====== */ +typedef ZSTD_DCtx zstd_dctx; /** - * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict + * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDDict().
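Putting the single-pass compression pieces together: size a workspace with zstd_cctx_workspace_bound(), emplace a context with zstd_init_cctx(), then call zstd_compress_cctx(). A sketch under the declarations above; the function name, the level 3 choice, and the kvmalloc-based allocation are illustrative:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/zstd.h>

static int example_compress_once(const void *src, size_t src_size,
                                 void *dst, size_t dst_capacity)
{
        zstd_parameters params = zstd_get_params(3, src_size);
        size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
        void *wksp = kvmalloc(wksp_size, GFP_KERNEL);
        zstd_cctx *cctx;
        size_t ret;

        if (!wksp)
                return -ENOMEM;
        cctx = zstd_init_cctx(wksp, wksp_size);
        if (!cctx) {
                kvfree(wksp);
                return -EINVAL;
        }
        ret = zstd_compress_cctx(cctx, dst, dst_capacity, src, src_size,
                                 &params);
        kvfree(wksp);
        return zstd_is_error(ret) ? -EIO : (int)ret; /* compressed size */
}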
- */ -size_t ZSTD_DDictWorkspaceBound(void); - -/** - * struct ZSTD_DDict - a digested dictionary to be used for decompression + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_dctx(). */ -typedef struct ZSTD_DDict_s ZSTD_DDict; +size_t zstd_dctx_workspace_bound(void); /** - * ZSTD_initDDict() - initialize a digested dictionary for decompression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_DDict so it must outlive the returned ZSTD_DDict. - * @dictSize: The size of the dictionary. - * @workspace: The workspace. It must outlive the returned ZSTD_DDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_DDictWorkspaceBound(). - * - * When decompressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_DDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_DDict. + * zstd_init_dctx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to + * determine how large the workspace must be. * - * Return: The digested dictionary emplaced into workspace. + * Return: A zstd decompression context or NULL on error. */ -ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, - void *workspace, size_t workspaceSize); +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); /** - * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @ddict: The digested dictionary to use for decompression. The same - * dictionary must've been used to compress the data. + * zstd_decompress_dctx() - decompress zstd compressed src into dst + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, - size_t dstCapacity, const void *src, size_t srcSize, - const ZSTD_DDict *ddict); +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size); - -/*-************************** - * Streaming - ***************************/ +/* ====== Streaming Buffers ====== */ /** - * struct ZSTD_inBuffer - input buffer for streaming + * struct zstd_in_buffer - input buffer for streaming * @src: Start of the input buffer. * @size: Size of the input buffer. * @pos: Position where reading stopped. 
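The single-pass decompression side mirrors this, with a workspace bound that takes no parameters. A sketch assuming the caller knows an upper bound on the decompressed size (names are illustrative):

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/zstd.h>

static int example_decompress_once(const void *src, size_t src_size,
                                   void *dst, size_t dst_capacity)
{
        size_t wksp_size = zstd_dctx_workspace_bound();
        void *wksp = kvmalloc(wksp_size, GFP_KERNEL);
        zstd_dctx *dctx;
        size_t ret;

        if (!wksp)
                return -ENOMEM;
        dctx = zstd_init_dctx(wksp, wksp_size);
        if (!dctx) {
                kvfree(wksp);
                return -EINVAL;
        }
        ret = zstd_decompress_dctx(dctx, dst, dst_capacity, src, src_size);
        kvfree(wksp);
        return zstd_is_error(ret) ? -EIO : (int)ret; /* decompressed size */
}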
Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_inBuffer_s { - const void *src; - size_t size; - size_t pos; -} ZSTD_inBuffer; +typedef ZSTD_inBuffer zstd_in_buffer; /** - * struct ZSTD_outBuffer - output buffer for streaming + * struct zstd_out_buffer - output buffer for streaming * @dst: Start of the output buffer. * @size: Size of the output buffer. * @pos: Position where writing stopped. Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_outBuffer_s { - void *dst; - size_t size; - size_t pos; -} ZSTD_outBuffer; +typedef ZSTD_outBuffer zstd_out_buffer; +/* ====== Streaming Compression ====== */ - -/*-***************************************************************************** - * Streaming compression - HowTo - * - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. - * ZSTD_CStream objects can be reused multiple times on consecutive compression - * operations. It is recommended to re-use ZSTD_CStream in situations where many - * streaming operations will be achieved consecutively. Use one separate - * ZSTD_CStream per thread for parallel execution. - * - * Use ZSTD_compressStream() repetitively to consume input stream. - * The function will automatically update both `pos` fields. - * Note that it may not consume the entire input, in which case `pos < size`, - * and it's up to the caller to present again remaining data. - * It returns a hint for the preferred number of bytes to use as an input for - * the next function call. - * - * At any moment, it's possible to flush whatever data remains within internal - * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might - * still be some content left within the internal buffer if `output->size` is - * too small. It returns the number of bytes left in the internal buffer and - * must be called until it returns 0. - * - * ZSTD_endStream() instructs to finish a frame. It will perform a flush and - * write frame epilogue. The epilogue is required for decoders to consider a - * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush - * the full content if `output->size` is too small. In which case, call again - * ZSTD_endStream() to complete the flush. It returns the number of bytes left - * in the internal buffer and must be called until it returns 0. - ******************************************************************************/ +typedef ZSTD_CStream zstd_cstream; /** - * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream - * @cParams: The compression parameters to be used for compression. + * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream + * @cparams: The compression parameters to be used for compression. * * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). - */ -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CStream - the zstd streaming compression context + * zstd_init_cstream(). */ -typedef struct ZSTD_CStream_s ZSTD_CStream; +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); -/*===== ZSTD_CStream management functions =====*/ /** - * ZSTD_initCStream() - initialize a zstd streaming compression context - * @params: The zstd compression parameters. 
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must - * pass the source size (zero means empty source). Otherwise, - * the caller may optionally pass the source size, or zero if - * unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine - * how large the workspace must be. + * zstd_init_cstream() - initialize a zstd streaming compression context + * @parameters: The zstd parameters to use for compression. + * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller + * must pass the source size (zero means empty source). + * Otherwise, the caller may optionally pass the source + * size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_cstream_workspace_bound(&parameters->cParams) to + * determine how large the workspace must be. * - * Return: The zstd streaming compression context. + * Return: The zstd streaming compression context or NULL on error. */ -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size); /** - * ZSTD_initCStream_usingCDict() - initialize a streaming compression context - * @cdict: The digested dictionary to use for compression. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() - * with the cParams used to initialize the cdict to determine - * how large the workspace must be. - * - * Return: The zstd streaming compression context. - */ -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); - -/*===== Streaming compression functions =====*/ -/** - * ZSTD_resetCStream() - reset the context using parameters from creation - * @zcs: The zstd streaming compression context to reset. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * zstd_reset_cstream() - reset the context using parameters from creation + * @cstream: The zstd streaming compression context to reset. + * @pledged_src_size: Optionally the source size, or zero if unknown. * * Resets the context using the parameters from creation. Skips dictionary - * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * loading, since it can be reused. If `pledged_src_size` is non-zero the frame * content size is always written into the frame header. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size); + /** - * ZSTD_compressStream() - streaming compress some of input into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * @input: Source buffer.
`input->pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input->pos < input->size`, and it's up to the caller to present - * remaining data again. + * zstd_compress_stream() - streaming compress some of input into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data + * was read. Note that it may not consume the entire input, in which + * case `input->pos < input->size`, and it's up to the caller to + * present remaining data again. * * The `input` and `output` buffers may be any size. Guaranteed to make some * forward progress if `input` and `output` are not empty. * - * Return: A hint for the number of bytes to use as the input for the next - * function call or an error, which can be checked using - * ZSTD_isError(). + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input); + /** - * ZSTD_flushStream() - flush internal buffers into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. + * zstd_flush_stream() - flush internal buffers into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * ZSTD_flushStream() must be called until it returns 0, meaning all the data - * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * zstd_flush_stream() must be called until it returns 0, meaning all the data + * has been flushed. Since zstd_flush_stream() causes a block to be ended, * calling it too often will degrade the compression ratio. * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -/** - * ZSTD_endStream() - flush internal buffers into output and end the frame - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * - * ZSTD_endStream() must be called until it returns 0, meaning all the data has - * been flushed and the frame epilogue has been written. - * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). */ -size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output); /** - * ZSTD_CStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_CStreamInSize(void); -/** - * ZSTD_CStreamOutSize() - recommended size for the output buffer + * zstd_end_stream() - flush internal buffers into output and end the frame + * @cstream: The zstd streaming compression context. 
+ * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete compressed block. + * zstd_end_stream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. * - * Return: The recommended size for the output buffer. + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). */ -size_t ZSTD_CStreamOutSize(void); +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output); +/* ====== Streaming Decompression ====== */ - -/*-***************************************************************************** - * Streaming decompression - HowTo - * - * A ZSTD_DStream object is required to track streaming operations. - * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. - * ZSTD_DStream objects can be re-used multiple times. - * - * Use ZSTD_decompressStream() repetitively to consume your input. - * The function will update both `pos` fields. - * If `input->pos < input->size`, some input has not been consumed. - * It's up to the caller to present again remaining data. - * If `output->pos < output->size`, decoder has flushed everything it could. - * Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise it returns a suggested next input size that will never load more - * than the current frame. - ******************************************************************************/ +typedef ZSTD_DStream zstd_dstream; /** - * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream - * @maxWindowSize: The maximum window size allowed for compressed frames. + * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream + * @max_window_size: The maximum window size allowed for compressed frames. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). + * Return: A lower bound on the size of the workspace that is passed + * to zstd_init_dstream(). */ -size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); +size_t zstd_dstream_workspace_bound(size_t max_window_size); /** - * struct ZSTD_DStream - the zstd streaming decompression context - */ -typedef struct ZSTD_DStream_s ZSTD_DStream; -/*===== ZSTD_DStream management functions =====*/ -/** - * ZSTD_initDStream() - initialize a zstd streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. - * - * Return: The zstd streaming decompression context. - */ -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, - size_t workspaceSize); -/** - * ZSTD_initDStream_usingDDict() - initialize streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @ddict: The digested dictionary to use for decompression. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. 
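With zstd_compress_stream(), zstd_flush_stream() and zstd_end_stream() declared, a whole-buffer streaming round looks much like the f2fs hunk earlier in this patch. A condensed sketch; the function name is illustrative, and the loop assumes dst_capacity >= zstd_compress_bound(src_size) so the output buffer never fills:

#include <linux/errno.h>
#include <linux/zstd.h>

static int example_stream_compress(zstd_cstream *cstream,
                                   const void *src, size_t src_size,
                                   void *dst, size_t dst_capacity)
{
        zstd_in_buffer in = { .src = src, .size = src_size, .pos = 0 };
        zstd_out_buffer out = { .dst = dst, .size = dst_capacity, .pos = 0 };
        size_t ret;

        while (in.pos < in.size) {
                ret = zstd_compress_stream(cstream, &out, &in);
                if (zstd_is_error(ret))
                        return -EIO;
        }
        /* Write the frame epilogue; must be repeated until it returns 0. */
        do {
                ret = zstd_end_stream(cstream, &out);
                if (zstd_is_error(ret))
                        return -EIO;
        } while (ret != 0);

        return (int)out.pos; /* compressed size */
}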
+ * zstd_init_dstream() - initialize a zstd streaming decompression context + * @max_window_size: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_dstream_workspace_bound(max_window_size) to + * determine how large the workspace must be. * - * Return: The zstd streaming decompression context. + * Return: The zstd streaming decompression context. */ -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, - const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size); -/*===== Streaming decompression functions =====*/ /** - * ZSTD_resetDStream() - reset the context using parameters from creation - * @zds: The zstd streaming decompression context to reset. + * zstd_reset_dstream() - reset the context using parameters from creation + * @dstream: The zstd streaming decompression context to reset. * * Resets the context using the parameters from creation. Skips dictionary * loading, since it can be reused. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_resetDStream(ZSTD_DStream *zds); +size_t zstd_reset_dstream(zstd_dstream *dstream); + /** - * ZSTD_decompressStream() - streaming decompress some of input into output - * @zds: The zstd streaming decompression context. - * @output: Destination buffer. `output.pos` is updated to indicate how much - * decompressed data was written. - * @input: Source buffer. `input.pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input.pos < input.size`, and it's up to the caller to present - * remaining data again. + * zstd_decompress_stream() - streaming decompress some of input into output + * @dstream: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. * * The `input` and `output` buffers may be any size. Guaranteed to make some * forward progress if `input` and `output` are not empty. - * ZSTD_decompressStream() will not consume the last byte of the frame until + * zstd_decompress_stream() will not consume the last byte of the frame until * the entire frame is flushed. * - * Return: Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise returns a hint for the number of bytes to use as the input - * for the next function call or an error, which can be checked using - * ZSTD_isError(). The size hint will never load more than the frame. - */ -size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); - -/** - * ZSTD_DStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_DStreamInSize(void); -/** - * ZSTD_DStreamOutSize() - recommended size for the output buffer - * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete decompressed block.
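The decode loop is symmetric: zstd_decompress_stream() (declared a little further down) returns 0 once a frame is completely decoded and flushed. A sketch assuming the output buffer is large enough for the whole frame (the function name is illustrative):

#include <linux/errno.h>
#include <linux/zstd.h>

static int example_stream_decompress(zstd_dstream *dstream,
                                     const void *src, size_t src_size,
                                     void *dst, size_t dst_capacity)
{
        zstd_in_buffer in = { .src = src, .size = src_size, .pos = 0 };
        zstd_out_buffer out = { .dst = dst, .size = dst_capacity, .pos = 0 };
        size_t ret;

        do {
                ret = zstd_decompress_stream(dstream, &out, &in);
                if (zstd_is_error(ret))
                        return -EIO;
        } while (ret != 0); /* 0 == frame fully decoded and flushed */

        return (int)out.pos; /* decompressed size */
}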
- * - * Return: The recommended size for the output buffer. - */ -size_t ZSTD_DStreamOutSize(void); - - -/* --- Constants ---*/ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U - -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) - -#define ZSTD_WINDOWLOG_MAX_32 27 -#define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX \ - ((unsigned int)(sizeof(size_t) == 4 \ - ? ZSTD_WINDOWLOG_MAX_32 \ - : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_HASHLOG3_MAX 17 -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -/* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MAX 7 -/* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_SEARCHLENGTH_MIN 3 -#define ZSTD_TARGETLENGTH_MIN 4 -#define ZSTD_TARGETLENGTH_MAX 999 - -/* for static allocation */ -#define ZSTD_FRAMEHEADERSIZE_MAX 18 -#define ZSTD_FRAMEHEADERSIZE_MIN 6 -#define ZSTD_frameHeaderSize_prefix 5 -#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN -#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX -/* magic number + skippable frame length */ -#define ZSTD_skippableHeaderSize 8 - - -/*-************************************* - * Compressed size functions - **************************************/ - -/** - * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame - * @src: Source buffer. It should point to the start of a zstd encoded frame - * or a skippable frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * size of the frame. - * - * Return: The compressed size of the frame pointed to by `src` or an error, - * which can be check with ZSTD_isError(). - * Suitable to pass to ZSTD_decompress() or similar functions. - */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Decompressed size functions - **************************************/ -/** - * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header - * @src: It should point to the start of a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The frame content size stored in the frame header if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the - * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. - */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); - -/** - * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames - * @src: It should point to the start of a series of zstd encoded and/or - * skippable frames. - * @srcSize: The exact size of the series of frames. - * - * If any zstd encoded frame in the series doesn't have the frame content size - * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always - * set when using ZSTD_compress(). The decompressed size can be very large. - * If the source is untrusted, the decompressed size could be wrong or - * intentionally modified. Always ensure the result fits within the - * application's authorized limits. 
ZSTD_findDecompressedSize() handles multiple - * frames, and so it must traverse the input to read each frame header. This is - * efficient as most of the data is skipped, however it does mean that all frame - * data must be present and valid. - * - * Return: Decompressed size of all the data contained in the frames if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. - * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. - */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Advanced compression functions - **************************************/ -/** - * ZSTD_checkCParams() - ensure parameter values remain within authorized range - * @cParams: The zstd compression parameters. - * - * Return: Zero or an error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); - -/** - * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize - * @srcSize: Optionally the estimated source size, or zero if unknown. - * @dictSize: Optionally the estimated dictionary size, or zero if unknown. - * - * Return: The optimized parameters. - */ -ZSTD_compressionParameters ZSTD_adjustCParams( - ZSTD_compressionParameters cParams, unsigned long long srcSize, - size_t dictSize); - -/*--- Advanced decompression functions ---*/ - -/** - * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame - * @buffer: The source buffer to check. - * @size: The size of the source buffer, must be at least 4 bytes. - * - * Return: True iff the buffer starts with a zstd or skippable frame identifier. - */ -unsigned int ZSTD_isFrame(const void *buffer, size_t size); - -/** - * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary - * @dict: The dictionary buffer. - * @dictSize: The size of the dictionary buffer. - * - * Return: The dictionary id stored within the dictionary or 0 if the - * dictionary is not a zstd dictionary. If it returns 0 the - * dictionary can still be loaded as a content-only dictionary. + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the + * input for the next function call or an error, which can be checked + * using zstd_is_error(). The size hint will never load more than the + * frame. */ -unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input); -/** - * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict - * @ddict: The ddict to find the id of. - * - * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not - * a zstd dictionary. If it returns 0 `ddict` will be loaded as a - * content-only dictionary. - */ -unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); +/* ====== Frame Inspection Functions ====== */ /** - * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame - * @src: Source buffer. It must be a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * zstd_find_frame_compressed_size() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded + * frame or a skippable frame. + * @src_size: The size of the source buffer. 
It must be at least as large as the + * size of the frame. * - * Return: The dictionary id required to decompress the frame stored within - * `src` or 0 if the dictionary id could not be decoded. It can return - * 0 if the frame does not require a dictionary, the dictionary id - * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` - * is too small. + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be checked with zstd_is_error(). + * Suitable to pass to zstd_decompress_dctx() or similar functions. */ -unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** - * struct ZSTD_frameParams - zstd frame parameters stored in the frame header + * struct zstd_frame_params - zstd frame parameters stored in the frame header * @frameContentSize: The frame content size, or 0 if not present. * @windowSize: The window size, or 0 if the frame is a skippable frame. * @dictID: The dictionary id, or 0 if not present. * @checksumFlag: Whether a checksum was used. */ -typedef struct { - unsigned long long frameContentSize; - unsigned int windowSize; - unsigned int dictID; - unsigned int checksumFlag; -} ZSTD_frameParams; +typedef ZSTD_frameParams zstd_frame_header; /** - * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame - * @fparamsPtr: On success the frame parameters are written here. - * @src: The source buffer. It must point to a zstd or skippable frame. - * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is - * always large enough to succeed. + * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame + * @params: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @src_size: The size of the source buffer. * - * Return: 0 on success. If more data is required it returns how many bytes - * must be provided to make forward progress. Otherwise it returns - * an error, which can be checked using ZSTD_isError(). + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, - size_t srcSize); -/*-***************************************************************************** - * Buffer-less and synchronous inner streaming functions - * - * This is an advanced API, giving full control over buffer management, for - * users which need direct control over memory. - * But it's also a complex one, with many restrictions (documented below). - * Prefer using normal streaming API for an easier experience - ******************************************************************************/ - -/*-***************************************************************************** - * Buffer-less streaming compression (synchronous mode) - * - * A ZSTD_CCtx object is required to track streaming operations. - * Use ZSTD_initCCtx() to initialize a context. - * ZSTD_CCtx object can be re-used multiple times within successive compression - * operations. - * - * Start by initializing a context. - * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary - * compression, - * or ZSTD_compressBegin_advanced(), for finer parameter control.
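zstd_get_frame_header() (declared at the end of the header) lets a caller validate windowSize before sizing a decompression workspace, in the spirit of the removed howto text below, which recommends supporting a window of at least 8 MB. A sketch; the helper name and the 8 MB cap are an illustrative policy, not part of the patch:

#include <linux/errno.h>
#include <linux/sizes.h>
#include <linux/zstd.h>

static int example_probe_frame(const void *src, size_t src_size,
                               size_t *window_size)
{
        zstd_frame_header fh;
        size_t ret = zstd_get_frame_header(&fh, src, src_size);

        if (ret != 0) /* error, or more header bytes are needed */
                return zstd_is_error(ret) ? -EINVAL : -EAGAIN;
        /* Illustrative sanity cap before allocating a workspace. */
        if (fh.windowSize > SZ_8M)
                return -E2BIG;
        *window_size = fh.windowSize;
        return 0;
}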
- * It's also possible to duplicate a reference context which has already been - * initialized, using ZSTD_copyCCtx() - * - * Then, consume your input using ZSTD_compressContinue(). - * There are some important considerations to keep in mind when using this - * advanced function : - * - ZSTD_compressContinue() has no internal buffer. It uses externally provided - * buffer only. - * - Interface is synchronous : input is consumed entirely and produce 1+ - * (or more) compressed blocks. - * - Caller must ensure there is enough space in `dst` to store compressed data - * under worst case scenario. Worst case evaluation is provided by - * ZSTD_compressBound(). - * ZSTD_compressContinue() doesn't guarantee recover after a failed - * compression. - * - ZSTD_compressContinue() presumes prior input ***is still accessible and - * unmodified*** (up to maximum distance size, see WindowLog). - * It remembers all previous contiguous blocks, plus one separated memory - * segment (which can itself consists of multiple contiguous blocks) - * - ZSTD_compressContinue() detects that prior input has been overwritten when - * `src` buffer overlaps. In which case, it will "discard" the relevant memory - * section from its history. - * - * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) - * and optional checksum. It's possible to use srcSize==0, in which case, it - * will write a final empty block to end the frame. Without last block mark, - * frames will be considered unfinished (corrupted) by decoders. - * - * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new - * frame. - ******************************************************************************/ - -/*===== Buffer-less streaming compression functions =====*/ -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, int compressionLevel); -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, ZSTD_parameters params, - unsigned long long pledgedSrcSize); -size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - - - -/*-***************************************************************************** - * Buffer-less streaming decompression (synchronous mode) - * - * A ZSTD_DCtx object is required to track streaming operations. - * Use ZSTD_initDCtx() to initialize a context. - * A ZSTD_DCtx object can be re-used multiple times. - * - * First typical operation is to retrieve frame parameters, using - * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide - * important information to correctly decode the frame, such as the minimum - * rolling buffer size to allocate to decompress data (`windowSize`), and the - * dictionary ID used. - * Note: content size is optional, it may not be present. 0 means unknown. - * Note that these values could be wrong, either because of data malformation, - * or because an attacker is spoofing deliberate false information. 
As a - * consequence, check that values remain within valid application range, - * especially `windowSize`, before allocation. Each application can set its own - * limit, depending on local restrictions. For extended interoperability, it is - * recommended to support at least 8 MB. - * Frame parameters are extracted from the beginning of the compressed frame. - * Data fragment must be large enough to ensure successful decoding, typically - * `ZSTD_frameHeaderSize_max` bytes. - * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. - * >0: `srcSize` is too small, provide at least this many bytes. - * errorCode, which can be tested using ZSTD_isError(). - * - * Start decompression, with ZSTD_decompressBegin() or - * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared - * context, using ZSTD_copyDCtx(). - * - * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() - * alternatively. - * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' - * to ZSTD_decompressContinue(). - * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will - * fail. - * - * The result of ZSTD_decompressContinue() is the number of bytes regenerated - * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an - * error; it just means ZSTD_decompressContinue() has decoded some metadata - * item. It can also be an error code, which can be tested with ZSTD_isError(). - * - * ZSTD_decompressContinue() needs previous data blocks during decompression, up - * to `windowSize`. They should preferably be located contiguously, prior to - * current block. Alternatively, a round buffer of sufficient size is also - * possible. Sufficient size is determined by frame parameters. - * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't - * follow each other, make sure that either the compressor breaks contiguity at - * the same place, or that previous contiguous segment is large enough to - * properly handle maximum back-reference. - * - * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - * Context can then be reset to start a new decompression. - * - * Note: it's possible to know if next input to present is a header or a block, - * using ZSTD_nextInputType(). This information is not required to properly - * decode a frame. - * - * == Special case: skippable frames == - * - * Skippable frames allow integration of user-defined data into a flow of - * concatenated frames. Skippable frames will be ignored (skipped) by a - * decompressor. The format of skippable frames is as follows: - * a) Skippable frame ID - 4 Bytes, Little endian format, any value from - * 0x184D2A50 to 0x184D2A5F - * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - * c) Frame Content - any content (User Data) of length equal to Frame Size - * For skippable frames ZSTD_decompressContinue() always returns 0. - * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 - * what means that a frame is skippable. - * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might - * actually be a zstd encoded frame with no content. For purposes of - * decompression, it is valid in both cases to skip the frame using - * ZSTD_findFrameCompressedSize() to find its size in bytes. - * It also returns frame size as fparamsPtr->frameContentSize. 
- ******************************************************************************/ - -/*===== Buffer-less streaming decompression functions =====*/ -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, - size_t dictSize); -void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -typedef enum { - ZSTDnit_frameHeader, - ZSTDnit_blockHeader, - ZSTDnit_block, - ZSTDnit_lastBlock, - ZSTDnit_checksum, - ZSTDnit_skippableFrame -} ZSTD_nextInputType_e; -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); - -/*-***************************************************************************** - * Block functions - * - * Block functions produce and decode raw zstd blocks, without frame metadata. - * Frame metadata cost is typically ~18 bytes, which can be non-negligible for - * very small blocks (< 100 bytes). User will have to take in charge required - * information to regenerate data, such as compressed and content sizes. - * - * A few rules to respect: - * - Compressing and decompressing require a context structure - * + Use ZSTD_initCCtx() and ZSTD_initDCtx() - * - It is necessary to init context before starting - * + compression : ZSTD_compressBegin() - * + decompression : ZSTD_decompressBegin() - * + variants _usingDict() are also allowed - * + copyCCtx() and copyDCtx() work too - * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - * + If you need to compress more, cut data into multiple blocks - * + Consider using the regular ZSTD_compress() instead, as frame metadata - * costs become negligible when source size is large. - * - When a block is considered not compressible enough, ZSTD_compressBlock() - * result will be zero. In which case, nothing is produced into `dst`. - * + User must test for such outcome and deal directly with uncompressed data - * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! - * + In case of multiple successive blocks, decoder must be informed of - * uncompressed block existence to follow proper history. Use - * ZSTD_insertBlock() in such a case. - ******************************************************************************/ - -/* Define for static allocation */ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) -/*===== Raw zstd block functions =====*/ -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, - size_t blockSize); +size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, + size_t src_size); -#endif /* ZSTD_H */ +#endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h new file mode 100644 index 0000000000000..13151c34f725f --- /dev/null +++ b/include/linux/zstd_lib.h @@ -0,0 +1,1157 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd. + * An additional grant of patent rights can be found in the PATENTS file in the + * same directory. 
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef ZSTD_H
+#define ZSTD_H
+
+/* ======   Dependency   ======*/
+#include <linux/types.h>	/* size_t */
+
+
+/*-*****************************************************************************
+ * Introduction
+ *
+ * zstd, short for Zstandard, is a fast lossless compression algorithm,
+ * targeting real-time compression scenarios at zlib-level and better
+ * compression ratios. The zstd compression library provides in-memory
+ * compression and decompression functions. The library supports compression
+ * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
+ * ultra, should be used with caution, as they require more memory.
+ * Compression can be done in:
+ *  - a single step, reusing a context (described as Explicit memory management)
+ *  - unbounded multiple steps (described as Streaming compression)
+ * The compression ratio achievable on small data can be highly improved using
+ * compression with a dictionary in:
+ *  - a single step (described as Simple dictionary API)
+ *  - a single step, reusing a dictionary (described as Fast dictionary API)
+ ******************************************************************************/
+
+/*======  Helper functions  ======*/
+
+/**
+ * enum ZSTD_ErrorCode - zstd error codes
+ *
+ * Functions that return size_t can be checked for errors using ZSTD_isError()
+ * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ */
+typedef enum {
+	ZSTD_error_no_error,
+	ZSTD_error_GENERIC,
+	ZSTD_error_prefix_unknown,
+	ZSTD_error_version_unsupported,
+	ZSTD_error_parameter_unknown,
+	ZSTD_error_frameParameter_unsupported,
+	ZSTD_error_frameParameter_unsupportedBy32bits,
+	ZSTD_error_frameParameter_windowTooLarge,
+	ZSTD_error_compressionParameter_unsupported,
+	ZSTD_error_init_missing,
+	ZSTD_error_memory_allocation,
+	ZSTD_error_stage_wrong,
+	ZSTD_error_dstSize_tooSmall,
+	ZSTD_error_srcSize_wrong,
+	ZSTD_error_corruption_detected,
+	ZSTD_error_checksum_wrong,
+	ZSTD_error_tableLog_tooLarge,
+	ZSTD_error_maxSymbolValue_tooLarge,
+	ZSTD_error_maxSymbolValue_tooSmall,
+	ZSTD_error_dictionary_corrupted,
+	ZSTD_error_dictionary_wrong,
+	ZSTD_error_dictionaryCreation_failed,
+	ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/**
+ * ZSTD_maxCLevel() - maximum compression level available
+ *
+ * Return: Maximum compression level available.
+ */
+int ZSTD_maxCLevel(void);
+/**
+ * ZSTD_compressBound() - maximum compressed size in worst case scenario
+ * @srcSize: The size of the data to compress.
+ *
+ * Return:   The maximum compressed size in the worst case scenario.
+ */
+size_t ZSTD_compressBound(size_t srcSize);
+/**
+ * ZSTD_isError() - tells if a size_t function result is an error code
+ * @code: The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
+{
+	return code > (size_t)-ZSTD_error_maxCode;
+}
+/**
+ * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
+ * @functionResult: The result of a function for which ZSTD_isError() is true.
+ *
+ * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0
+ *         if the functionResult isn't an error.
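+ *
+ * Example (an illustrative sketch, not part of the original header; `ctx`,
+ * `dst`, `src`, and `params` are assumed to come from the caller, and
+ * retry_with_larger_dst() is a hypothetical caller-side handler):
+ *
+ *	size_t ret = ZSTD_compressCCtx(ctx, dst, dstCapacity, src, srcSize,
+ *		params);
+ *	if (ZSTD_isError(ret) &&
+ *	    ZSTD_getErrorCode(ret) == ZSTD_error_dstSize_tooSmall)
+ *		retry_with_larger_dst();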
+ */ +static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( + size_t functionResult) +{ + if (!ZSTD_isError(functionResult)) + return (ZSTD_ErrorCode)0; + return (ZSTD_ErrorCode)(0 - functionResult); +} + +/** + * enum ZSTD_strategy - zstd compression search strategy + * + * From faster to stronger. + */ +typedef enum { + ZSTD_fast, + ZSTD_dfast, + ZSTD_greedy, + ZSTD_lazy, + ZSTD_lazy2, + ZSTD_btlazy2, + ZSTD_btopt, + ZSTD_btopt2 +} ZSTD_strategy; + +/** + * struct ZSTD_compressionParameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, slower, + * and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + */ +typedef struct { + unsigned int windowLog; + unsigned int chainLog; + unsigned int hashLog; + unsigned int searchLog; + unsigned int searchLength; + unsigned int targetLength; + ZSTD_strategy strategy; +} ZSTD_compressionParameters; + +/** + * struct ZSTD_frameParameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the frame + * header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the end + * of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame header + * when using dictionary compression. + * + * The default value is all fields set to 0. + */ +typedef struct { + unsigned int contentSizeFlag; + unsigned int checksumFlag; + unsigned int noDictIDFlag; +} ZSTD_frameParameters; + +/** + * struct ZSTD_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +/** + * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * Return: The selected ZSTD_compressionParameters. + */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); + +/** + * ZSTD_getParams() - returns ZSTD_parameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * The same as ZSTD_getCParams() except also selects the default frame + * parameters (all zero). + * + * Return: The selected ZSTD_parameters. 
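+ *
+ * Example (a minimal sketch, not from the original documentation; level 3
+ * and the zero dictSize are arbitrary choices):
+ *
+ *	ZSTD_parameters params = ZSTD_getParams(3, srcSize, 0);
+ *	size_t wkspSize = ZSTD_CCtxWorkspaceBound(params.cParams);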
+ */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); + +/*-************************************* + * Explicit memory management + **************************************/ + +/** + * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx + * @cParams: The compression parameters to be used for compression. + * + * If multiple compression parameters might be used, the caller must call + * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initCCtx(). + */ +size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); + +/** + * struct ZSTD_CCtx - the zstd compression context + * + * When compressing many times it is recommended to allocate a context just once + * and reuse it for each successive compression operation. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +/** + * ZSTD_initCCtx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A compression context emplaced into workspace. + */ +ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); + +/** + * ZSTD_compressCCtx() - compress src into dst + * @ctx: The context. Must have been initialized with a workspace at + * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, ZSTD_parameters params); + +/** + * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initDCtx(). + */ +size_t ZSTD_DCtxWorkspaceBound(void); + +/** + * struct ZSTD_DCtx - the zstd decompression context + * + * When decompressing many times it is recommended to allocate a context just + * once and reuse it for each successive decompression operation. + */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +/** + * ZSTD_initDCtx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A decompression context emplaced into workspace. + */ +ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); + +/** + * ZSTD_decompressDCtx() - decompress zstd compressed src into dst + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. 
Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); + +/*-************************ + * Simple dictionary API + **************************/ + +/** + * ZSTD_compress_usingDict() - compress src into dst using a dictionary + * @ctx: The context. Must have been initialized with a workspace at + * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @dict: The dictionary to use for compression. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a predefined dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize, + ZSTD_parameters params); + +/** + * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @dict: The dictionary to use for decompression. The same dictionary + * must've been used to compress the data. + * @dictSize: The size of the dictionary. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize); + +/*-************************** + * Fast dictionary API + ***************************/ + +/** + * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initCDict(). + */ +size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); + +/** + * struct ZSTD_CDict - a digested dictionary to be used for compression + */ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/** + * ZSTD_initCDict() - initialize a digested dictionary for compression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_CDict so it must outlive the returned ZSTD_CDict. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * @workspace: The workspace. It must outlive the returned ZSTD_CDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_CDictWorkspaceBound(params.cParams). 
+ *
+ * When compressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_CDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_CDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
+	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
+ * @cctx:        The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
+ *               cParams are the compression parameters used to initialize the
+ *               cdict.
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @cdict:       The digested dictionary to use for compression.
+ *
+ * Compression using a digested dictionary. The same dictionary must be used
+ * during decompression.
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
+
+
+/**
+ * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         ZSTD_initDDict().
+ */
+size_t ZSTD_DDictWorkspaceBound(void);
+
+/**
+ * struct ZSTD_DDict - a digested dictionary to be used for decompression
+ */
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/**
+ * ZSTD_initDDict() - initialize a digested dictionary for decompression
+ * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
+ *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
+ * @dictSize:      The size of the dictionary.
+ * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
+ * @workspaceSize: The workspace size. Must be at least
+ *                 ZSTD_DDictWorkspaceBound().
+ *
+ * When decompressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_DDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_DDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
+	void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
+ * @dctx:        The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ * @ddict:       The digested dictionary to use for decompression. The same
+ *               dictionary must've been used to compress the data.
+ *
+ * Return:      The decompressed size or an error, which can be checked using
+ *              ZSTD_isError().
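+ *
+ * Example (an illustrative sketch only; `wksp`/`wkspSize` must come from the
+ * caller and satisfy ZSTD_DDictWorkspaceBound(), and error handling is
+ * elided):
+ *
+ *	ZSTD_DDict *ddict = ZSTD_initDDict(dictBuf, dictSize, wksp, wkspSize);
+ *	size_t ret = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity,
+ *		src, srcSize, ddict);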
+ */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
+	size_t dstCapacity, const void *src, size_t srcSize,
+	const ZSTD_DDict *ddict);
+
+
+/*-**************************
+ * Streaming
+ ***************************/
+
+/**
+ * struct ZSTD_inBuffer - input buffer for streaming
+ * @src:  Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos:  Position where reading stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_inBuffer_s {
+	const void *src;
+	size_t size;
+	size_t pos;
+} ZSTD_inBuffer;
+
+/**
+ * struct ZSTD_outBuffer - output buffer for streaming
+ * @dst:  Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos:  Position where writing stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_outBuffer_s {
+	void *dst;
+	size_t size;
+	size_t pos;
+} ZSTD_outBuffer;
+
+
+
+/*-*****************************************************************************
+ * Streaming compression - HowTo
+ *
+ * A ZSTD_CStream object is required to track streaming operation.
+ * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
+ * ZSTD_CStream objects can be reused multiple times on consecutive compression
+ * operations. It is recommended to re-use a ZSTD_CStream in situations where
+ * many streaming operations will be performed consecutively. Use one separate
+ * ZSTD_CStream per thread for parallel execution.
+ *
+ * Use ZSTD_compressStream() repetitively to consume the input stream.
+ * The function will automatically update both `pos` fields.
+ * Note that it may not consume the entire input, in which case `pos < size`,
+ * and it's up to the caller to present the remaining data again.
+ * It returns a hint for the preferred number of bytes to use as the input for
+ * the next function call.
+ *
+ * At any moment, it's possible to flush whatever data remains within the
+ * internal buffer, using ZSTD_flushStream(). `output->pos` will be updated.
+ * There might still be some content left within the internal buffer if
+ * `output->size` is too small. It returns the number of bytes left in the
+ * internal buffer and must be called until it returns 0.
+ *
+ * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
+ * write the frame epilogue. The epilogue is required for decoders to consider
+ * a frame completed. Similar to ZSTD_flushStream(), it may not be able to
+ * flush the full content if `output->size` is too small. In that case, call
+ * ZSTD_endStream() again to complete the flush. It returns the number of bytes
+ * left in the internal buffer and must be called until it returns 0.
+ ******************************************************************************/
+
+/**
+ * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
+ */
+size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CStream - the zstd streaming compression context
+ */
+typedef struct ZSTD_CStream_s ZSTD_CStream;
+
+/*===== ZSTD_CStream management functions =====*/
+/**
+ * ZSTD_initCStream() - initialize a zstd streaming compression context
+ * @params:         The zstd compression parameters.
+ * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
+ *                  pass the source size (zero means empty source).
Otherwise, + * the caller may optionally pass the source size, or zero if + * unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/** + * ZSTD_initCStream_usingCDict() - initialize a streaming compression context + * @cdict: The digested dictionary to use for compression. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() + * with the cParams used to initialize the cdict to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/*===== Streaming compression functions =====*/ +/** + * ZSTD_resetCStream() - reset the context using parameters from creation + * @zcs: The zstd streaming compression context to reset. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +/** + * ZSTD_compressStream() - streaming compress some of input into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input->pos < input->size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); +/** + * ZSTD_flushStream() - flush internal buffers into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_flushStream() must be called until it returns 0, meaning all the data + * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +/** + * ZSTD_endStream() - flush internal buffers into output and end the frame + * @zcs: The zstd streaming compression context. 
+ * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_endStream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); + +/** + * ZSTD_CStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_CStreamInSize(void); +/** + * ZSTD_CStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete compressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_CStreamOutSize(void); + + + +/*-***************************************************************************** + * Streaming decompression - HowTo + * + * A ZSTD_DStream object is required to track streaming operations. + * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. + * ZSTD_DStream objects can be re-used multiple times. + * + * Use ZSTD_decompressStream() repetitively to consume your input. + * The function will update both `pos` fields. + * If `input->pos < input->size`, some input has not been consumed. + * It's up to the caller to present again remaining data. + * If `output->pos < output->size`, decoder has flushed everything it could. + * Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise it returns a suggested next input size that will never load more + * than the current frame. + ******************************************************************************/ + +/** + * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream + * @maxWindowSize: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). + */ +size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); + +/** + * struct ZSTD_DStream - the zstd streaming decompression context + */ +typedef struct ZSTD_DStream_s ZSTD_DStream; +/*===== ZSTD_DStream management functions =====*/ +/** + * ZSTD_initDStream() - initialize a zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, + size_t workspaceSize); +/** + * ZSTD_initDStream_usingDDict() - initialize streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @ddict: The digested dictionary to use for decompression. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. 
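+ *
+ * Example of the streaming loop described in the HowTo above (a sketch only;
+ * workspace setup, input refilling, and error handling are elided):
+ *
+ *	ZSTD_inBuffer in = { src, srcSize, 0 };
+ *	ZSTD_outBuffer out = { dst, dstCapacity, 0 };
+ *	for (;;) {
+ *		size_t ret = ZSTD_decompressStream(zds, &out, &in);
+ *		if (ret == 0)
+ *			break;		// frame decoded and fully flushed
+ *		if (ZSTD_isError(ret))
+ *			break;		// a real caller must report the error
+ *	}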
+ */ +ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, + const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); + +/*===== Streaming decompression functions =====*/ +/** + * ZSTD_resetDStream() - reset the context using parameters from creation + * @zds: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetDStream(ZSTD_DStream *zds); +/** + * ZSTD_decompressStream() - streaming decompress some of input into output + * @zds: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * ZSTD_decompressStream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the input + * for the next function call or an error, which can be checked using + * ZSTD_isError(). The size hint will never load more than the frame. + */ +size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); + +/** + * ZSTD_DStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_DStreamInSize(void); +/** + * ZSTD_DStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete decompressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_DStreamOutSize(void); + + +/* --- Constants ---*/ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U + +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) + +#define ZSTD_WINDOWLOG_MAX_32 27 +#define ZSTD_WINDOWLOG_MAX_64 27 +#define ZSTD_WINDOWLOG_MAX \ + ((unsigned int)(sizeof(size_t) == 4 \ + ? 
ZSTD_WINDOWLOG_MAX_32 \
+		: ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
+#define ZSTD_HASHLOG_MIN 6
+#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_HASHLOG3_MAX 17
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+/* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_SEARCHLENGTH_MAX 7
+/* only for ZSTD_btopt, other strategies are limited to 4 */
+#define ZSTD_SEARCHLENGTH_MIN 3
+#define ZSTD_TARGETLENGTH_MIN 4
+#define ZSTD_TARGETLENGTH_MAX 999
+
+/* for static allocation */
+#define ZSTD_FRAMEHEADERSIZE_MAX 18
+#define ZSTD_FRAMEHEADERSIZE_MIN 6
+#define ZSTD_frameHeaderSize_prefix 5
+#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
+#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
+/* magic number + skippable frame length */
+#define ZSTD_skippableHeaderSize 8
+
+
+/*-*************************************
+ * Compressed size functions
+ **************************************/
+
+/**
+ * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
+ * @src:     Source buffer. It should point to the start of a zstd encoded frame
+ *           or a skippable frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           size of the frame.
+ *
+ * Return:   The compressed size of the frame pointed to by `src` or an error,
+ *           which can be checked with ZSTD_isError().
+ *           Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Decompressed size functions
+ **************************************/
+/**
+ * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
+ * @src:     It should point to the start of a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The frame content size stored in the frame header if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
+ *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
+ */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/**
+ * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
+ * @src:     It should point to the start of a series of zstd encoded and/or
+ *           skippable frames.
+ * @srcSize: The exact size of the series of frames.
+ *
+ * If any zstd encoded frame in the series doesn't have the frame content size
+ * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
+ * set when using ZSTD_compress(). The decompressed size can be very large.
+ * If the source is untrusted, the decompressed size could be wrong or
+ * intentionally modified. Always ensure the result fits within the
+ * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
+ * frames, and so it must traverse the input to read each frame header. This is
+ * efficient as most of the data is skipped, however it does mean that all frame
+ * data must be present and valid.
+ *
+ * Return:   Decompressed size of all the data contained in the frames if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
+ *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
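+ *
+ * Example (a sketch assuming an application-defined MAX_OUTPUT limit, which
+ * is not part of zstd):
+ *
+ *	unsigned long long size = ZSTD_findDecompressedSize(src, srcSize);
+ *	if (size == ZSTD_CONTENTSIZE_UNKNOWN ||
+ *	    size == ZSTD_CONTENTSIZE_ERROR || size > MAX_OUTPUT)
+ *		return -1;	// unknown, invalid, or over the limit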
+ */
+unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Advanced compression functions
+ **************************************/
+/**
+ * ZSTD_checkCParams() - ensure parameter values remain within authorized range
+ * @cParams: The zstd compression parameters.
+ *
+ * Return:   Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
+
+/**
+ * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
+ * @cParams:  The compression parameters to adjust.
+ * @srcSize:  Optionally the estimated source size, or zero if unknown.
+ * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
+ *
+ * Return:    The optimized parameters.
+ */
+ZSTD_compressionParameters ZSTD_adjustCParams(
+	ZSTD_compressionParameters cParams, unsigned long long srcSize,
+	size_t dictSize);
+
+/*--- Advanced decompression functions ---*/
+
+/**
+ * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
+ * @buffer: The source buffer to check.
+ * @size:   The size of the source buffer, must be at least 4 bytes.
+ *
+ * Return:  True iff the buffer starts with a zstd or skippable frame identifier.
+ */
+unsigned int ZSTD_isFrame(const void *buffer, size_t size);
+
+/**
+ * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
+ * @dict:     The dictionary buffer.
+ * @dictSize: The size of the dictionary buffer.
+ *
+ * Return:    The dictionary id stored within the dictionary or 0 if the
+ *            dictionary is not a zstd dictionary. If it returns 0 the
+ *            dictionary can still be loaded as a content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
+
+/**
+ * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
+ * @ddict: The ddict to find the id of.
+ *
+ * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
+ *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
+ *         content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
+
+/**
+ * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
+ * @src:     Source buffer. It must be a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The dictionary id required to decompress the frame stored within
+ *           `src` or 0 if the dictionary id could not be decoded. It can return
+ *           0 if the frame does not require a dictionary, the dictionary id
+ *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
+ *           is too small.
+ */
+unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
+
+/**
+ * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or 0 if not present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ */
+typedef struct {
+	unsigned long long frameContentSize;
+	unsigned int windowSize;
+	unsigned int dictID;
+	unsigned int checksumFlag;
+} ZSTD_frameParams;
+
+/**
+ * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
+ * @fparamsPtr: On success the frame parameters are written here.
+ * @src:        The source buffer. It must point to a zstd or skippable frame.
+ * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
+ *              always large enough to succeed.
+ *
+ * Return:      0 on success. If more data is required it returns how many bytes
+ *              must be provided to make forward progress. Otherwise it returns
+ *              an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
+	size_t srcSize);
+
+/*-*****************************************************************************
+ * Buffer-less and synchronous inner streaming functions
+ *
+ * This is an advanced API, giving full control over buffer management, for
+ * users which need direct control over memory.
+ * But it's also a complex one, with many restrictions (documented below).
+ * Prefer using the normal streaming API for an easier experience.
+ ******************************************************************************/
+
+/*-*****************************************************************************
+ * Buffer-less streaming compression (synchronous mode)
+ *
+ * A ZSTD_CCtx object is required to track streaming operations.
+ * Use ZSTD_initCCtx() to initialize a context.
+ * A ZSTD_CCtx object can be re-used multiple times within successive
+ * compression operations.
+ *
+ * Start by initializing a context.
+ * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
+ * compression, or ZSTD_compressBegin_advanced() for finer parameter control.
+ * It's also possible to duplicate a reference context which has already been
+ * initialized, using ZSTD_copyCCtx().
+ *
+ * Then, consume your input using ZSTD_compressContinue().
+ * There are some important considerations to keep in mind when using this
+ * advanced function:
+ * - ZSTD_compressContinue() has no internal buffer. It uses the externally
+ *   provided buffer only.
+ * - The interface is synchronous: input is consumed entirely and produces one
+ *   or more compressed blocks.
+ * - The caller must ensure there is enough space in `dst` to store the
+ *   compressed data under the worst case scenario. Worst case evaluation is
+ *   provided by ZSTD_compressBound().
+ *   ZSTD_compressContinue() doesn't guarantee recovery after a failed
+ *   compression.
+ * - ZSTD_compressContinue() presumes prior input ***is still accessible and
+ *   unmodified*** (up to the maximum distance size, see WindowLog).
+ *   It remembers all previous contiguous blocks, plus one separate memory
+ *   segment (which can itself consist of multiple contiguous blocks).
+ * - ZSTD_compressContinue() detects that prior input has been overwritten when
+ *   the `src` buffer overlaps, in which case it will "discard" the relevant
+ *   memory section from its history.
+ *
+ * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
+ * and optional checksum. It's possible to use srcSize==0, in which case it
+ * will write a final empty block to end the frame. Without the last block
+ * mark, frames will be considered unfinished (corrupted) by decoders.
+ *
+ * The `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some
+ * new frame.
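+ *
+ * A sketch of the begin/continue/end sequence described above (illustrative
+ * only; `cctx` setup and all error checks are elided, and `out` is assumed
+ * to be sized per ZSTD_compressBound()):
+ *
+ *	char *out = dst;
+ *	ZSTD_compressBegin(cctx, 3);
+ *	size_t n = ZSTD_compressContinue(cctx, out, dstCapacity, src, srcSize);
+ *	size_t end = ZSTD_compressEnd(cctx, out + n, dstCapacity - n, NULL, 0);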
+ ******************************************************************************/
+
+/*===== Buffer-less streaming compression functions =====*/
+size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, ZSTD_parameters params,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+
+
+
+/*-*****************************************************************************
+ * Buffer-less streaming decompression (synchronous mode)
+ *
+ * A ZSTD_DCtx object is required to track streaming operations.
+ * Use ZSTD_initDCtx() to initialize a context.
+ * A ZSTD_DCtx object can be re-used multiple times.
+ *
+ * The first typical operation is to retrieve frame parameters, using
+ * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provides
+ * important information to correctly decode the frame, such as the minimum
+ * rolling buffer size to allocate to decompress data (`windowSize`), and the
+ * dictionary ID used.
+ * Note: content size is optional, it may not be present. 0 means unknown.
+ * Note that these values could be wrong, either because of data malformation,
+ * or because an attacker is spoofing deliberate false information. As a
+ * consequence, check that values remain within valid application range,
+ * especially `windowSize`, before allocation. Each application can set its own
+ * limit, depending on local restrictions. For extended interoperability, it is
+ * recommended to support at least 8 MB.
+ * Frame parameters are extracted from the beginning of the compressed frame.
+ * The data fragment must be large enough to ensure successful decoding,
+ * typically `ZSTD_frameHeaderSize_max` bytes.
+ * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
+ *         >0: `srcSize` is too small, provide at least this many bytes.
+ *         or an error code, which can be tested using ZSTD_isError().
+ *
+ * Start decompression, with ZSTD_decompressBegin() or
+ * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
+ * context, using ZSTD_copyDCtx().
+ *
+ * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
+ * alternately.
+ * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
+ * to ZSTD_decompressContinue().
+ * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
+ * fail.
+ *
+ * The result of ZSTD_decompressContinue() is the number of bytes regenerated
+ * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
+ * error; it just means ZSTD_decompressContinue() has decoded some metadata
+ * item. It can also be an error code, which can be tested with ZSTD_isError().
+ *
+ * ZSTD_decompressContinue() needs previous data blocks during decompression, up
+ * to `windowSize`. They should preferably be located contiguously, prior to the
+ * current block. Alternatively, a round buffer of sufficient size is also
+ * possible.
Sufficient size is determined by frame parameters.
+ * ZSTD_decompressContinue() is very sensitive to contiguity; if two blocks
+ * don't follow each other, make sure that either the compressor breaks
+ * contiguity at the same place, or that the previous contiguous segment is
+ * large enough to properly handle the maximum back-reference.
+ *
+ * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+ * The context can then be reset to start a new decompression.
+ *
+ * Note: it's possible to know if the next input to present is a header or a
+ * block, using ZSTD_nextInputType(). This information is not required to
+ * properly decode a frame.
+ *
+ * == Special case: skippable frames ==
+ *
+ * Skippable frames allow integration of user-defined data into a flow of
+ * concatenated frames. Skippable frames will be ignored (skipped) by a
+ * decompressor. The format of skippable frames is as follows:
+ * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
+ *    0x184D2A50 to 0x184D2A5F
+ * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+ * c) Frame Content - any content (User Data) of length equal to Frame Size
+ * For skippable frames ZSTD_decompressContinue() always returns 0.
+ * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowSize==0,
+ * which means that the frame is skippable.
+ * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
+ * actually be a zstd encoded frame with no content. For purposes of
+ * decompression, it is valid in both cases to skip the frame using
+ * ZSTD_findFrameCompressedSize() to find its size in bytes.
+ * ZSTD_getFrameParams() also reports the skippable frame's size in
+ * fparamsPtr->frameContentSize.
+ ******************************************************************************/
+
+/*===== Buffer-less streaming decompression functions =====*/
+size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
+	size_t dictSize);
+void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+typedef enum {
+	ZSTDnit_frameHeader,
+	ZSTDnit_blockHeader,
+	ZSTDnit_block,
+	ZSTDnit_lastBlock,
+	ZSTDnit_checksum,
+	ZSTDnit_skippableFrame
+} ZSTD_nextInputType_e;
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
+
+/*-*****************************************************************************
+ * Block functions
+ *
+ * Block functions produce and decode raw zstd blocks, without frame metadata.
+ * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
+ * very small blocks (< 100 bytes). The user must keep track of the information
+ * required to regenerate the data, such as the compressed and content sizes.
+ *
+ * A few rules to respect:
+ * - Compressing and decompressing require a context structure
+ *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
+ * - It is necessary to init the context before starting
+ *   + compression : ZSTD_compressBegin()
+ *   + decompression : ZSTD_decompressBegin()
+ *   + variants _usingDict() are also allowed
+ *   + copyCCtx() and copyDCtx() work too
+ * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+ *   + If you need to compress more, cut data into multiple blocks
+ *   + Consider using the regular ZSTD_compress() instead, as frame metadata
+ *     costs become negligible when source size is large.
+ * - When a block is considered not compressible enough, the result of
+ *   ZSTD_compressBlock() will be zero, in which case nothing is produced
+ *   into `dst`.
+ *   + The user must test for such an outcome and deal directly with the
+ *     uncompressed data
+ *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!
+ *   + In case of multiple successive blocks, the decoder must be informed of
+ *     the existence of uncompressed blocks to follow proper history. Use
+ *     ZSTD_insertBlock() in such a case.
+ ******************************************************************************/
+
+/* Define for static allocation */
+#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
+/*===== Raw zstd block functions =====*/
+size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
+size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
+	size_t blockSize);
+
+#endif /* ZSTD_H */
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
index 6b629ab31c1eb..c076d6f050649 100644
--- a/lib/decompress_unzstd.c
+++ b/lib/decompress_unzstd.c
@@ -91,11 +91,15 @@ static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
 {
-	const int err = ZSTD_getErrorCode(ret);
+	const zstd_error_code err = zstd_get_error_code(ret);
 
-	if (!ZSTD_isError(ret))
+	if (!zstd_is_error(ret))
 		return 0;
 
+	/*
+	 * zstd_get_error_name() cannot be used because error takes a char *
+	 * not a const char *
+	 */
 	switch (err) {
 	case ZSTD_error_memory_allocation:
 		error("ZSTD decompressor ran out of memory");
@@ -124,28 +128,28 @@ static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
				  long out_len, long *in_pos,
				  void (*error)(char *x))
 {
-	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	const size_t wksp_size = zstd_dctx_workspace_bound();
 	void *wksp = large_malloc(wksp_size);
-	ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+	zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size);
 	int err;
 	size_t ret;
 
 	if (dctx == NULL) {
-		error("Out of memory while allocating ZSTD_DCtx");
+		error("Out of memory while allocating zstd_dctx");
 		err = -1;
 		goto out;
 	}
 	/*
 	 * Find out how large the frame actually is, there may be junk at
-	 * the end of the frame that ZSTD_decompressDCtx() can't handle.
+	 * the end of the frame that zstd_decompress_dctx() can't handle.
 	 */
-	ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+	ret = zstd_find_frame_compressed_size(in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
 	in_len = (long)ret;
 
-	ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+	ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -167,14 +171,14 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
			 long *in_pos,
			 void (*error)(char *x))
 {
-	ZSTD_inBuffer in;
-	ZSTD_outBuffer out;
-	ZSTD_frameParams params;
+	zstd_in_buffer in;
+	zstd_out_buffer out;
+	zstd_frame_header header;
 	void *in_allocated = NULL;
 	void *out_allocated = NULL;
 	void *wksp = NULL;
 	size_t wksp_size;
-	ZSTD_DStream *dstream;
+	zstd_dstream *dstream;
 	int err;
 	size_t ret;
@@ -238,13 +242,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 	out.size = out_len;
 
 	/*
-	 * We need to know the window size to allocate the ZSTD_DStream.
+	 * We need to know the window size to allocate the zstd_dstream.
	 * Since we are streaming, we need to allocate a buffer for the sliding
	 * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
	 * (8 MB), so it is important to use the actual value so as not to
	 * waste memory when it is smaller.
	 */
-	ret = ZSTD_getFrameParams(&params, in.src, in.size);
+	ret = zstd_get_frame_header(&header, in.src, in.size);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -253,19 +257,19 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 		err = -1;
 		goto out;
 	}
-	if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+	if (header.windowSize > ZSTD_WINDOWSIZE_MAX) {
 		error("ZSTD-compressed data has too large a window size");
 		err = -1;
 		goto out;
 	}

 	/*
-	 * Allocate the ZSTD_DStream now that we know how much memory is
+	 * Allocate the zstd_dstream now that we know how much memory is
 	 * required.
 	 */
-	wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+	wksp_size = zstd_dstream_workspace_bound(header.windowSize);
 	wksp = large_malloc(wksp_size);
-	dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+	dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size);
 	if (dstream == NULL) {
 		error("Out of memory while allocating ZSTD_DStream");
 		err = -1;
@@ -298,7 +302,7 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 		in.size = in_len;
 	}
 	/* Returns zero when the frame is complete. */
-	ret = ZSTD_decompressStream(dstream, &out, &in);
+	ret = zstd_decompress_stream(dstream, &out, &in);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
index b080264ed3adf..57aaa64306a01 100644
--- a/lib/zstd/compress.c
+++ b/lib/zstd/compress.c
@@ -3443,43 +3443,92 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize,
 	return params;
 }

-EXPORT_SYMBOL(ZSTD_maxCLevel);
-EXPORT_SYMBOL(ZSTD_compressBound);
-
-EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCCtx);
-EXPORT_SYMBOL(ZSTD_compressCCtx);
-EXPORT_SYMBOL(ZSTD_compress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCDict);
-EXPORT_SYMBOL(ZSTD_compress_usingCDict);
-
-EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCStream);
-EXPORT_SYMBOL(ZSTD_initCStream_usingCDict);
-EXPORT_SYMBOL(ZSTD_resetCStream);
-EXPORT_SYMBOL(ZSTD_compressStream);
-EXPORT_SYMBOL(ZSTD_flushStream);
-EXPORT_SYMBOL(ZSTD_endStream);
-EXPORT_SYMBOL(ZSTD_CStreamInSize);
-EXPORT_SYMBOL(ZSTD_CStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_getCParams);
-EXPORT_SYMBOL(ZSTD_getParams);
-EXPORT_SYMBOL(ZSTD_checkCParams);
-EXPORT_SYMBOL(ZSTD_adjustCParams);
-
-EXPORT_SYMBOL(ZSTD_compressBegin);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_compressBegin_advanced);
-EXPORT_SYMBOL(ZSTD_copyCCtx);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict);
-EXPORT_SYMBOL(ZSTD_compressContinue);
-EXPORT_SYMBOL(ZSTD_compressEnd);
-
-EXPORT_SYMBOL(ZSTD_getBlockSizeMax);
-EXPORT_SYMBOL(ZSTD_compressBlock);
+size_t zstd_compress_bound(size_t src_size)
+{
+	return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+int zstd_min_clevel(void)
+{
+	/*
+	 * zstd-1.3.1 doesn't implement ZSTD_minCLevel().
+	 * Return 0 (default level).
+ */ + return 0; +} +EXPORT_SYMBOL(zstd_min_clevel); + +int zstd_max_clevel(void) +{ + return ZSTD_maxCLevel(); +} +EXPORT_SYMBOL(zstd_max_clevel); + +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size) +{ + return ZSTD_getParams(level, estimated_src_size, 0); +} +EXPORT_SYMBOL(zstd_get_params); + +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_CCtxWorkspaceBound(*cparams); +} +EXPORT_SYMBOL(zstd_cctx_workspace_bound); + +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) +{ + return ZSTD_initCCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_cctx); + +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters) +{ + return ZSTD_compressCCtx(cctx, dst, dst_capacity, src, src_size, *parameters); +} +EXPORT_SYMBOL(zstd_compress_cctx); + +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_CStreamWorkspaceBound(*cparams); +} +EXPORT_SYMBOL(zstd_cstream_workspace_bound); + +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size) +{ + return ZSTD_initCStream(*parameters, pledged_src_size, workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_cstream); + +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size) +{ + return ZSTD_resetCStream(cstream, pledged_src_size); +} +EXPORT_SYMBOL(zstd_reset_cstream); + +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_compressStream(cstream, output, input); +} +EXPORT_SYMBOL(zstd_compress_stream); + +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_flushStream(cstream, output); +} +EXPORT_SYMBOL(zstd_flush_stream); + +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_endStream(cstream, output); +} +EXPORT_SYMBOL(zstd_end_stream); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c index 66cd487a326a8..02e92c2cbf4fc 100644 --- a/lib/zstd/decompress.c +++ b/lib/zstd/decompress.c @@ -2490,42 +2490,82 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB } } -EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDCtx); -EXPORT_SYMBOL(ZSTD_decompressDCtx); -EXPORT_SYMBOL(ZSTD_decompress_usingDict); - -EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDDict); -EXPORT_SYMBOL(ZSTD_decompress_usingDDict); - -EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDStream); -EXPORT_SYMBOL(ZSTD_initDStream_usingDDict); -EXPORT_SYMBOL(ZSTD_resetDStream); -EXPORT_SYMBOL(ZSTD_decompressStream); -EXPORT_SYMBOL(ZSTD_DStreamInSize); -EXPORT_SYMBOL(ZSTD_DStreamOutSize); - -EXPORT_SYMBOL(ZSTD_findFrameCompressedSize); -EXPORT_SYMBOL(ZSTD_getFrameContentSize); -EXPORT_SYMBOL(ZSTD_findDecompressedSize); - -EXPORT_SYMBOL(ZSTD_isFrame); -EXPORT_SYMBOL(ZSTD_getDictID_fromDict); -EXPORT_SYMBOL(ZSTD_getDictID_fromDDict); -EXPORT_SYMBOL(ZSTD_getDictID_fromFrame); - -EXPORT_SYMBOL(ZSTD_getFrameParams); -EXPORT_SYMBOL(ZSTD_decompressBegin); -EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict); -EXPORT_SYMBOL(ZSTD_copyDCtx); -EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress); -EXPORT_SYMBOL(ZSTD_decompressContinue); -EXPORT_SYMBOL(ZSTD_nextInputType); - 
-EXPORT_SYMBOL(ZSTD_decompressBlock); -EXPORT_SYMBOL(ZSTD_insertBlock); +unsigned int zstd_is_error(size_t code) +{ + return ZSTD_isError(code); +} +EXPORT_SYMBOL(zstd_is_error); + +zstd_error_code zstd_get_error_code(size_t code) +{ + return ZSTD_getErrorCode(code); +} +EXPORT_SYMBOL(zstd_get_error_code); + +const char *zstd_get_error_name(size_t code) +{ + /* Real implementation in zstd-1.4.6. */ + return "GENERIC"; +} +EXPORT_SYMBOL(zstd_get_error_name); + +size_t zstd_dctx_workspace_bound(void) +{ + return ZSTD_DCtxWorkspaceBound(); +} +EXPORT_SYMBOL(zstd_dctx_workspace_bound); + +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) +{ + return ZSTD_initDCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dctx); + +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size) +{ + return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_decompress_dctx); + +size_t zstd_dstream_workspace_bound(size_t max_window_size) +{ + return ZSTD_DStreamWorkspaceBound(max_window_size); +} +EXPORT_SYMBOL(zstd_dstream_workspace_bound); + +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size) +{ + return ZSTD_initDStream(max_window_size, workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dstream); + +size_t zstd_reset_dstream(zstd_dstream *dstream) +{ + return ZSTD_resetDStream(dstream); +} +EXPORT_SYMBOL(zstd_reset_dstream); + +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_decompressStream(dstream, output, input); +} +EXPORT_SYMBOL(zstd_decompress_stream); + +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) +{ + return ZSTD_findFrameCompressedSize(src, src_size); +} +EXPORT_SYMBOL(zstd_find_frame_compressed_size); + +size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, + size_t src_size) +{ + return ZSTD_getFrameParams(header, src, src_size); +} +EXPORT_SYMBOL(zstd_get_frame_header); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Zstd Decompressor"); From 87746a6502d6d7b1bbe175a1ad7bb71df865e098 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 14 Sep 2020 12:54:12 -0700 Subject: [PATCH 0128/1202] lib: zstd: Add decompress_sources.h for decompress_unzstd Adds decompress_sources.h which includes every .c file necessary for zstd decompression. This is used in decompress_unzstd.c so the internal structure of the library isn't exposed. This allows us to upgrade the zstd library version without modifying any callers. Instead we just need to update decompress_sources.h. 
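For illustration, a pre-boot consumer of the library then boils down to a
single include, which is exactly what the decompress_unzstd.c hunk below
ends up with:

  #ifdef STATIC
  # define UNZSTD_PREBOOT
  # include "xxhash.c"
  # include "zstd/decompress_sources.h"
  #endif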
Signed-off-by: Nick Terrell --- lib/decompress_unzstd.c | 6 +----- lib/zstd/decompress_sources.h | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 lib/zstd/decompress_sources.h diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c index c076d6f050649..a512b99ae16a9 100644 --- a/lib/decompress_unzstd.c +++ b/lib/decompress_unzstd.c @@ -68,11 +68,7 @@ #ifdef STATIC # define UNZSTD_PREBOOT # include "xxhash.c" -# include "zstd/entropy_common.c" -# include "zstd/fse_decompress.c" -# include "zstd/huf_decompress.c" -# include "zstd/zstd_common.c" -# include "zstd/decompress.c" +# include "zstd/decompress_sources.h" #endif #include diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h new file mode 100644 index 0000000000000..9ba367b441493 --- /dev/null +++ b/lib/zstd/decompress_sources.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This file includes every .c file needed for decompression. + * It is used by lib/decompress_unzstd.c to include the decompression + * source into the translation-unit, so it can be used for kernel + * decompression. + */ + +#include "entropy_common.c" +#include "fse_decompress.c" +#include "huf_decompress.c" +#include "zstd_common.c" +#include "decompress.c" From b3c08d1ad2bb1a731e1d7f4eaa6b1373e5cf52d3 Mon Sep 17 00:00:00 2001 From: "Hector.Yuan" Date: Mon, 4 Oct 2021 22:42:33 +0800 Subject: [PATCH 0129/1202] cpufreq: Fix parameter in parse_perf_domain() Pass cpu to parse_perf_domain() instead of pcpu. Signed-off-by: Hector.Yuan [ Viresh: Massaged changelog ] Signed-off-by: Viresh Kumar --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ff88bb3e44fca..66a1f495f01a6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1041,7 +1041,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ if (cpu == pcpu) continue; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name); if (ret < 0) continue; From 956bbf2a94e8bfabb553ed91f9b740a8d7175434 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 28 Sep 2021 10:20:02 +0800 Subject: [PATCH 0130/1202] arm64: dts: qcom: Add missing vdd-supply for QUSB2 PHY QUSB2 PHY requires vdd-supply for digital circuit operation. Add it for platforms that miss it. 
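As a sketch, each affected PHY node simply gains one more supply property
next to the ones it already has (the regulator names below are the ones
used by the boards patched here):

  &hsusb_phy1 {
          status = "okay";

          vdd-supply = <&vreg_l28a_0p925>;
          vdda-pll-supply = <&vreg_l12a_1p8>;
          vdda-phy-dpdm-supply = <&vreg_l24a_3p075>;
  };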
Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210928022002.26286-4-shawn.guo@linaro.org Signed-off-by: Vinod Koul --- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 2 ++ arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi | 1 + arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi | 1 + arch/arm64/boot/dts/qcom/msm8998-oneplus-common.dtsi | 1 + 4 files changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 51e17094d7b18..d9826ce27e500 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -230,6 +230,7 @@ &hsusb_phy1 { status = "okay"; + vdd-supply = <&vreg_l28a_0p925>; vdda-pll-supply = <&vreg_l12a_1p8>; vdda-phy-dpdm-supply = <&vreg_l24a_3p075>; }; @@ -237,6 +238,7 @@ &hsusb_phy2 { status = "okay"; + vdd-supply = <&vreg_l28a_0p925>; vdda-pll-supply = <&vreg_l12a_1p8>; vdda-phy-dpdm-supply = <&vreg_l24a_3p075>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi b/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi index 507396c4d23b6..61ea6d4ef8ac6 100644 --- a/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996-sony-xperia-tone.dtsi @@ -184,6 +184,7 @@ &hsusb_phy1 { status = "okay"; + vdd-supply = <&pm8994_l28>; vdda-pll-supply = <&pm8994_l12>; vdda-phy-dpdm-supply = <&pm8994_l24>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi index 125d7923d7131..4f823974559b7 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi @@ -102,6 +102,7 @@ &qusb2phy { status = "okay"; + vdd-supply = <&vreg_l1a_0p875>; vdda-pll-supply = <&vreg_l12a_1p8>; vdda-phy-dpdm-supply = <&vreg_l24a_3p075>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/msm8998-oneplus-common.dtsi index 0f5c7828a901d..243f4ee5da8cd 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-oneplus-common.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-oneplus-common.dtsi @@ -260,6 +260,7 @@ &qusb2phy { status = "okay"; + vdd-supply = <&vreg_l1a_0p875>; vdda-pll-supply = <&vreg_l12a_1p8>; vdda-phy-dpdm-supply = <&vreg_l24a_3p075>; }; From 455296030ca5bd70052f4a296ca44747c5ebcebb Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 27 Sep 2021 14:48:28 +0800 Subject: [PATCH 0131/1202] dt-bindings: phy: qcom,qmp: Add QCM2290 USB3 PHY Add support for USB3 PHY found on Qualcomm QCM2290 SoC. Signed-off-by: Shawn Guo Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20210927064829.5752-2-shawn.guo@linaro.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,qmp-phy.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml index a167b5c1ae178..647b45cfec848 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml @@ -27,6 +27,7 @@ properties: - qcom,msm8998-qmp-pcie-phy - qcom,msm8998-qmp-ufs-phy - qcom,msm8998-qmp-usb3-phy + - qcom,qcm2290-qmp-usb3-phy - qcom,sc7180-qmp-usb3-phy - qcom,sc8180x-qmp-pcie-phy - qcom,sc8180x-qmp-ufs-phy @@ -414,6 +415,32 @@ allOf: items: - const: phy - const: common + - if: + properties: + compatible: + contains: + enum: + - qcom,qcm2290-qmp-usb3-phy + then: + properties: + clocks: + items: + - description: Phy config clock. 
+ - description: 19.2 MHz ref clk. + - description: Phy common block aux clock. + clock-names: + items: + - const: cfg_ahb + - const: ref + - const: com_aux + resets: + items: + - description: phy_phy reset. + - description: reset of phy block. + reset-names: + items: + - const: phy_phy + - const: phy examples: - | From 759f9ec3f3761ab1b6121eb8cdc04d5c03d92d18 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 27 Sep 2021 14:48:29 +0800 Subject: [PATCH 0132/1202] phy: qcom-qmp: Add QCM2290 USB3 PHY support Enable QCM2290 USB3 PHY support by adding the qmp_phy_cfg data which are taken from downstream kernel. Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210927064829.5752-3-shawn.guo@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp.c | 143 ++++++++++++++++++++++++++++ drivers/phy/qualcomm/phy-qcom-qmp.h | 2 + 2 files changed, 145 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 084e3d96264ec..32123a60be978 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -135,6 +135,8 @@ enum qphy_reg_layout { QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR, QPHY_PCS_LFPS_RXTERM_IRQ_STATUS, QPHY_PCS_POWER_DOWN_CONTROL, + /* PCS_MISC registers */ + QPHY_PCS_MISC_TYPEC_CTRL, /* Keep last to ensure regs_layout arrays are properly initialized */ QPHY_LAYOUT_SIZE }; @@ -229,6 +231,16 @@ static const unsigned int sm8350_usb3_uniphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0x1014, }; +static const unsigned int qcm2290_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = { + [QPHY_SW_RESET] = 0x00, + [QPHY_PCS_POWER_DOWN_CONTROL] = 0x04, + [QPHY_START_CTRL] = 0x08, + [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0xd8, + [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = 0xdc, + [QPHY_PCS_STATUS] = 0x174, + [QPHY_PCS_MISC_TYPEC_CTRL] = 0x00, +}; + static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = { [QPHY_START_CTRL] = 0x00, [QPHY_PCS_READY_STATUS] = 0x160, @@ -2761,6 +2773,99 @@ static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl qcm2290_usb3_serdes_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x14), + QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x08), + QMP_PHY_INIT_CFG(QSERDES_COM_CLK_SELECT, 0x30), + QMP_PHY_INIT_CFG(QSERDES_COM_SYS_CLK_CTRL, 0x06), + QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL2, 0x08), + QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_COM_SVS_MODE_CLK_SEL, 0x01), + QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE0, 0x82), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE0, 0x55), + QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE0, 0x03), + QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE0, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE0, 0x16), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE0, 0x28), + QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80), + QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE0, 0x15), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE0, 0x34), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE0, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_CORE_CLK_EN, 0x00), + 
QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_CFG, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_EN_CENTER, 0x01), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER1, 0x31), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER2, 0x01), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER1, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER2, 0x00), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE1, 0xde), + QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE2, 0x07), + QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0x0f), + QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x06), + QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_INITVAL, 0x80), + QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CTRL_BY_PSM, 0x01), +}; + +static const struct qmp_phy_init_tbl qcm2290_usb3_tx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10), + QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12), + QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0xc6), + QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_RX, 0x00), +}; + +static const struct qmp_phy_init_tbl qcm2290_usb3_rx_tbl[] = { + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x02), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x77), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x0a), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x03), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x16), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00), + QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x00), +}; + +static const struct qmp_phy_init_tbl qcm2290_usb3_pcs_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x17), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0f), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x85), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00), + QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x88), +}; + struct qmp_phy; /* struct qmp_phy_cfg - per-PHY initialization config */ @@ 
-2995,6 +3100,10 @@ static const char * const qmp_v4_sdx55_usbphy_clk_l[] = {
 	"aux", "cfg_ahb", "ref"
 };

+static const char * const qcm2290_usb3phy_clk_l[] = {
+	"cfg_ahb", "ref", "com_aux",
+};
+
 /* list of resets */
 static const char * const msm8996_pciephy_reset_l[] = {
 	"phy", "common", "cfg",
@@ -3008,6 +3117,10 @@ static const char * const sc7180_usb3phy_reset_l[] = {
 	"phy",
 };

+static const char * const qcm2290_usb3phy_reset_l[] = {
+	"phy_phy", "phy",
+};
+
 static const char * const sdm845_pciephy_reset_l[] = {
 	"phy",
 };
@@ -3974,6 +4087,33 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
 	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };

+static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
+	.type			= PHY_TYPE_USB3,
+	.nlanes			= 1,
+
+	.serdes_tbl		= qcm2290_usb3_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(qcm2290_usb3_serdes_tbl),
+	.tx_tbl			= qcm2290_usb3_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(qcm2290_usb3_tx_tbl),
+	.rx_tbl			= qcm2290_usb3_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(qcm2290_usb3_rx_tbl),
+	.pcs_tbl		= qcm2290_usb3_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(qcm2290_usb3_pcs_tbl),
+	.clk_list		= qcm2290_usb3phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qcm2290_usb3phy_clk_l),
+	.reset_list		= qcm2290_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(qcm2290_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qcm2290_usb3phy_regs_layout,
+
+	.start_ctrl		= SERDES_START | PCS_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+	.phy_status		= PHYSTATUS,
+
+	.is_dual_lane_phy	= true,
+};
+
 static void qcom_qmp_phy_configure_lane(void __iomem *base,
 					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
@@ -5605,6 +5745,9 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sm8350-qmp-usb3-uni-phy",
 		.data = &sm8350_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,qcm2290-qmp-usb3-phy",
+		.data = &qcm2290_usb3phy_cfg,
 	}, {
 	},
 };
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index bebeac2c091cb..e15f461065bba 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -169,6 +169,7 @@
 #define QSERDES_COM_BIAS_EN_CTRL_BY_PSM			0x0a8
 #define QSERDES_COM_SYSCLK_EN_SEL			0x0ac
 #define QSERDES_COM_RESETSM_CNTRL			0x0b4
+#define QSERDES_COM_RESETSM_CNTRL2			0x0b8
 #define QSERDES_COM_RESTRIM_CTRL			0x0bc
 #define QSERDES_COM_RESCODE_DIV_NUM			0x0c4
 #define QSERDES_COM_LOCK_CMP_EN				0x0c8
@@ -181,6 +182,7 @@
 #define QSERDES_COM_DIV_FRAC_START1_MODE1		0x0e8
 #define QSERDES_COM_DIV_FRAC_START2_MODE1		0x0ec
 #define QSERDES_COM_DIV_FRAC_START3_MODE1		0x0f0
+#define QSERDES_COM_INTEGLOOP_INITVAL			0x100
 #define QSERDES_COM_INTEGLOOP_GAIN0_MODE0		0x108
 #define QSERDES_COM_INTEGLOOP_GAIN1_MODE0		0x10c
 #define QSERDES_COM_INTEGLOOP_GAIN0_MODE1		0x110
From 9e16f49c8280a358f97649054336212958af0387 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab
Date: Thu, 2 Sep 2021 14:47:48 +0200
Subject: [PATCH 0133/1202] mfd: hi6421-spmi-pmic: cleanup drvdata

There are lots of fields at struct hi6421_spmi_pmic that aren't used. As a matter of fact, only the regmap is needed. So, drop the struct as a whole, and set just the regmap as the drvdata.
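A minimal sketch of the consumer-side change (the irq and regulator
drivers below follow this pattern):

  /* before: drvdata is the wrapper struct */
  struct hi6421_spmi_pmic *pmic = dev_get_drvdata(pmic_dev);
  config.regmap = pmic->regmap;

  /* after: drvdata is the regmap itself */
  struct regmap *regmap = dev_get_drvdata(pmic_dev);
  config.regmap = regmap;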
Acked-by: Mark Brown Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/1828cb783b1ebca0b98bf0b3077d8701adb228f7.1630586862.git.mchehab+huawei@kernel.org --- drivers/mfd/hi6421-spmi-pmic.c | 16 +++++---------- drivers/misc/hi6421v600-irq.c | 9 ++++----- drivers/regulator/hi6421v600-regulator.c | 10 +++++----- include/linux/mfd/hi6421-spmi-pmic.h | 25 ------------------------ 4 files changed, 14 insertions(+), 46 deletions(-) delete mode 100644 include/linux/mfd/hi6421-spmi-pmic.h diff --git a/drivers/mfd/hi6421-spmi-pmic.c b/drivers/mfd/hi6421-spmi-pmic.c index 4f136826681bc..c9c0c3d7011f9 100644 --- a/drivers/mfd/hi6421-spmi-pmic.c +++ b/drivers/mfd/hi6421-spmi-pmic.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include @@ -30,19 +29,14 @@ static const struct regmap_config regmap_config = { static int hi6421_spmi_pmic_probe(struct spmi_device *sdev) { struct device *dev = &sdev->dev; + struct regmap *regmap; int ret; - struct hi6421_spmi_pmic *ddata; - ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); - if (!ddata) - return -ENOMEM; - ddata->regmap = devm_regmap_init_spmi_ext(sdev, ®map_config); - if (IS_ERR(ddata->regmap)) - return PTR_ERR(ddata->regmap); + regmap = devm_regmap_init_spmi_ext(sdev, ®map_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); - ddata->dev = dev; - - dev_set_drvdata(&sdev->dev, ddata); + dev_set_drvdata(&sdev->dev, regmap); ret = devm_mfd_add_devices(&sdev->dev, PLATFORM_DEVID_NONE, hi6421v600_devs, ARRAY_SIZE(hi6421v600_devs), diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c index 08535e97ff433..1c763796cf1fa 100644 --- a/drivers/misc/hi6421v600-irq.c +++ b/drivers/misc/hi6421v600-irq.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -220,7 +219,7 @@ static int hi6421v600_irq_probe(struct platform_device *pdev) struct platform_device *pmic_pdev; struct device *dev = &pdev->dev; struct hi6421v600_irq *priv; - struct hi6421_spmi_pmic *pmic; + struct regmap *regmap; unsigned int virq; int i, ret; @@ -229,8 +228,8 @@ static int hi6421v600_irq_probe(struct platform_device *pdev) * which should first set drvdata. If this doesn't happen, hit * a warn on and return. */ - pmic = dev_get_drvdata(pmic_dev); - if (WARN_ON(!pmic)) + regmap = dev_get_drvdata(pmic_dev); + if (WARN_ON(!regmap)) return -ENODEV; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -238,7 +237,7 @@ static int hi6421v600_irq_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; - priv->regmap = pmic->regmap; + priv->regmap = regmap; spin_lock_init(&priv->lock); diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c index 662d87ae61cb5..4671678f6b19a 100644 --- a/drivers/regulator/hi6421v600-regulator.c +++ b/drivers/regulator/hi6421v600-regulator.c @@ -9,8 +9,8 @@ // Guodong Xu #include -#include #include +#include #include #include #include @@ -237,7 +237,7 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) struct hi6421_spmi_reg_priv *priv; struct hi6421_spmi_reg_info *info; struct device *dev = &pdev->dev; - struct hi6421_spmi_pmic *pmic; + struct regmap *regmap; struct regulator_dev *rdev; int i; @@ -246,8 +246,8 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) * which should first set drvdata. If this doesn't happen, hit * a warn on and return. 
*/ - pmic = dev_get_drvdata(pmic_dev); - if (WARN_ON(!pmic)) + regmap = dev_get_drvdata(pmic_dev); + if (WARN_ON(!regmap)) return -ENODEV; priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -261,7 +261,7 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) config.dev = pdev->dev.parent; config.driver_data = priv; - config.regmap = pmic->regmap; + config.regmap = regmap; rdev = devm_regulator_register(dev, &info->desc, &config); if (IS_ERR(rdev)) { diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h deleted file mode 100644 index e5b8dbf828b68..0000000000000 --- a/include/linux/mfd/hi6421-spmi-pmic.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Header file for device driver Hi6421 PMIC - * - * Copyright (c) 2013 Linaro Ltd. - * Copyright (C) 2011 Hisilicon. - * Copyright (c) 2020-2021 Huawei Technologies Co., Ltd - * - * Guodong Xu - */ - -#ifndef __HISI_PMIC_H -#define __HISI_PMIC_H - -#include -#include - -struct hi6421_spmi_pmic { - struct resource *res; - struct device *dev; - void __iomem *regs; - struct regmap *regmap; -}; - -#endif /* __HISI_PMIC_H */ From 037d5fcf9a8186e64a3db5d733650e4dd37ae20b Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Thu, 23 Sep 2021 02:35:48 +0300 Subject: [PATCH 0134/1202] phy: qcom-qusb2: Fix a memory leak on probe On success nvmem_cell_read() returns a pointer to a dynamically allocated buffer, and therefore it shall be freed after usage. The issue is reported by kmemleak: # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff3b3803e4b280 (size 128): comm "kworker/u16:1", pid 107, jiffies 4294892861 (age 94.120s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
backtrace:
     [<000000007739afdc>] __kmalloc+0x27c/0x41c
     [<0000000071c0fbf8>] nvmem_cell_read+0x40/0xe0
     [<00000000e803ef1f>] qusb2_phy_init+0x258/0x5bc
     [<00000000fc81fcfa>] phy_init+0x70/0x110
     [<00000000e3d48a57>] dwc3_core_soft_reset+0x4c/0x234
     [<0000000027d1dbd4>] dwc3_core_init+0x68/0x990
     [<000000001965faf9>] dwc3_probe+0x4f4/0x730
     [<000000002f7617ca>] platform_probe+0x74/0xf0
     [<00000000a2576cac>] really_probe+0xc4/0x470
     [<00000000bc77f2c5>] __driver_probe_device+0x11c/0x190
     [<00000000130db71f>] driver_probe_device+0x48/0x110
     [<0000000019f36c2b>] __device_attach_driver+0xa4/0x140
     [<00000000e5812ff7>] bus_for_each_drv+0x84/0xe0
     [<00000000f4bac574>] __device_attach+0xe4/0x1c0
     [<00000000d3beb631>] device_initial_probe+0x20/0x30
     [<000000008019b9db>] bus_probe_device+0xa4/0xb0

Fixes: ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips")
Signed-off-by: Vladimir Zapolskiy
Reviewed-by: Bjorn Andersson
Link: https://lore.kernel.org/r/20210922233548.2150244-1-vladimir.zapolskiy@linaro.org
Signed-off-by: Vinod Koul
---
 drivers/phy/qualcomm/phy-qcom-qusb2.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index 7a8e4333babe4..032d02bf50c5a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -561,7 +561,7 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
 {
 	struct device *dev = &qphy->phy->dev;
 	const struct qusb2_phy_cfg *cfg = qphy->cfg;
-	u8 *val;
+	u8 *val, hstx_trim;

 	/* efuse register is optional */
 	if (!qphy->cell)
@@ -575,7 +575,13 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
 	 * set while configuring the phy.
 	 */
 	val = nvmem_cell_read(qphy->cell, NULL);
-	if (IS_ERR(val) || !val[0]) {
+	if (IS_ERR(val)) {
+		dev_dbg(dev, "failed to read a valid hs-tx trim value\n");
+		return;
+	}
+	hstx_trim = val[0];
+	kfree(val);
+	if (!hstx_trim) {
 		dev_dbg(dev, "failed to read a valid hs-tx trim value\n");
 		return;
 	}
@@ -583,12 +589,10 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
 	/* Fused TUNE1/2 value is the higher nibble only */
 	if (cfg->update_tune1_with_efuse)
 		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
-				 val[0] << HSTX_TRIM_SHIFT,
-				 HSTX_TRIM_MASK);
+				 hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK);
 	else
 		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2],
-				 val[0] << HSTX_TRIM_SHIFT,
-				 HSTX_TRIM_MASK);
+				 hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK);
 }

 static int qusb2_phy_set_mode(struct phy *phy,
From d28fa13d0785c6fe076af54e025c729c41f32a2f Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 9 Sep 2021 18:22:40 +0200
Subject: [PATCH 0135/1202] s390/gmap: validate VMA in __gmap_zap()

We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). The pure presence in our guest_to_host radix tree does not imply that there is a VMA.

Further, we should not allocate page tables (via get_locked_pte()) outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them.

Similarly, we have to check if we suddenly find a hugetlbfs VMA, before calling get_locked_pte().
Note that gmap_discard() is different: zap_page_range()->unmap_single_vma() makes sure to stay within VMA boundaries. Fixes: b31288fa83b2 ("s390/kvm: support collaborative memory management") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-2-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/gmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 4d3b33ce81c62..e0735c3437759 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -672,6 +672,7 @@ EXPORT_SYMBOL_GPL(gmap_fault); */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { + struct vm_area_struct *vma; unsigned long vmaddr; spinlock_t *ptl; pte_t *ptep; @@ -681,6 +682,11 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) gaddr >> PMD_SHIFT); if (vmaddr) { vmaddr |= gaddr & ~PMD_MASK; + + vma = vma_lookup(gmap->mm, vmaddr); + if (!vma || is_vm_hugetlb_page(vma)) + return; + /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); if (likely(ptep)) From 7c2e6a000e58cfe12b46eea0555ea22592991c3d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:41 +0200 Subject: [PATCH 0136/1202] s390/gmap: don't unconditionally call pte_unmap_unlock() in __gmap_zap() ... otherwise we will try unlocking a spinlock that was never locked via a garbage pointer. At the time we reach this code path, we usually successfully looked up a PGSTE already; however, evil user space could have manipulated the VMA layout in the meantime and triggered removal of the page table. Fixes: 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-3-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/gmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index e0735c3437759..d63c0ccc5ccda 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -689,9 +689,10 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); From adb58f7781d737edd31f27ab41b04a1a98b778d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:42 +0200 Subject: [PATCH 0137/1202] s390/mm: validate VMA in PGSTE manipulation functions We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). gfn_to_hva() will only translate using KVM memory regions, but won't validate the VMA. Further, we should not allocate page tables outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them. Similarly, we have to check if we suddenly find a hugetlbfs VMA, before calling get_locked_pte(). 
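The fix follows the same shape in each helper; a minimal sketch of the
guard added before get_locked_pte():

  vma = vma_lookup(mm, hva);
  if (!vma || is_vm_hugetlb_page(vma))
          return -EFAULT;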
Fixes: 2d42f9477320 ("s390/kvm: Add PGSTE manipulation functions")
Signed-off-by: David Hildenbrand
Reviewed-by: Claudio Imbrenda
Acked-by: Heiko Carstens
Link: https://lore.kernel.org/r/20210909162248.14969-4-david@redhat.com
Signed-off-by: Christian Borntraeger
---
 arch/s390/mm/pgtable.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 034721a68d8fd..2717a406edeb3 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -988,6 +988,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 			unsigned long *oldpte, unsigned long *oldpgste)
 {
+	struct vm_area_struct *vma;
 	unsigned long pgstev;
 	spinlock_t *ptl;
 	pgste_t pgste;
@@ -997,6 +998,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 	WARN_ON_ONCE(orc > ESSA_MAX);
 	if (unlikely(orc > ESSA_MAX))
 		return -EINVAL;
+
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1089,10 +1094,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
 int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
 			unsigned long bits, unsigned long value)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pgste_t new;
 	pte_t *ptep;

+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1117,9 +1126,13 @@ EXPORT_SYMBOL(set_pgste_bits);
  */
 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pte_t *ptep;

+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
From 8d6deceb90e7392dbc550229be5f15cd3c850513 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 9 Sep 2021 18:22:43 +0200
Subject: [PATCH 0138/1202] s390/mm: fix VMA and page table handling code in
 storage key handling functions

There are multiple things broken about our storage key handling functions:

1. We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"). gfn_to_hva() will only translate using KVM memory regions, but won't validate the VMA.

2. We should not allocate page tables outside of VMA boundaries: if evil user space decides to map hugetlbfs to these ranges, bad things will happen because we suddenly have PTE or PMD page tables where we shouldn't have them.

3. We don't handle large PUDs that might suddenly appear inside our page table hierarchy.

Don't manually allocate page tables, properly validate that we have a VMA, and bail out on pud_large().

All callers of page table handling functions, except get_guest_storage_key(), call fixup_user_fault() in case they receive an -EFAULT and retry; this will allocate the necessary page tables if required. To keep get_guest_storage_key() working as expected and not requiring kvm_s390_get_skeys() to call fixup_user_fault(), distinguish between "there is simply no page table or huge page yet and the key is assumed to be 0" and "this is a fault to be reported".
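A sketch of the resulting pmd_lookup() handling in get_guest_storage_key()
(the full hunk is below):

  switch (pmd_lookup(mm, addr, &pmdp)) {
  case -ENOENT:
          return 0;       /* no table and no huge page: key is 0 */
  case 0:
          break;
  default:
          return -EFAULT; /* anything else really is a fault */
  }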
Although commit 637ff9efe5ea ("s390/mm: Add huge pmd storage key handling") introduced most of the affected code, it was actually already broken before when using get_locked_pte() without any VMA checks. Note: Ever since commit 637ff9efe5ea ("s390/mm: Add huge pmd storage key handling") we can no longer set a guest storage key (for example from QEMU during VM live migration) without actually resolving a fault. Although we would have created most page tables, we would choke on the !pmd_present(), requiring a call to fixup_user_fault(). I would have thought that this is problematic in combination with postcopy life migration ... but nobody noticed and this patch doesn't change the situation. So maybe it's just fine. Fixes: 9fcf93b5de06 ("KVM: S390: Create helper function get_guest_storage_key") Fixes: 24d5dd0208ed ("s390/kvm: Provide function for setting the guest storage key") Fixes: a7e19ab55ffd ("KVM: s390: handle missing storage-key facility") Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-5-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2717a406edeb3..6ad634a27d5b9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -429,22 +429,36 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, } #ifdef CONFIG_PGSTE -static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp) { + struct vm_area_struct *vma; pgd_t *pgd; p4d_t *p4d; pud_t *pud; - pmd_t *pmd; + + /* We need a valid VMA, otherwise this is clearly a fault. */ + vma = vma_lookup(mm, addr); + if (!vma) + return -EFAULT; pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return NULL; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return NULL; - pmd = pmd_alloc(mm, pud, addr); - return pmd; + if (!pgd_present(*pgd)) + return -ENOENT; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return -ENOENT; + + pud = pud_offset(p4d, addr); + if (!pud_present(*pud)) + return -ENOENT; + + /* Large PUDs are not supported yet. */ + if (pud_large(*pud)) + return -EFAULT; + + *pmdp = pmd_offset(pud, addr); + return 0; } #endif @@ -778,8 +792,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + if (pmd_lookup(mm, addr, &pmdp)) return -EFAULT; ptl = pmd_lock(mm, pmdp); @@ -881,8 +894,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pte_t *ptep; int cc = 0; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + if (pmd_lookup(mm, addr, &pmdp)) return -EFAULT; ptl = pmd_lock(mm, pmdp); @@ -935,15 +947,24 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - pmdp = pmd_alloc_map(mm, addr); - if (unlikely(!pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * the storage key is 0. 
+	 */
+	*key = 0;
+
+	switch (pmd_lookup(mm, addr, &pmdp)) {
+	case -ENOENT:
+		return 0;
+	case 0:
+		break;
+	default:
 		return -EFAULT;
+	}

 	ptl = pmd_lock(mm, pmdp);
 	if (!pmd_present(*pmdp)) {
-		/* Not yet mapped memory has a zero key */
 		spin_unlock(ptl);
-		*key = 0;
 		return 0;
 	}
From c27e18c2a65f561b5a49e0d8843db9503a955fd1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 9 Sep 2021 18:22:44 +0200
Subject: [PATCH 0139/1202] s390/uv: fully validate the VMA before calling
 follow_page()

We should not walk/touch page tables outside of VMA boundaries when holding only the mmap sem in read mode. Evil user space can modify the VMA layout just before this function runs and e.g., trigger races with page table removal code since commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap").

find_vma() does not check if the address is >= the VMA start address; use vma_lookup() instead.

Fixes: 214d9bbcd3a6 ("s390/mm: provide memory management functions for protected KVM guests")
Signed-off-by: David Hildenbrand
Reviewed-by: Claudio Imbrenda
Acked-by: Heiko Carstens
Reviewed-by: Liam R. Howlett
Link: https://lore.kernel.org/r/20210909162248.14969-6-david@redhat.com
Signed-off-by: Christian Borntraeger
---
 arch/s390/kernel/uv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 5a656c7b7a67a..f95ccbd396925 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -212,7 +212,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
 	uaddr = __gmap_translate(gmap, gaddr);
 	if (IS_ERR_VALUE(uaddr))
 		goto out;
-	vma = find_vma(gmap->mm, uaddr);
+	vma = vma_lookup(gmap->mm, uaddr);
 	if (!vma)
 		goto out;
 	/*
From d084bf329d8d2017b88f45a9a7a9d38a251a6313 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Thu, 9 Sep 2021 18:22:46 +0200
Subject: [PATCH 0140/1202] s390/mm: no need for pte_alloc_map_lock() if we
 know the pmd is present

pte_offset_map_lock() is sufficient.
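In other words, a sketch of the change this patch applies to each helper
below:

  /* before: may allocate, may fail */
  ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
  if (unlikely(!ptep))
          return -EFAULT;

  /* after: the pmd was already checked with pmd_present() */
  ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);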
Signed-off-by: David Hildenbrand Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-8-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6ad634a27d5b9..e74cc59dcd67d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -814,10 +814,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); @@ -912,10 +909,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ pgste_val(new) &= ~PGSTE_GR_BIT; @@ -977,10 +971,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, } spin_unlock(ptl); - ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); - if (unlikely(!ptep)) - return -EFAULT; - + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; paddr = pte_val(*ptep) & PAGE_MASK; From c537e76035d1aeb345c9863f3c5b003ca7899b73 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:47 +0200 Subject: [PATCH 0141/1202] s390/mm: optimize set_guest_storage_key() We already optimize get_guest_storage_key() to assume that if we don't have a PTE table and don't have a huge page mapped that the storage key is 0. Similarly, optimize set_guest_storage_key() to simply do nothing in case the key to set is 0. Signed-off-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Acked-by: Heiko Carstens Link: https://lore.kernel.org/r/20210909162248.14969-9-david@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e74cc59dcd67d..1c9aeb361f8dc 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -792,13 +792,23 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp; pte_t *ptep; - if (pmd_lookup(mm, addr, &pmdp)) + /* + * If we don't have a PTE table and if there is no huge page mapped, + * we can ignore attempts to set the key to 0, because it already is 0. + */ + switch (pmd_lookup(mm, addr, &pmdp)) { + case -ENOENT: + return key ? -EFAULT : 0; + case 0: + break; + default: return -EFAULT; + } ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); - return -EFAULT; + return key ? -EFAULT : 0; } if (pmd_large(*pmdp)) { From 85fa1e6bc225eff2f295b6d62a7a9b195bb09790 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 9 Sep 2021 18:22:48 +0200 Subject: [PATCH 0142/1202] s390/mm: optimize reset_guest_reference_bit() We already optimize get_guest_storage_key() to assume that if we don't have a PTE table and don't have a huge page mapped that the storage key is 0. Similarly, optimize reset_guest_reference_bit() to simply do nothing if there is no PTE table and no huge page mapped. 
Signed-off-by: David Hildenbrand
Reviewed-by: Claudio Imbrenda
Acked-by: Heiko Carstens
Link: https://lore.kernel.org/r/20210909162248.14969-10-david@redhat.com
Signed-off-by: Christian Borntraeger
---
 arch/s390/mm/pgtable.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1c9aeb361f8dc..c16232cd0ec59 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -901,13 +901,23 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 	pte_t *ptep;
 	int cc = 0;

-	if (pmd_lookup(mm, addr, &pmdp))
+	/*
+	 * If we don't have a PTE table and if there is no huge page mapped,
+	 * the storage key is 0 and there is nothing for us to do.
+	 */
+	switch (pmd_lookup(mm, addr, &pmdp)) {
+	case -ENOENT:
+		return 0;
+	case 0:
+		break;
+	default:
 		return -EFAULT;
+	}

 	ptl = pmd_lock(mm, pmdp);
 	if (!pmd_present(*pmdp)) {
 		spin_unlock(ptl);
-		return -EFAULT;
+		return 0;
 	}

 	if (pmd_large(*pmdp)) {
From ec45f15dd4b43f2462855238df35f9e6d89c965e Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda
Date: Mon, 20 Sep 2021 15:24:49 +0200
Subject: [PATCH 0143/1202] KVM: s390: pv: add macros for UVC CC values

Add macros to describe the 4 possible CC values returned by the UVC instruction.

Signed-off-by: Claudio Imbrenda
Reviewed-by: Christian Borntraeger
Reviewed-by: Janosch Frank
Message-Id: <20210920132502.36111-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank
Signed-off-by: Christian Borntraeger
---
 arch/s390/include/asm/uv.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index fe92a4caf5ec8..9ab1914c5b952 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -18,6 +18,11 @@
 #include
 #include

+#define UVC_CC_OK	0
+#define UVC_CC_ERROR	1
+#define UVC_CC_BUSY	2
+#define UVC_CC_PARTIAL	3
+
 #define UVC_RC_EXECUTED		0x0001
 #define UVC_RC_INV_CMD		0x0002
 #define UVC_RC_INV_STATE	0x0003
From f32b90c28e0a4779018f65cc17c0b25a38a3999b Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda
Date: Mon, 20 Sep 2021 15:24:50 +0200
Subject: [PATCH 0144/1202] KVM: s390: pv: avoid double free of sida page

If kvm_s390_pv_destroy_cpu is called more than once, we risk calling free_page on a random page, since the sidad field is aliased with the gbea, which is not guaranteed to be zero.

This can happen, for example, if userspace calls the KVM_PV_DISABLE IOCTL, and it fails, and then userspace calls the same IOCTL again. This scenario is only possible if KVM has some serious bug or if the hardware is broken.

The solution is simply to return success immediately if the vCPU was already non-secure.
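A sketch of the early exit this introduces at the top of
kvm_s390_pv_destroy_cpu():

  if (!kvm_s390_pv_cpu_get_handle(vcpu))
          return 0;   /* already non-secure, nothing to destroy */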
Signed-off-by: Claudio Imbrenda
Fixes: 19e1227768863a1469797c13ef8fea1af7beac2c ("KVM: S390: protvirt: Introduce instruction data area bounce buffer")
Reviewed-by: Janosch Frank
Reviewed-by: Christian Borntraeger
Message-Id: <20210920132502.36111-3-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank
Signed-off-by: Christian Borntraeger
---
 arch/s390/kvm/pv.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index c8841f476e913..0a854115100b4 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -16,18 +16,17 @@

 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 {
-	int cc = 0;
+	int cc;

-	if (kvm_s390_pv_cpu_get_handle(vcpu)) {
-		cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
-				   UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+	if (!kvm_s390_pv_cpu_get_handle(vcpu))
+		return 0;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+		     vcpu->vcpu_id, *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

-		KVM_UV_EVENT(vcpu->kvm, 3,
-			     "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
-			     vcpu->vcpu_id, *rc, *rrc);
-		WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
-			  *rc, *rrc);
-	}
 	/* Intended memory leak for something that should never happen. */
 	if (!cc)
 		free_pages(vcpu->arch.pv.stor_base,
From 3094c9738e4b9c3f448ca5899f48e908c818dfac Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda
Date: Mon, 20 Sep 2021 15:24:51 +0200
Subject: [PATCH 0145/1202] KVM: s390: pv: avoid stalls for kvm_s390_pv_init_vm

When the system is heavily overcommitted, kvm_s390_pv_init_vm might generate stall notifications.

Fix this by using uv_call_sched instead of just uv_call. This is ok because we are not holding spinlocks.

Signed-off-by: Claudio Imbrenda
Fixes: 214d9bbcd3a672 ("s390/mm: provide memory management functions for protected KVM guests")
Reviewed-by: Christian Borntraeger
Reviewed-by: Janosch Frank
Message-Id: <20210920132502.36111-4-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank
Signed-off-by: Christian Borntraeger
---
 arch/s390/kvm/pv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 0a854115100b4..00d272d134c24 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -195,7 +195,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
 	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;

-	cc = uv_call(0, (u64)&uvcb);
+	cc = uv_call_sched(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
 	*rrc = uvcb.header.rrc;
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
From 9931912237f01ae19ad03be0590c3fe364ebea19 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 14 Sep 2021 09:46:47 +0100
Subject: [PATCH 0146/1202] fscache: Generalise the ->begin_read_operation
 method

Generalise the ->begin_read_operation() method in the fscache_cache_ops struct so that it's not read specific by:

 (1) changing the name to ->begin_operation();

 (2) changing the netfs_read_request struct pointer parameter to be a netfs_cache_resources struct pointer (it only accesses the cache resources and an ID for debugging from the read request);

 (3) and by changing the fscache_retrieval pointer parameter to be a fscache_operation pointer (it only accesses the operation base of the retrieval).
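That is, the hook ends up with the following shape (a sketch inferred from
the cachefiles declaration below; the fscache-cache.h hunk at the end
spells out the final form):

  /* Begin an operation for the netfs lib */
  int (*begin_operation)(struct netfs_cache_resources *cres,
                         struct fscache_operation *op);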
Also modify the cachefiles implementation so that it stores a pointer to the fscache_operation rather than fscache_retrieval in the cache resources. This makes it easier to share code with the write path in future. Signed-off-by: David Howells Tested-by: Dave Wysochanski Link: https://lore.kernel.org/r/163162768886.438332.9851931782896704604.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189105559.2509237.10649193286647776741.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/afs/file.c | 2 +- fs/cachefiles/interface.c | 2 +- fs/cachefiles/internal.h | 4 ++-- fs/cachefiles/io.c | 28 +++++++++++++--------------- fs/ceph/cache.h | 2 +- fs/fscache/io.c | 35 +++++++++++++++++++++-------------- include/linux/fscache-cache.h | 6 +++--- include/linux/fscache.h | 15 ++++++++------- 8 files changed, 50 insertions(+), 44 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index e6c447ae91f38..4d5b6bfcf815d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -351,7 +351,7 @@ static int afs_begin_cache_operation(struct netfs_read_request *rreq) { struct afs_vnode *vnode = AFS_FS_I(rreq->inode); - return fscache_begin_read_operation(rreq, afs_vnode_cache(vnode)); + return fscache_begin_read_operation(&rreq->cache_resources, afs_vnode_cache(vnode)); } static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index da28ac1fa225d..8a7755b86c59e 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -568,5 +568,5 @@ const struct fscache_cache_ops cachefiles_cache_ops = { .uncache_page = cachefiles_uncache_page, .dissociate_pages = cachefiles_dissociate_pages, .check_consistency = cachefiles_check_consistency, - .begin_read_operation = cachefiles_begin_read_operation, + .begin_operation = cachefiles_begin_operation, }; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 0a511c36dab85..994f90ff12ac7 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -198,8 +198,8 @@ extern void cachefiles_uncache_page(struct fscache_object *, struct page *); /* * rdwr2.c */ -extern int cachefiles_begin_read_operation(struct netfs_read_request *, - struct fscache_retrieval *); +extern int cachefiles_begin_operation(struct netfs_cache_resources *, + struct fscache_operation *); /* * security.c diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index fac2e8e7b533e..08b3183e0dcea 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -268,7 +268,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, loff_t i_size) { - struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv; + struct fscache_operation *op = subreq->rreq->cache_resources.cache_priv; struct cachefiles_object *object; struct cachefiles_cache *cache; const struct cred *saved_cred; @@ -277,8 +277,7 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); - object = container_of(op->op.object, - struct cachefiles_object, fscache); + object = container_of(op->object, struct cachefiles_object, fscache); cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); @@ -351,7 +350,7 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres, */ static void cachefiles_end_operation(struct netfs_cache_resources *cres) { - struct fscache_retrieval *op = 
cres->cache_priv; + struct fscache_operation *op = cres->cache_priv; struct file *file = cres->cache_priv2; _enter(""); @@ -359,8 +358,8 @@ static void cachefiles_end_operation(struct netfs_cache_resources *cres) if (file) fput(file); if (op) { - fscache_op_complete(&op->op, false); - fscache_put_retrieval(op); + fscache_op_complete(op, false); + fscache_put_operation(op); } _leave(""); @@ -377,8 +376,8 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { /* * Open the cache file when beginning a cache operation. */ -int cachefiles_begin_read_operation(struct netfs_read_request *rreq, - struct fscache_retrieval *op) +int cachefiles_begin_operation(struct netfs_cache_resources *cres, + struct fscache_operation *op) { struct cachefiles_object *object; struct cachefiles_cache *cache; @@ -387,8 +386,7 @@ int cachefiles_begin_read_operation(struct netfs_read_request *rreq, _enter(""); - object = container_of(op->op.object, - struct cachefiles_object, fscache); + object = container_of(op->object, struct cachefiles_object, fscache); cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); @@ -406,11 +404,11 @@ int cachefiles_begin_read_operation(struct netfs_read_request *rreq, goto error_file; } - fscache_get_retrieval(op); - rreq->cache_resources.cache_priv = op; - rreq->cache_resources.cache_priv2 = file; - rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; - rreq->cache_resources.debug_id = object->fscache.debug_id; + atomic_inc(&op->usage); + cres->cache_priv = op; + cres->cache_priv2 = file; + cres->ops = &cachefiles_netfs_cache_ops; + cres->debug_id = object->fscache.debug_id; _leave(""); return 0; diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index 058ea2a043762..b94d3f38fb253 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -54,7 +54,7 @@ static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq) { struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode)); - return fscache_begin_read_operation(rreq, cookie); + return fscache_begin_read_operation(&rreq->cache_resources, cookie); } #else diff --git a/fs/fscache/io.c b/fs/fscache/io.c index 8ecc1141802f4..3745a06316181 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -14,7 +14,7 @@ #include "internal.h" /* - * Start a cache read operation. + * Start a cache operation. 
* - we return: * -ENOMEM - out of memory, some pages may be being read * -ERESTARTSYS - interrupted, some pages may be being read @@ -24,15 +24,16 @@ * the pages * 0 - dispatched a read on all pages */ -int __fscache_begin_read_operation(struct netfs_read_request *rreq, - struct fscache_cookie *cookie) +int __fscache_begin_operation(struct netfs_cache_resources *cres, + struct fscache_cookie *cookie, + bool for_write) { - struct fscache_retrieval *op; + struct fscache_operation *op; struct fscache_object *object; bool wake_cookie = false; int ret; - _enter("rr=%08x", rreq->debug_id); + _enter("c=%08x", cres->debug_id); fscache_stat(&fscache_n_retrievals); @@ -49,10 +50,16 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; - op = fscache_alloc_retrieval(cookie, NULL, NULL, NULL); + op = kzalloc(sizeof(*op), GFP_KERNEL); if (!op) return -ENOMEM; - trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi); + + fscache_operation_init(cookie, op, NULL, NULL, NULL); + op->flags = FSCACHE_OP_MYTHREAD | + (1UL << FSCACHE_OP_WAITING) | + (1UL << FSCACHE_OP_UNUSE_COOKIE); + + trace_fscache_page_op(cookie, NULL, op, fscache_page_op_retr_multi); spin_lock(&cookie->lock); @@ -64,9 +71,9 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, __fscache_use_cookie(cookie); atomic_inc(&object->n_reads); - __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags); - if (fscache_submit_op(object, &op->op) < 0) + if (fscache_submit_op(object, op) < 0) goto nobufs_unlock_dec; spin_unlock(&cookie->lock); @@ -75,14 +82,14 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ ret = fscache_wait_for_operation_activation( - object, &op->op, + object, op, __fscache_stat(&fscache_n_retrieval_op_waits), __fscache_stat(&fscache_n_retrievals_object_dead)); if (ret < 0) goto error; /* ask the cache to honour the operation */ - ret = object->cache->ops->begin_read_operation(rreq, op); + ret = object->cache->ops->begin_operation(cres, op); error: if (ret == -ENOMEM) @@ -96,7 +103,7 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, else fscache_stat(&fscache_n_retrievals_ok); - fscache_put_retrieval(op); + fscache_put_operation(op); _leave(" = %d", ret); return ret; @@ -105,7 +112,7 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - fscache_put_retrieval(op); + fscache_put_operation(op); if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: @@ -113,4 +120,4 @@ int __fscache_begin_read_operation(struct netfs_read_request *rreq, _leave(" = -ENOBUFS"); return -ENOBUFS; } -EXPORT_SYMBOL(__fscache_begin_read_operation); +EXPORT_SYMBOL(__fscache_begin_operation); diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 8d39491c5f9fa..efa9b6f9fab17 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -304,9 +304,9 @@ struct fscache_cache_ops { /* dissociate a cache from all the pages it was backing */ void (*dissociate_pages)(struct fscache_cache *cache); - /* Begin a read operation for the netfs lib */ - int (*begin_read_operation)(struct netfs_read_request *rreq, - struct fscache_retrieval *op); + /* Begin an operation for the netfs lib */ 
+ int (*begin_operation)(struct netfs_cache_resources *cres, + struct fscache_operation *op); }; extern struct fscache_cookie fscache_fsdef_index; diff --git a/include/linux/fscache.h b/include/linux/fscache.h index a4dab59986137..32f65c16328a2 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -196,7 +196,8 @@ extern void __fscache_invalidate(struct fscache_cookie *); extern void __fscache_wait_on_invalidate(struct fscache_cookie *); #ifdef FSCACHE_USE_NEW_IO_API -extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *); +extern int __fscache_begin_operation(struct netfs_cache_resources *, struct fscache_cookie *, + bool); #else extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, @@ -511,12 +512,12 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) /** * fscache_begin_read_operation - Begin a read operation for the netfs lib - * @rreq: The read request being undertaken + * @cres: The cache resources for the read being performed * @cookie: The cookie representing the cache object * - * Begin a read operation on behalf of the netfs helper library. @rreq - * indicates the read request to which the operation state should be attached; - * @cookie indicates the cache object that will be accessed. + * Begin a read operation on behalf of the netfs helper library. @cres + * indicates the cache resources to which the operation state should be + * attached; @cookie indicates the cache object that will be accessed. * * This is intended to be called from the ->begin_cache_operation() netfs lib * operation as implemented by the network filesystem. @@ -527,11 +528,11 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) * * Other error code from the cache, such as -ENOMEM. */ static inline -int fscache_begin_read_operation(struct netfs_read_request *rreq, +int fscache_begin_read_operation(struct netfs_cache_resources *cres, struct fscache_cookie *cookie) { if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_begin_read_operation(rreq, cookie); + return __fscache_begin_operation(cres, cookie, false); return -ENOBUFS; } From 9be48df9ab2c8f19e03708747fa6209b7b0adca8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Sep 2021 10:36:05 +0100 Subject: [PATCH 0147/1202] fscache: Fix fscache_cookie_enabled() to handle NULL cookie Fix fscache_cookie_enabled() to handle being given a NULL cookie pointer (ie. a cookie was not allocated and caching should be ignored). Signed-off-by: David Howells --- include/linux/fscache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 32f65c16328a2..5e3ddb11715a1 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -167,7 +167,8 @@ struct fscache_cookie { static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie) { - return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + return (fscache_cookie_valid(cookie) && + test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)); } /* From 14cb4fc1b4229b3f14983ef6837c9ee001aa5ed6 Mon Sep 17 00:00:00 2001 From: Mateusz Kwiatkowski Date: Thu, 22 Jul 2021 21:02:08 +0200 Subject: [PATCH 0148/1202] ARM: dts: bcm283x: Fix VEC address for BCM2711 The VEC has a different address (0x7ec13000) on the BCM2711 (used in e.g. Raspberry Pi 4) compared to BCM283x (e.g. Pi 3 and earlier). This was erroneously not taken into account.
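Once the node is split per SoC (as described next), a driver tells the two apart by compatible string while each node's reg supplies the right MMIO address. As a hedged illustration of that driver-side pattern (the two compatible strings are the real ones involved here; the struct and variable names are invented):

#include <linux/of.h>

struct vec_variant_sketch {
	bool is_bcm2711;	/* illustrative per-SoC data */
};

static const struct vec_variant_sketch bcm2835_vec_data = { .is_bcm2711 = false };
static const struct vec_variant_sketch bcm2711_vec_data = { .is_bcm2711 = true };

/* of_device_get_match_data() would hand back the matched .data entry. */
static const struct of_device_id vec_of_match_sketch[] = {
	{ .compatible = "brcm,bcm2835-vec", .data = &bcm2835_vec_data },
	{ .compatible = "brcm,bcm2711-vec", .data = &bcm2711_vec_data },
	{ /* sentinel */ }
};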
Definition of the VEC in the devicetrees had to be moved from bcm283x.dtsi to bcm2711.dtsi and bcm2835-common.dtsi to allow for this differentiation. Fixes: 7894bdc6228f ("ARM: boot: dts: bcm2711: Add BCM2711 VEC compatible") Signed-off-by: Mateusz Kwiatkowski Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1626980528-3835-1-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711.dtsi | 8 ++++++++ arch/arm/boot/dts/bcm2835-common.dtsi | 8 ++++++++ arch/arm/boot/dts/bcm283x.dtsi | 8 -------- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index b8a4096192aa9..734342c6bb154 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -300,6 +300,14 @@ status = "disabled"; }; + vec: vec@7ec13000 { + compatible = "brcm,bcm2711-vec"; + reg = <0x7ec13000 0x1000>; + clocks = <&clocks BCM2835_CLOCK_VEC>; + interrupts = ; + status = "disabled"; + }; + dvp: clock@7ef00000 { compatible = "brcm,brcm2711-dvp"; reg = <0x7ef00000 0x10>; diff --git a/arch/arm/boot/dts/bcm2835-common.dtsi b/arch/arm/boot/dts/bcm2835-common.dtsi index 4119271c979d6..c25e797b90600 100644 --- a/arch/arm/boot/dts/bcm2835-common.dtsi +++ b/arch/arm/boot/dts/bcm2835-common.dtsi @@ -106,6 +106,14 @@ status = "okay"; }; + vec: vec@7e806000 { + compatible = "brcm,bcm2835-vec"; + reg = <0x7e806000 0x1000>; + clocks = <&clocks BCM2835_CLOCK_VEC>; + interrupts = <2 27>; + status = "disabled"; + }; + pixelvalve@7e807000 { compatible = "brcm,bcm2835-pixelvalve2"; reg = <0x7e807000 0x100>; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 0f3be55201a5b..a3e06b6809476 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -464,14 +464,6 @@ status = "disabled"; }; - vec: vec@7e806000 { - compatible = "brcm,bcm2835-vec"; - reg = <0x7e806000 0x1000>; - clocks = <&clocks BCM2835_CLOCK_VEC>; - interrupts = <2 27>; - status = "disabled"; - }; - usb: usb@7e980000 { compatible = "brcm,bcm2835-usb"; reg = <0x7e980000 0x10000>; From c7a774b1e713f6b26e94ad5a6b98a710865b7900 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:32 +0200 Subject: [PATCH 0149/1202] ARM: dts: bcm2711: fix MDIO #address- and #size-cells The values of #address-cells and #size-cells are swapped. 
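For background: on an MDIO bus every child PHY node is addressed by a single cell (its address on the bus) and covers no address range, which is why the bus node must carry #address-cells = <1> and #size-cells = <0>. A hedged sketch of the parsing this implies (of_property_read_u32() is the real OF helper; the wrapper function is illustrative only):

#include <linux/of.h>

/* With #address-cells = <1>, a PHY's "reg" is one cell: a single u32
 * holding its 0-31 MDIO bus address. With #size-cells = <0> there is
 * no size value to parse at all. */
static int mdio_child_addr_sketch(struct device_node *phy_np, u32 *addr)
{
	return of_property_read_u32(phy_np, "reg", addr);
}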
Fix this and avoid the following DT schema warnings for mdio@e14: #address-cells:0:0: 1 was expected #size-cells:0:0: 0 was expected Fixes: be8af7a9e3cc ("ARM: dts: bcm2711-rpi-4: Enable GENET support") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-2-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 734342c6bb154..3b60297af7f60 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -540,8 +540,8 @@ compatible = "brcm,genet-mdio-v5"; reg = <0xe14 0x8>; reg-names = "mdio"; - #address-cells = <0x0>; - #size-cells = <0x1>; + #address-cells = <0x1>; + #size-cells = <0x0>; }; }; }; From ed9b4d0416b7eb2cbca014118eff877b00b5d367 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:33 +0200 Subject: [PATCH 0150/1202] ARM: dts: bcm2711-rpi-4-b: fix sd_io_1v8_reg regulator states DT schema check complains at sd_io_1v8_reg about the following: [1800000, 1, 3300000, 0] is too long Additional items are not allowed (3300000, 0 were unexpected) So fix the states definition. Fixes: 7dbe8c62ceeb ("ARM: dts: Add minimal Raspberry Pi 4 support") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-3-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711-rpi-4-b.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index f24bdd0870a52..abf8298610a77 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -40,8 +40,8 @@ regulator-always-on; regulator-settling-time-us = <5000>; gpios = <&expgpio 4 GPIO_ACTIVE_HIGH>; - states = <1800000 0x1 - 3300000 0x0>; + states = <1800000 0x1>, + <3300000 0x0>; status = "okay"; }; From 367009c7ef0f0b376d3449e626c5d9853c31f923 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:34 +0200 Subject: [PATCH 0151/1202] dt-bindings: display: bcm2835: add optional property power-domains The Raspberry Pi boards with BCM283x need control of the power domains to get display components running. DT schema warns us that it's used, but not documented as an optional property: hdmi@7e902000: 'power-domains' does not match any of the regexes: ...
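For context on how the property is consumed: with a single power-domains entry, the driver core attaches the referenced domain before probe, and runtime PM then powers it up and down, so the display drivers need no domain-specific code. A minimal, hedged sketch (the pm_runtime_* calls are real kernel APIs; the probe function itself is hypothetical):

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

/* Sketch: "power-domains = <&power ...>;" in the node means the domain
 * is attached automatically; resuming the device powers it on. */
static int demo_display_probe(struct platform_device *pdev)
{
	int ret;

	pm_runtime_enable(&pdev->dev);
	ret = pm_runtime_resume_and_get(&pdev->dev);
	if (ret < 0) {
		pm_runtime_disable(&pdev->dev);
		return ret;
	}
	/* ... the hardware is now powered; set up the device ... */
	return 0;
}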
Signed-off-by: Stefan Wahren Acked-by: Rob Herring Link: https://lore.kernel.org/r/1628334401-6577-4-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- .../devicetree/bindings/display/brcm,bcm2835-dsi0.yaml | 3 +++ .../devicetree/bindings/display/brcm,bcm2835-hdmi.yaml | 3 +++ .../devicetree/bindings/display/brcm,bcm2835-v3d.yaml | 3 +++ .../devicetree/bindings/display/brcm,bcm2835-vec.yaml | 3 +++ 4 files changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml index 32608578a352e..c8b2459d64f66 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-dsi0.yaml @@ -47,6 +47,9 @@ properties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + required: - "#clock-cells" - compatible diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml index 031e35e76db2a..48c8cad0d96d4 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-hdmi.yaml @@ -51,6 +51,9 @@ properties: dma-names: const: audio-rx + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml index 8a73780f573d3..c55a8217de250 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-v3d.yaml @@ -24,6 +24,9 @@ properties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml index 9b24081a0dbd8..5d921e30394e3 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2835-vec.yaml @@ -24,6 +24,9 @@ properties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + required: - compatible - reg From 2383fb799b746fe5b9db858a6b3064522135adff Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:37 +0200 Subject: [PATCH 0152/1202] ARM: dts: bcm283x-rpi: Move Wifi/BT into separate dtsi A Wifi/BT chip is quite common for the Raspberry Pi boards. So move those definitions into a separate dtsi in order to avoid copy & paste. This change was inspired by a vendor tree patch from Phil Elwell. 
Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-7-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711-rpi-4-b.dts | 38 +++++----------------- arch/arm/boot/dts/bcm2835-rpi-zero-w.dts | 31 +++++------------- arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts | 36 ++++---------------- arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts | 36 +++++--------------- arch/arm/boot/dts/bcm2837-rpi-3-b.dts | 36 +++++--------------- arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi | 34 +++++++++++++++++++ 6 files changed, 74 insertions(+), 137 deletions(-) create mode 100644 arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index abf8298610a77..c54ba5cf277c1 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -3,6 +3,7 @@ #include "bcm2711.dtsi" #include "bcm2711-rpi.dtsi" #include "bcm283x-rpi-usb-peripheral.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" / { compatible = "raspberrypi,4-model-b", "brcm,bcm2711"; @@ -26,11 +27,6 @@ }; }; - wifi_pwrseq: wifi-pwrseq { - compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; - }; - sd_io_1v8_reg: sd_io_1v8_reg { compatible = "regulator-gpio"; regulator-name = "vdd-sd-io"; @@ -56,6 +52,10 @@ }; }; +&bt { + shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; +}; + &ddc0 { status = "okay"; }; @@ -178,23 +178,6 @@ status = "okay"; }; -/* SDHCI is used to control the SDIO for wireless */ -&sdhci { - #address-cells = <1>; - #size-cells = <0>; - pinctrl-names = "default"; - pinctrl-0 = <&emmc_gpio34>; - bus-width = <4>; - non-removable; - mmc-pwrseq = <&wifi_pwrseq>; - status = "okay"; - - brcmf: wifi@1 { - reg = <1>; - compatible = "brcm,bcm4329-fmac"; - }; -}; - /* EMMC2 is used to drive the SD card */ &emmc2 { vqmmc-supply = <&sd_io_1v8_reg>; @@ -236,13 +219,6 @@ pinctrl-names = "default"; pinctrl-0 = <&uart0_ctsrts_gpio30 &uart0_gpio32>; uart-has-rtscts; - status = "okay"; - - bluetooth { - compatible = "brcm,bcm43438-bt"; - max-speed = <2000000>; - shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; - }; }; /* uart1 is mapped to the pin header */ @@ -259,3 +235,7 @@ &vec { status = "disabled"; }; + +&wifi_pwrseq { + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; +}; diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 33b2b77aa47db..243236bc1e00b 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -7,6 +7,7 @@ #include "bcm2835.dtsi" #include "bcm2835-rpi.dtsi" #include "bcm283x-rpi-usb-otg.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" / { compatible = "raspberrypi,model-zero-w", "brcm,bcm2835"; @@ -27,11 +28,10 @@ gpios = <&gpio 47 GPIO_ACTIVE_LOW>; }; }; +}; - wifi_pwrseq: wifi-pwrseq { - compatible = "mmc-pwrseq-simple"; - reset-gpios = <&gpio 41 GPIO_ACTIVE_LOW>; - }; +&bt { + shutdown-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>; }; &gpio { @@ -110,19 +110,7 @@ }; &sdhci { - #address-cells = <1>; - #size-cells = <0>; - pinctrl-names = "default"; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; - bus-width = <4>; - mmc-pwrseq = <&wifi_pwrseq>; - non-removable; - status = "okay"; - - brcmf: wifi@1 { - reg = <1>; - compatible = "brcm,bcm4329-fmac"; - }; }; &sdhost { @@ -135,13 +123,6 @@ &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_gpio32 &uart0_ctsrts_gpio30>; - status = "okay"; - - bluetooth { - compatible = "brcm,bcm43438-bt"; - max-speed = <2000000>; - 
shutdown-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>; - }; }; &uart1 { @@ -149,3 +130,7 @@ pinctrl-0 = <&uart1_gpio14>; status = "okay"; }; + +&wifi_pwrseq { + reset-gpios = <&gpio 41 GPIO_ACTIVE_LOW>; +}; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts index 77099a7871b03..d73daf5bff1d6 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts @@ -3,6 +3,7 @@ #include "bcm2837.dtsi" #include "bcm2836-rpi.dtsi" #include "bcm283x-rpi-usb-host.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" / { compatible = "raspberrypi,3-model-a-plus", "brcm,bcm2837"; @@ -130,28 +131,6 @@ status = "okay"; }; -/* - * SDHCI is used to control the SDIO for wireless - * - * WL_REG_ON and BT_REG_ON of the CYW43455 Wifi/BT module are driven - * by a single GPIO. We can't give GPIO control to one of the drivers, - * otherwise the other part would get unexpectedly disturbed. - */ -&sdhci { - #address-cells = <1>; - #size-cells = <0>; - pinctrl-names = "default"; - pinctrl-0 = <&emmc_gpio34>; - status = "okay"; - bus-width = <4>; - non-removable; - - brcmf: wifi@1 { - reg = <1>; - compatible = "brcm,bcm4329-fmac"; - }; -}; - /* SDHOST is used to drive the SD card */ &sdhost { pinctrl-names = "default"; @@ -160,16 +139,15 @@ bus-width = <4>; }; -/* uart0 communicates with the BT module */ +/* uart0 communicates with the BT module + * + * WL_REG_ON and BT_REG_ON of the CYW43455 Wifi/BT module are driven + * by a single GPIO. We can't give GPIO control to one of the drivers, + * otherwise the other part would get unexpectedly disturbed. + */ &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_ctsrts_gpio30 &uart0_gpio32 &gpclk2_gpio43>; - status = "okay"; - - bluetooth { - compatible = "brcm,bcm43438-bt"; - max-speed = <2000000>; - }; }; /* uart1 is mapped to the pin header */ diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 61010266ca9a3..e12938baaf12c 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -4,6 +4,7 @@ #include "bcm2836-rpi.dtsi" #include "bcm283x-rpi-lan7515.dtsi" #include "bcm283x-rpi-usb-host.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" / { compatible = "raspberrypi,3-model-b-plus", "brcm,bcm2837"; @@ -31,11 +32,10 @@ linux,default-trigger = "default-on"; }; }; +}; - wifi_pwrseq: wifi-pwrseq { - compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; - }; +&bt { + shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; }; &firmware { @@ -137,23 +137,6 @@ status = "okay"; }; -/* SDHCI is used to control the SDIO for wireless */ -&sdhci { - #address-cells = <1>; - #size-cells = <0>; - pinctrl-names = "default"; - pinctrl-0 = <&emmc_gpio34>; - status = "okay"; - bus-width = <4>; - non-removable; - mmc-pwrseq = <&wifi_pwrseq>; - - brcmf: wifi@1 { - reg = <1>; - compatible = "brcm,bcm4329-fmac"; - }; -}; - /* SDHOST is used to drive the SD card */ &sdhost { pinctrl-names = "default"; @@ -166,13 +149,6 @@ &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_ctsrts_gpio30 &uart0_gpio32 &gpclk2_gpio43>; - status = "okay"; - - bluetooth { - compatible = "brcm,bcm43438-bt"; - max-speed = <2000000>; - shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; - }; }; /* uart1 is mapped to the pin header */ @@ -181,3 +157,7 @@ pinctrl-0 = <&uart1_gpio14>; status = "okay"; }; + +&wifi_pwrseq { + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; +}; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts 
b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts index dd4a486040971..42b5383b55d88 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts @@ -4,6 +4,7 @@ #include "bcm2836-rpi.dtsi" #include "bcm283x-rpi-smsc9514.dtsi" #include "bcm283x-rpi-usb-host.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" / { compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; @@ -24,11 +25,10 @@ gpios = <&expgpio 2 GPIO_ACTIVE_HIGH>; }; }; +}; - wifi_pwrseq: wifi-pwrseq { - compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; - }; +&bt { + shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; }; &firmware { @@ -134,13 +134,6 @@ &uart0 { pinctrl-names = "default"; pinctrl-0 = <&uart0_gpio32 &gpclk2_gpio43>; - status = "okay"; - - bluetooth { - compatible = "brcm,bcm43438-bt"; - max-speed = <2000000>; - shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; - }; }; /* uart1 is mapped to the pin header */ @@ -150,23 +143,6 @@ status = "okay"; }; -/* SDHCI is used to control the SDIO for wireless */ -&sdhci { - #address-cells = <1>; - #size-cells = <0>; - pinctrl-names = "default"; - pinctrl-0 = <&emmc_gpio34>; - status = "okay"; - bus-width = <4>; - non-removable; - mmc-pwrseq = <&wifi_pwrseq>; - - brcmf: wifi@1 { - reg = <1>; - compatible = "brcm,bcm4329-fmac"; - }; -}; - /* SDHOST is used to drive the SD card */ &sdhost { pinctrl-names = "default"; @@ -174,3 +150,7 @@ status = "okay"; bus-width = <4>; }; + +&wifi_pwrseq { + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; +}; diff --git a/arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi b/arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi new file mode 100644 index 0000000000000..0b64cc19941f3 --- /dev/null +++ b/arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +/ { + wifi_pwrseq: wifi-pwrseq { + compatible = "mmc-pwrseq-simple"; + }; +}; + +/* SDHCI is used to control the SDIO for wireless */ +&sdhci { + #address-cells = <1>; + #size-cells = <0>; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_gpio34>; + bus-width = <4>; + non-removable; + mmc-pwrseq = <&wifi_pwrseq>; + status = "okay"; + + brcmf: wifi@1 { + reg = <1>; + compatible = "brcm,bcm4329-fmac"; + }; +}; + +/* uart0 communicates with the BT module */ +&uart0 { + status = "okay"; + + bt: bluetooth { + compatible = "brcm,bcm43438-bt"; + max-speed = <2000000>; + }; +}; From 50d03bcc0c14d20d0eb975321e69c275dc9548b6 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:38 +0200 Subject: [PATCH 0153/1202] dt-bindings: arm: bcm2835: Add Raspberry Pi Compute Module 4 Add the Raspberry Pi Compute Module 4 to DT schema. 
Signed-off-by: Stefan Wahren Acked-by: Rob Herring Link: https://lore.kernel.org/r/1628334401-6577-8-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml index 230b80d9d6cfc..5dc48241efb3b 100644 --- a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml @@ -19,6 +19,7 @@ properties: items: - enum: - raspberrypi,400 + - raspberrypi,4-compute-module - raspberrypi,4-model-b - const: brcm,bcm2711 From b65b46c047a9140f3fe44a4a3e00ce5c2caae8cc Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:39 +0200 Subject: [PATCH 0154/1202] ARM: dts: Add Raspberry Pi Compute Module 4 The Raspberry Pi Compute Module 4 (CM4) is a family of SoMs which contain the following: * BCM2711 quad core processor * up to 8 GB RAM * up to 32 GB eMMC * a GPIO expander * Gigabit PHY BCM54210PE * Wifi/BT module with internal and external antenna The eMMC and the Wifi/BT module are optional. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-9-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi | 113 +++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi diff --git a/arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi b/arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi new file mode 100644 index 0000000000000..a2954d466a73b --- /dev/null +++ b/arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; +#include "bcm2711.dtsi" +#include "bcm2711-rpi.dtsi" +#include "bcm283x-rpi-wifi-bt.dtsi" + +/ { + compatible = "raspberrypi,4-compute-module", "brcm,bcm2711"; + + chosen { + /* 8250 auxiliary UART instead of pl011 */ + stdout-path = "serial1:115200n8"; + }; + + sd_io_1v8_reg: sd_io_1v8_reg { + compatible = "regulator-gpio"; + regulator-name = "vdd-sd-io"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + regulator-always-on; + regulator-settling-time-us = <5000>; + gpios = <&expgpio 4 GPIO_ACTIVE_HIGH>; + states = <1800000 0x1>, + <3300000 0x0>; + status = "okay"; + }; + + sd_vcc_reg: sd_vcc_reg { + compatible = "regulator-fixed"; + regulator-name = "vcc-sd"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + enable-active-high; + gpio = <&expgpio 6 GPIO_ACTIVE_HIGH>; + }; +}; + +&bt { + shutdown-gpios = <&expgpio 0 GPIO_ACTIVE_HIGH>; +}; + +/* EMMC2 is used to drive the eMMC */ +&emmc2 { + bus-width = <8>; + vqmmc-supply = <&sd_io_1v8_reg>; + vmmc-supply = <&sd_vcc_reg>; + broken-cd; + /* Even the IP block is limited to 100 MHz + * this provides a throughput gain + */ + mmc-hs200-1_8v; + status = "okay"; +}; + +&expgpio { + gpio-line-names = "BT_ON", + "WL_ON", + "PWR_LED_OFF", + "ANT1", + "VDD_SD_IO_SEL", + "CAM_GPIO", + "SD_PWR_ON", + "ANT2"; + + ant1: ant1-hog { + gpio-hog; + gpios = <3 GPIO_ACTIVE_HIGH>; + /* internal antenna enabled */ + output-high; + line-name = "ant1"; + }; + + ant2: ant2-hog { + gpio-hog; + gpios = <7 GPIO_ACTIVE_HIGH>; + /* external antenna disabled */ + output-low; + line-name = "ant2"; + }; +}; + +&genet { + phy-handle = <&phy1>; + phy-mode = "rgmii-rxid"; + status = "okay"; +}; + +&genet_mdio { + phy1: 
ethernet-phy@0 { + /* No PHY interrupt */ + reg = <0x0>; + }; +}; + +/* uart0 communicates with the BT module */ +&uart0 { + pinctrl-names = "default"; + pinctrl-0 = <&uart0_ctsrts_gpio30 &uart0_gpio32>; + uart-has-rtscts; +}; + +/* uart1 is mapped to the pin header */ +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&uart1_gpio14>; + status = "okay"; +}; + +&wifi_pwrseq { + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; +}; From c5d283ef9e722b9ee134c85c5291b1676bf68e9f Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:40 +0200 Subject: [PATCH 0155/1202] ARM: dts: Add Raspberry Pi Compute Module 4 IO Board This adds the matching carrier for Raspberry Pi Compute Module 4. Instead of the xHCI USB host controller, there is just a USB 2.0 interface connected to the DWC2 controller from the BCM2711. As a result there is a free PCIe Gen 2 socket. Also there are 2 full-size HDMI 2.0 connectors. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-10-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts | 138 +++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 7e0934180724d..ce8f508e32cbe 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -92,6 +92,7 @@ dtb-$(CONFIG_ARCH_BCM2835) += \ bcm2837-rpi-cm3-io3.dtb \ bcm2711-rpi-400.dtb \ bcm2711-rpi-4-b.dtb \ + bcm2711-rpi-cm4-io.dtb \ bcm2835-rpi-zero.dtb \ bcm2835-rpi-zero-w.dtb dtb-$(CONFIG_ARCH_BCM_5301X) += \ diff --git a/arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts b/arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts new file mode 100644 index 0000000000000..19600b629be58 --- /dev/null +++ b/arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; +#include "bcm2711-rpi-cm4.dtsi" +#include "bcm283x-rpi-usb-host.dtsi" + +/ { + model = "Raspberry Pi Compute Module 4 IO Board"; + + leds { + led-act { + gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; + }; + + led-pwr { + label = "PWR"; + gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; + }; + }; +}; + +&ddc0 { + status = "okay"; +}; + +&ddc1 { + status = "okay"; +}; + +&gpio { + /* + * Parts taken from rpi_SCH_4b_4p0_reduced.pdf and + * the official GPU firmware DT blob. 
+ * + * Legend: + * "FOO" = GPIO line named "FOO" on the schematic + * "FOO_N" = GPIO line named "FOO" on schematic, active low + */ + gpio-line-names = "ID_SDA", + "ID_SCL", + "SDA1", + "SCL1", + "GPIO_GCLK", + "GPIO5", + "GPIO6", + "SPI_CE1_N", + "SPI_CE0_N", + "SPI_MISO", + "SPI_MOSI", + "SPI_SCLK", + "GPIO12", + "GPIO13", + /* Serial port */ + "TXD1", + "RXD1", + "GPIO16", + "GPIO17", + "GPIO18", + "GPIO19", + "GPIO20", + "GPIO21", + "GPIO22", + "GPIO23", + "GPIO24", + "GPIO25", + "GPIO26", + "GPIO27", + "RGMII_MDIO", + "RGMIO_MDC", + /* Used by BT module */ + "CTS0", + "RTS0", + "TXD0", + "RXD0", + /* Used by Wifi */ + "SD1_CLK", + "SD1_CMD", + "SD1_DATA0", + "SD1_DATA1", + "SD1_DATA2", + "SD1_DATA3", + /* Shared with SPI flash */ + "PWM0_MISO", + "PWM1_MOSI", + "STATUS_LED_G_CLK", + "SPIFLASH_CE_N", + "SDA0", + "SCL0", + "RGMII_RXCLK", + "RGMII_RXCTL", + "RGMII_RXD0", + "RGMII_RXD1", + "RGMII_RXD2", + "RGMII_RXD3", + "RGMII_TXCLK", + "RGMII_TXCTL", + "RGMII_TXD0", + "RGMII_TXD1", + "RGMII_TXD2", + "RGMII_TXD3"; +}; + +&hdmi0 { + status = "okay"; +}; + +&hdmi1 { + status = "okay"; +}; + +&genet { + status = "okay"; +}; + +&pixelvalve0 { + status = "okay"; +}; + +&pixelvalve1 { + status = "okay"; +}; + +&pixelvalve2 { + status = "okay"; +}; + +&pixelvalve4 { + status = "okay"; +}; + +&vc4 { + status = "okay"; +}; + +&vec { + status = "disabled"; +}; From 25e62167d1da8473e066ae449d6304562783a241 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 7 Aug 2021 13:06:41 +0200 Subject: [PATCH 0156/1202] arm64: dts: broadcom: Add reference to RPi CM4 IO Board This adds a reference to the dts of the Raspberry Pi Compute Module 4 IO Board, so we don't need to maintain the content in arm64. Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/1628334401-6577-11-git-send-email-stefan.wahren@i2se.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm64/boot/dts/broadcom/Makefile | 1 + arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts diff --git a/arch/arm64/boot/dts/broadcom/Makefile b/arch/arm64/boot/dts/broadcom/Makefile index 11eae3e3a9447..c6882032a4280 100644 --- a/arch/arm64/boot/dts/broadcom/Makefile +++ b/arch/arm64/boot/dts/broadcom/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_ARCH_BCM2835) += bcm2711-rpi-400.dtb \ bcm2711-rpi-4-b.dtb \ + bcm2711-rpi-cm4-io.dtb \ bcm2837-rpi-3-a-plus.dtb \ bcm2837-rpi-3-b.dtb \ bcm2837-rpi-3-b-plus.dtb \ diff --git a/arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts b/arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts new file mode 100644 index 0000000000000..e36d395e39510 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arm/bcm2711-rpi-cm4-io.dts" From df359f0ca01907507d2671760492907829ee17b7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 31 Aug 2021 14:58:42 +0200 Subject: [PATCH 0157/1202] ARM: dts: bcm2711-rpi-4-b: Fix pcie0's unit address formatting dtbs_check currently complains that: arch/arm/boot/dts/bcm2711-rpi-4-b.dts:220.10-231.4: Warning (pci_device_reg): /scb/pcie@7d500000/pci@1,0: PCI unit address format error, expected "0,0" Unsurprisingly pci@0,0 is the right address, as illustrated by its reg property: &pcie0 { pci@0,0 { /* * As defined in the IEEE Std 1275-1994 document, * reg is a five-cell address encoded as (phys.hi * phys.mid phys.lo size.hi size.lo). 
phys.hi * should contain the device's BDF as 0b00000000 * bbbbbbbb dddddfff 00000000. The other cells * should be zero. */ reg = <0 0 0 0 0>; }; }; The device is clearly 0. So fix it. Also add a missing 'device_type = "pci"'. Fixes: 258f92d2f840 ("ARM: dts: bcm2711: Add reset controller to xHCI node") Suggested-by: Rob Herring Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20210831125843.1233488-1-nsaenzju@redhat.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711-rpi-4-b.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index c54ba5cf277c1..18d2cefb50ae6 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -200,7 +200,8 @@ }; &pcie0 { - pci@1,0 { + pci@0,0 { + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; ranges; From a036b0a5d7d6ab4bae3b9028bb1d44b677bba5f7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 31 Aug 2021 14:58:43 +0200 Subject: [PATCH 0158/1202] ARM: dts: bcm2711-rpi-4-b: Fix usb's unit address The unit address is supposed to represent '<device>,<function>', which are both 0 for RPi4b's XHCI controller. On top of that, although OpenFirmware states that the bus number goes in the high part of the last reg parameter, FDT doesn't seem to care for it[1], so remove it. [1] https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210830103909.323356-1-nsaenzju@redhat.com/#24414633 Fixes: 258f92d2f840 ("ARM: dts: bcm2711: Add reset controller to xHCI node") Suggested-by: Rob Herring Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20210831125843.1233488-2-nsaenzju@redhat.com Signed-off-by: Nicolas Saenz Julienne --- arch/arm/boot/dts/bcm2711-rpi-4-b.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index 18d2cefb50ae6..631dd5baf68da 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -208,8 +208,8 @@ reg = <0 0 0 0 0>; - usb@1,0 { - reg = <0x10000 0 0 0 0>; + usb@0,0 { + reg = <0 0 0 0 0>; resets = <&reset RASPBERRYPI_FIRMWARE_RESET_ID_USB>; }; }; From 8c3f1ab09e57d6bbee558e361ec99db47f39d2dc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 28 May 2021 07:51:26 -0400 Subject: [PATCH 0159/1202] mfd: core: Add missing of_node_put for loop iteration Early exits from for_each_child_of_node() should decrement the node reference counter. Reported by Coccinelle: drivers/mfd/mfd-core.c:197:2-24: WARNING: Function "for_each_child_of_node" should have of_node_put() before goto around lines 209. 
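The rule the fix enforces, as a short hedged sketch (for_each_child_of_node(), of_node_put() and of_device_is_available() are the real OF APIs; the loop body is illustrative):

#include <linux/errno.h>
#include <linux/of.h>

/* for_each_child_of_node() holds a reference on the child it hands out
 * and drops it when advancing to the next one -- but not when the loop
 * is left early, so early exits must drop the reference themselves. */
static int count_enabled_children_sketch(struct device_node *parent)
{
	struct device_node *np;
	int n = 0;

	for_each_child_of_node(parent, np) {
		if (!of_device_is_available(np))
			continue;	/* the iterator handles the ref */
		if (++n > 8) {
			of_node_put(np);	/* early exit: drop by hand */
			return -E2BIG;
		}
	}
	return n;	/* normal termination: nothing left to put */
}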
Fixes: c94bb233a9fe ("mfd: Make MFD core code Device Tree and IRQ domain aware") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210528115126.18370-1-krzysztof.kozlowski@canonical.com --- drivers/mfd/mfd-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 79f5c6a18815a..684a011a63968 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -198,6 +198,7 @@ static int mfd_add_device(struct device *parent, int id, if (of_device_is_compatible(np, cell->of_compatible)) { /* Ignore 'disabled' devices error free */ if (!of_device_is_available(np)) { + of_node_put(np); ret = 0; goto fail_alias; } @@ -205,6 +206,7 @@ static int mfd_add_device(struct device *parent, int id, ret = mfd_match_of_node_to_dev(pdev, np, cell); if (ret == -EAGAIN) continue; + of_node_put(np); if (ret) goto fail_alias; From 0830e033c077e30cdd62bb0489e5e8ec04cadf04 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 16 Sep 2021 11:33:34 +0100 Subject: [PATCH 0160/1202] firmware: arm_scmi: Review some virtio log messages Be more verbose by avoiding the _once flavour of dev_info/_err/_notice. Remove usage of __func__ to identify which vqueue is referred to in some error messages and explicitly name the TX/RX vqueue. Link: https://lore.kernel.org/r/20210916103336.7243-1-cristian.marussi@arm.com Cc: "Michael S. Tsirkin" Cc: Sudeep Holla Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 11e8efb713751..c8cab5652daf0 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -94,8 +94,8 @@ static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch, rc = virtqueue_add_inbuf(vioch->vqueue, &sg_in, 1, msg, GFP_ATOMIC); if (rc) - dev_err_once(vioch->cinfo->dev, - "failed to add to virtqueue (%d)\n", rc); + dev_err(vioch->cinfo->dev, + "failed to add to RX virtqueue (%d)\n", rc); else virtqueue_kick(vioch->vqueue); @@ -193,8 +193,8 @@ static unsigned int virtio_get_max_msg(struct scmi_chan_info *base_cinfo) static int virtio_link_supplier(struct device *dev) { if (!scmi_vdev) { - dev_notice_once(dev, - "Deferring probe after not finding a bound scmi-virtio device\n"); + dev_notice(dev, + "Deferring probe after not finding a bound scmi-virtio device\n"); return -EPROBE_DEFER; } @@ -334,9 +334,8 @@ static int virtio_send_message(struct scmi_chan_info *cinfo, rc = virtqueue_add_sgs(vioch->vqueue, sgs, 1, 1, msg, GFP_ATOMIC); if (rc) { list_add(&msg->list, &vioch->free_list); - dev_err_once(vioch->cinfo->dev, - "%s() failed to add to virtqueue (%d)\n", __func__, - rc); + dev_err(vioch->cinfo->dev, - "failed to add to TX virtqueue (%d)\n", rc); } else { virtqueue_kick(vioch->vqueue); } @@ -427,10 +426,10 @@ static int scmi_vio_probe(struct virtio_device *vdev) sz /= DESCRIPTORS_PER_TX_MSG; if (sz > MSG_TOKEN_MAX) { - dev_info_once(dev, - "%s virtqueue could hold %d messages. Only %ld allowed to be pending.\n", - channels[i].is_rx ? "rx" : "tx", - sz, MSG_TOKEN_MAX); + dev_info(dev, + "%s virtqueue could hold %d messages. Only %ld allowed to be pending.\n", + channels[i].is_rx ? "rx" : "tx", + sz, MSG_TOKEN_MAX); sz = MSG_TOKEN_MAX; } channels[i].max_msg = sz; From 0a483657e760a4cd30ed04bbec652334e513e167 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 5 Oct 2021 13:26:49 +0100 Subject: [PATCH 0161/1202] cachefiles: Always indicate we should fill a post-EOF page with zeros In cachefiles_prepare_read(), always indicate to the netfs lib that a page beyond the EOF should be filled with zeros, even if we don't have a cache file attached because it's still being created. This avoids confusion in netfs_rreq_prepare_read(), which sees source == NETFS_DOWNLOAD_FROM_SERVER, consequently sees the read beyond the EOF reduced to size 0, and thus triggers the WARN_ON and marks the read invalid. Also don't try to check for data if there's a flag set indicating we don't yet have anything stored in the cache. Signed-off-by: David Howells --- fs/cachefiles/io.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 08b3183e0dcea..dbc1c4421116e 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -273,6 +273,7 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque struct cachefiles_cache *cache; const struct cred *saved_cred; struct file *file = subreq->rreq->cache_resources.cache_priv2; + enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER; loff_t off, to; _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); @@ -281,19 +282,24 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque cache = container_of(object->fscache.cache, struct cachefiles_cache, cache); - if (!file) - goto cache_fail_nosec; + cachefiles_begin_secure(cache, &saved_cred); - if (subreq->start >= i_size) - return NETFS_FILL_WITH_ZEROES; + if (subreq->start >= i_size) { + ret = NETFS_FILL_WITH_ZEROES; + goto out; + } - cachefiles_begin_secure(cache, &saved_cred); + if (!file) + goto out; + + if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->fscache.cookie->flags)) + goto download_and_store; off = vfs_llseek(file, subreq->start, SEEK_DATA); if (off < 0 && off >= (loff_t)-MAX_ERRNO) { if (off == (loff_t)-ENXIO) goto download_and_store; - goto cache_fail; + goto out; } if (off >= subreq->start + subreq->len) @@ -307,7 +313,7 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque to = vfs_llseek(file, subreq->start, SEEK_HOLE); if (to < 0 && to >= (loff_t)-MAX_ERRNO) - goto cache_fail; + goto out; if (to < subreq->start + subreq->len) { if (subreq->start + subreq->len >= i_size) @@ -317,16 +323,15 @@ static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subreque subreq->len = to - subreq->start; } - cachefiles_end_secure(cache, saved_cred); - return NETFS_READ_FROM_CACHE; + ret = NETFS_READ_FROM_CACHE; + goto out; download_and_store: if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0) __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); -cache_fail: +out: cachefiles_end_secure(cache, saved_cred); -cache_fail_nosec: - return NETFS_DOWNLOAD_FROM_SERVER; + return ret; } /* From 397a7b68cb16f6035c03c47f78a6075e7d2ede72 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Sep 2021 09:47:45 +0100 Subject: [PATCH 0162/1202] fscache: Implement a fallback I/O interface to replace the old API Implement an alternative to using the netfslib-base I/O API so that we can move forwards on getting rid of the old API. 
Note that this API should not be used by new filesystems as it still uses the backing filesystem to track unfilled holes in the backing file, though using SEEK_DATA/SEEK_HOLE rather than bmap(). This is dangerous and can lead to corrupted data as the backing filesystem cannot be relied on not to fill in holes with blocks of zeros in order to optimise an extent list[1]. It may also punch out blocks of zeros to create holes for the same reason, but this is less of a problem. Also adjust the macros that must be defined to indicate which API is to be used: (*) FSCACHE_USE_OLD_IO_API - Use the current upstream API. This will be deleted. (*) FSCACHE_USE_FALLBACK_IO_API - Use the API added here. (*) FSCACHE_USE_NEW_IO_API - Use the new API or netfs API. Changes ======= ver #2: - Changed "deprecated" to "fallback" in the new function names[2]. - Need to define FSCACHE_USE_FALLBACK_IO_API in fscache/io.c to enable the prototypes of the functions[3]. - Removed a couple of unused variables[3]. - Make netfs/read_helpers.c use NETFS_READ_HOLE_*. Signed-off-by: David Howells Tested-by: Dave Wysochanski cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu/ [1] Link: https://lore.kernel.org/r/CAHk-=wiVK+1CyEjW8u71zVPK8msea=qPpznX35gnX+s8sXnJTg@mail.gmail.com/ [2] Link: https://lore.kernel.org/r/202109150420.QX7dDzSE-lkp@intel.com/ [3] Link: https://lore.kernel.org/r/163162770137.438332.13788466444753625553.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189107352.2509237.759630157032861275.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/9p/cache.h | 1 + fs/cachefiles/io.c | 28 +++++++- fs/cifs/fscache.h | 1 + fs/fscache/internal.h | 3 + fs/fscache/io.c | 137 +++++++++++++++++++++++++++++++++----- fs/fscache/page.c | 1 + fs/fscache/stats.c | 12 +++- fs/netfs/read_helper.c | 8 +-- fs/nfs/fscache.h | 1 + include/linux/fscache.h | 142 +++++++++++++++++++++++++++++++++++++--- include/linux/netfs.h | 17 ++++- 11 files changed, 314 insertions(+), 37 deletions(-) diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 00f107af443ef..c7e74776ce90f 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -8,6 +8,7 @@ #ifndef _9P_CACHE_H #define _9P_CACHE_H #ifdef CONFIG_9P_FSCACHE +#define FSCACHE_USE_OLD_IO_API #include #include diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index dbc1c4421116e..5ead97de4bb7c 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -58,14 +58,14 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2) static int cachefiles_read(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, - bool seek_data, + enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv) { struct cachefiles_kiocb *ki; struct file *file = cres->cache_priv2; unsigned int old_nofs; - ssize_t ret = -ENOBUFS; + ssize_t ret = -ENODATA; size_t len = iov_iter_count(iter), skipped = 0; _enter("%pD,%li,%llx,%zx/%llx", @@ -75,7 +75,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, /* If the caller asked us to seek for data before doing the read, then * we should do that now. If we find a gap, we fill it with zeros. */ - if (seek_data) { + if (read_hole != NETFS_READ_HOLE_IGNORE) { loff_t off = start_pos, off2; off2 = vfs_llseek(file, off, SEEK_DATA); @@ -90,6 +90,9 @@ static int cachefiles_read(struct netfs_cache_resources *cres, * in the region, so clear the rest of the buffer and * return success. 
*/ + if (read_hole == NETFS_READ_HOLE_FAIL) + goto presubmission_error; + iov_iter_zero(len, iter); skipped = len; ret = 0; @@ -350,6 +353,24 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres, return 0; } +/* + * Prepare for a write to occur from the fallback I/O API. + */ +static int cachefiles_prepare_fallback_write(struct netfs_cache_resources *cres, + pgoff_t index) +{ + struct fscache_operation *op = cres->cache_priv; + struct cachefiles_object *object; + struct cachefiles_cache *cache; + + _enter("%lx", index); + + object = container_of(op->object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + return cachefiles_has_space(cache, 0, 1); +} + /* * Clean up an operation. */ @@ -376,6 +397,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { .write = cachefiles_write, .prepare_read = cachefiles_prepare_read, .prepare_write = cachefiles_prepare_write, + .prepare_fallback_write = cachefiles_prepare_fallback_write, }; /* diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 9baa1d0f22bde..729b51100a6d1 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -9,6 +9,7 @@ #ifndef _CIFS_FSCACHE_H #define _CIFS_FSCACHE_H +#define FSCACHE_USE_OLD_IO_API #include #include "cifsglob.h" diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index c3e4804b8fcbf..1d1046408311d 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -180,12 +180,15 @@ extern atomic_t fscache_n_stores; extern atomic_t fscache_n_stores_ok; extern atomic_t fscache_n_stores_again; extern atomic_t fscache_n_stores_nobufs; +extern atomic_t fscache_n_stores_intr; extern atomic_t fscache_n_stores_oom; extern atomic_t fscache_n_store_ops; extern atomic_t fscache_n_store_calls; extern atomic_t fscache_n_store_pages; extern atomic_t fscache_n_store_radix_deletes; extern atomic_t fscache_n_store_pages_over_limit; +extern atomic_t fscache_n_stores_object_dead; +extern atomic_t fscache_n_store_op_waits; extern atomic_t fscache_n_store_vmscan_not_storing; extern atomic_t fscache_n_store_vmscan_gone; diff --git a/fs/fscache/io.c b/fs/fscache/io.c index 3745a06316181..e633808ba813c 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -8,7 +8,10 @@ #define FSCACHE_DEBUG_LEVEL PAGE #include #define FSCACHE_USE_NEW_IO_API +#define FSCACHE_USE_FALLBACK_IO_API #include +#include +#include #include #include #include "internal.h" @@ -35,7 +38,10 @@ int __fscache_begin_operation(struct netfs_cache_resources *cres, _enter("c=%08x", cres->debug_id); - fscache_stat(&fscache_n_retrievals); + if (for_write) + fscache_stat(&fscache_n_stores); + else + fscache_stat(&fscache_n_retrievals); if (hlist_empty(&cookie->backing_objects)) goto nobufs; @@ -77,14 +83,23 @@ int __fscache_begin_operation(struct netfs_cache_resources *cres, goto nobufs_unlock_dec; spin_unlock(&cookie->lock); - fscache_stat(&fscache_n_retrieval_ops); - /* we wait for the operation to become active, and then process it * *here*, in this thread, and not in the thread pool */ - ret = fscache_wait_for_operation_activation( - object, op, - __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead)); + if (for_write) { + fscache_stat(&fscache_n_store_ops); + + ret = fscache_wait_for_operation_activation( + object, op, + __fscache_stat(&fscache_n_store_op_waits), + __fscache_stat(&fscache_n_stores_object_dead)); + } else { + fscache_stat(&fscache_n_retrieval_ops); + + ret = fscache_wait_for_operation_activation( + 
object, op, + __fscache_stat(&fscache_n_retrieval_op_waits), + __fscache_stat(&fscache_n_retrievals_object_dead)); + } if (ret < 0) goto error; @@ -92,16 +107,27 @@ int __fscache_begin_operation(struct netfs_cache_resources *cres, ret = object->cache->ops->begin_operation(cres, op); error: - if (ret == -ENOMEM) - fscache_stat(&fscache_n_retrievals_nomem); - else if (ret == -ERESTARTSYS) - fscache_stat(&fscache_n_retrievals_intr); - else if (ret == -ENODATA) - fscache_stat(&fscache_n_retrievals_nodata); - else if (ret < 0) - fscache_stat(&fscache_n_retrievals_nobufs); - else - fscache_stat(&fscache_n_retrievals_ok); + if (for_write) { + if (ret == -ENOMEM) + fscache_stat(&fscache_n_stores_oom); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_stores_intr); + else if (ret < 0) + fscache_stat(&fscache_n_stores_nobufs); + else + fscache_stat(&fscache_n_stores_ok); + } else { + if (ret == -ENOMEM) + fscache_stat(&fscache_n_retrievals_nomem); + else if (ret == -ERESTARTSYS) + fscache_stat(&fscache_n_retrievals_intr); + else if (ret == -ENODATA) + fscache_stat(&fscache_n_retrievals_nodata); + else if (ret < 0) + fscache_stat(&fscache_n_retrievals_nobufs); + else + fscache_stat(&fscache_n_retrievals_ok); + } fscache_put_operation(op); _leave(" = %d", ret); @@ -116,8 +142,83 @@ int __fscache_begin_operation(struct netfs_cache_resources *cres, if (wake_cookie) __fscache_wake_unused_cookie(cookie); nobufs: - fscache_stat(&fscache_n_retrievals_nobufs); + if (for_write) + fscache_stat(&fscache_n_stores_nobufs); + else + fscache_stat(&fscache_n_retrievals_nobufs); _leave(" = -ENOBUFS"); return -ENOBUFS; } EXPORT_SYMBOL(__fscache_begin_operation); + +/* + * Clean up an operation. + */ +static void fscache_end_operation(struct netfs_cache_resources *cres) +{ + cres->ops->end_operation(cres); +} + +/* + * Fallback page reading interface. + */ +int __fscache_fallback_read_page(struct fscache_cookie *cookie, struct page *page) +{ + struct netfs_cache_resources cres; + struct iov_iter iter; + struct bio_vec bvec[1]; + int ret; + + _enter("%lx", page->index); + + memset(&cres, 0, sizeof(cres)); + bvec[0].bv_page = page; + bvec[0].bv_offset = 0; + bvec[0].bv_len = PAGE_SIZE; + iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + + ret = fscache_begin_read_operation(&cres, cookie); + if (ret < 0) + return ret; + + ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, + NULL, NULL); + fscache_end_operation(&cres); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(__fscache_fallback_read_page); + +/* + * Fallback page writing interface. 
+ */ +int __fscache_fallback_write_page(struct fscache_cookie *cookie, struct page *page) +{ + struct netfs_cache_resources cres; + struct iov_iter iter; + struct bio_vec bvec[1]; + int ret; + + _enter("%lx", page->index); + + memset(&cres, 0, sizeof(cres)); + bvec[0].bv_page = page; + bvec[0].bv_offset = 0; + bvec[0].bv_len = PAGE_SIZE; + iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + + ret = __fscache_begin_operation(&cres, cookie, true); + if (ret < 0) + return ret; + + ret = cres.ops->prepare_fallback_write(&cres, page_index(page)); + if (ret < 0) + goto out; + + ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL); +out: + fscache_end_operation(&cres); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(__fscache_fallback_write_page); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 27df94ef0e0bd..ed41a00b861c0 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -7,6 +7,7 @@ #define FSCACHE_DEBUG_LEVEL PAGE #include +#define FSCACHE_USE_OLD_IO_API #include #include #include diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index a7c3ed89a3e03..3ffa34c99977a 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -54,12 +54,15 @@ atomic_t fscache_n_stores; atomic_t fscache_n_stores_ok; atomic_t fscache_n_stores_again; atomic_t fscache_n_stores_nobufs; +atomic_t fscache_n_stores_intr; atomic_t fscache_n_stores_oom; atomic_t fscache_n_store_ops; atomic_t fscache_n_store_calls; atomic_t fscache_n_store_pages; atomic_t fscache_n_store_radix_deletes; atomic_t fscache_n_store_pages_over_limit; +atomic_t fscache_n_stores_object_dead; +atomic_t fscache_n_store_op_waits; atomic_t fscache_n_store_vmscan_not_storing; atomic_t fscache_n_store_vmscan_gone; @@ -221,18 +224,21 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_retrieval_op_waits), atomic_read(&fscache_n_retrievals_object_dead)); - seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", + seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u int=%u oom=%u\n", atomic_read(&fscache_n_stores), atomic_read(&fscache_n_stores_ok), atomic_read(&fscache_n_stores_again), atomic_read(&fscache_n_stores_nobufs), + atomic_read(&fscache_n_stores_intr), atomic_read(&fscache_n_stores_oom)); - seq_printf(m, "Stores : ops=%u run=%u pgs=%u rxd=%u olm=%u\n", + seq_printf(m, "Stores : ops=%u owt=%u run=%u pgs=%u rxd=%u olm=%u abt=%u\n", atomic_read(&fscache_n_store_ops), + atomic_read(&fscache_n_store_op_waits), atomic_read(&fscache_n_store_calls), atomic_read(&fscache_n_store_pages), atomic_read(&fscache_n_store_radix_deletes), - atomic_read(&fscache_n_store_pages_over_limit)); + atomic_read(&fscache_n_store_pages_over_limit), + atomic_read(&fscache_n_stores_object_dead)); seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n", atomic_read(&fscache_n_store_vmscan_not_storing), diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 0b6cd3b8734c6..31219c1036102 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -170,7 +170,7 @@ static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error */ static void netfs_read_from_cache(struct netfs_read_request *rreq, struct netfs_read_subrequest *subreq, - bool seek_data) + enum netfs_read_from_hole read_hole) { struct netfs_cache_resources *cres = &rreq->cache_resources; struct iov_iter iter; @@ -180,7 +180,7 @@ static void netfs_read_from_cache(struct netfs_read_request *rreq, subreq->start + subreq->transferred, subreq->len - subreq->transferred); - cres->ops->read(cres, 
subreq->start, &iter, seek_data, + cres->ops->read(cres, subreq->start, &iter, read_hole, netfs_cache_read_terminated, subreq); } @@ -468,7 +468,7 @@ static void netfs_rreq_short_read(struct netfs_read_request *rreq, netfs_get_read_subrequest(subreq); atomic_inc(&rreq->nr_rd_ops); if (subreq->source == NETFS_READ_FROM_CACHE) - netfs_read_from_cache(rreq, subreq, true); + netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR); else netfs_read_from_server(rreq, subreq); } @@ -796,7 +796,7 @@ static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq, netfs_read_from_server(rreq, subreq); break; case NETFS_READ_FROM_CACHE: - netfs_read_from_cache(rreq, subreq, false); + netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE); break; default: BUG(); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 6754c8607230b..6118cdd2e1d72 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -11,6 +11,7 @@ #include #include #include +#define FSCACHE_USE_OLD_IO_API #include #ifdef CONFIG_NFS_FSCACHE diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 5e3ddb11715a1..715fb830e982f 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -24,15 +24,13 @@ #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define fscache_available() (1) #define fscache_cookie_valid(cookie) (cookie) +#define fscache_resources_valid(cres) ((cres)->cache_priv) #else #define fscache_available() (0) #define fscache_cookie_valid(cookie) (0) +#define fscache_resources_valid(cres) (false) #endif - -/* pattern used to fill dead space in an index entry */ -#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 - struct pagevec; struct fscache_cache_tag; struct fscache_cookie; @@ -199,7 +197,12 @@ extern void __fscache_wait_on_invalidate(struct fscache_cookie *); #ifdef FSCACHE_USE_NEW_IO_API extern int __fscache_begin_operation(struct netfs_cache_resources *, struct fscache_cookie *, bool); -#else +#endif +#ifdef FSCACHE_USE_FALLBACK_IO_API +extern int __fscache_fallback_read_page(struct fscache_cookie *, struct page *); +extern int __fscache_fallback_write_page(struct fscache_cookie *, struct page *); +#endif +#ifdef FSCACHE_USE_OLD_IO_API extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -223,7 +226,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, struct inode *); extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, struct list_head *pages); -#endif /* FSCACHE_USE_NEW_IO_API */ + +#endif /* FSCACHE_USE_OLD_IO_API */ extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, @@ -537,7 +541,85 @@ int fscache_begin_read_operation(struct netfs_cache_resources *cres, return -ENOBUFS; } -#else /* FSCACHE_USE_NEW_IO_API */ +/** + * fscache_operation_valid - Return true if operations resources are usable + * @cres: The resources to check. + * + * Returns a pointer to the operations table if usable or NULL if not. + */ +static inline +const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_resources *cres) +{ + return fscache_resources_valid(cres) ? cres->ops : NULL; +} + +/** + * fscache_read - Start a read from the cache. + * @cres: The cache resources to use + * @start_pos: The beginning file offset in the cache file + * @iter: The buffer to fill - and also the length + * @read_hole: How to handle a hole in the data. 
+ * @term_func: The function to call upon completion
+ * @term_func_priv: The private data for @term_func
+ *
+ * Start a read from the cache. @cres indicates the cache object to read from
+ * and must be obtained by a call to fscache_begin_operation() beforehand.
+ *
+ * The data is read into the iterator, @iter, and that also indicates the size
+ * of the operation. @start_pos is the start position in the file, though if
+ * @read_hole is set appropriately, the cache can use SEEK_DATA to find the
+ * next piece of data, writing zeros for the hole into the iterator.
+ *
+ * Upon termination of the operation, @term_func will be called and supplied
+ * with @term_func_priv plus the amount of data written, if successful, or the
+ * error code otherwise.
+ */
+static inline
+int fscache_read(struct netfs_cache_resources *cres,
+		 loff_t start_pos,
+		 struct iov_iter *iter,
+		 enum netfs_read_from_hole read_hole,
+		 netfs_io_terminated_t term_func,
+		 void *term_func_priv)
+{
+	const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+	return ops->read(cres, start_pos, iter, read_hole,
+			 term_func, term_func_priv);
+}
+
+/**
+ * fscache_write - Start a write to the cache.
+ * @cres: The cache resources to use
+ * @start_pos: The beginning file offset in the cache file
+ * @iter: The data to write - and also the length
+ * @term_func: The function to call upon completion
+ * @term_func_priv: The private data for @term_func
+ *
+ * Start a write to the cache. @cres indicates the cache object to write to and
+ * must be obtained by a call to fscache_begin_operation() beforehand.
+ *
+ * The data to be written is obtained from the iterator, @iter, and that also
+ * indicates the size of the operation. @start_pos is the start position in
+ * the file.
+ *
+ * Upon termination of the operation, @term_func will be called and supplied
+ * with @term_func_priv plus the amount of data written, if successful, or the
+ * error code otherwise.
+ */
+static inline
+int fscache_write(struct netfs_cache_resources *cres,
+		  loff_t start_pos,
+		  struct iov_iter *iter,
+		  netfs_io_terminated_t term_func,
+		  void *term_func_priv)
+{
+	const struct netfs_cache_ops *ops = fscache_operation_valid(cres);
+	return ops->write(cres, start_pos, iter, term_func, term_func_priv);
+}
+
+#endif /* FSCACHE_USE_NEW_IO_API */
+
+#ifdef FSCACHE_USE_OLD_IO_API
 
 /**
  * fscache_read_or_alloc_page - Read a page from the cache or allocate a block
@@ -818,7 +900,7 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
 	__fscache_uncache_all_inode_pages(cookie, inode);
 }
 
-#endif /* FSCACHE_USE_NEW_IO_API */
+#endif /* FSCACHE_USE_OLD_IO_API */
 
 /**
  * fscache_disable_cookie - Disable a cookie
@@ -874,4 +956,48 @@ void fscache_enable_cookie(struct fscache_cookie *cookie,
 				can_enable, data);
 }
 
+#ifdef FSCACHE_USE_FALLBACK_IO_API
+
+/**
+ * fscache_fallback_read_page - Read a page from a cache object (DANGEROUS)
+ * @cookie: The cookie representing the cache object
+ * @page: The page to be read to
+ *
+ * Synchronously read a page from the cache. The page's offset is used to
+ * indicate where to read.
+ *
+ * This is dangerous and should be moved away from as it relies on the
+ * assumption that the backing filesystem will exactly record the blocks we
+ * have stored there.
+ */
+static inline
+int fscache_fallback_read_page(struct fscache_cookie *cookie, struct page *page)
+{
+	if (fscache_cookie_enabled(cookie))
+		return __fscache_fallback_read_page(cookie, page);
+	return -ENOBUFS;
+}
+
+/**
+ * fscache_fallback_write_page - Write a page to a cache object (DANGEROUS)
+ * @cookie: The cookie representing the cache object
+ * @page: The page to be written from
+ *
+ * Synchronously write a page to the cache. The page's offset is used to
+ * indicate where to write.
+ *
+ * This is dangerous and should be moved away from as it relies on the
+ * assumption that the backing filesystem will exactly record the blocks we
+ * have stored there.
+ */
+static inline
+int fscache_fallback_write_page(struct fscache_cookie *cookie, struct page *page)
+{
+	if (fscache_cookie_enabled(cookie))
+		return __fscache_fallback_write_page(cookie, page);
+	return -ENOBUFS;
+}
+
+#endif /* FSCACHE_USE_FALLBACK_IO_API */
+
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d6a4158a9a6f..014fb502fd91b 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -174,6 +174,15 @@ struct netfs_read_request_ops {
 	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
 
+/*
+ * How to handle reading from a hole.
+ */
+enum netfs_read_from_hole {
+	NETFS_READ_HOLE_IGNORE,
+	NETFS_READ_HOLE_CLEAR,
+	NETFS_READ_HOLE_FAIL,
+};
+
 /*
  * Table of operations for access to a cache. This is obtained by
  * rreq->ops->begin_cache_operation().
@@ -186,7 +195,7 @@ struct netfs_cache_ops {
 	int (*read)(struct netfs_cache_resources *cres,
 		    loff_t start_pos,
 		    struct iov_iter *iter,
-		    bool seek_data,
+		    enum netfs_read_from_hole read_hole,
 		    netfs_io_terminated_t term_func,
 		    void *term_func_priv);
 
@@ -212,6 +221,12 @@ struct netfs_cache_ops {
 	 */
 	int (*prepare_write)(struct netfs_cache_resources *cres,
 			     loff_t *_start, size_t *_len, loff_t i_size);
+
+	/* Prepare a write operation for the fallback fscache API, working out
+	 * whether we can cache a page or not.
+	 */
+	int (*prepare_fallback_write)(struct netfs_cache_resources *cres,
+				      pgoff_t index);
 };
 
 struct readahead_control;

From b63dc8f2b02c816b911fafdb88aad67592f9c2bf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 28 Sep 2021 09:50:26 +0200
Subject: [PATCH 0163/1202] firmware: include drivers/firmware/Kconfig
 unconditionally

Compile-testing drivers that require access to a firmware layer
fails when that firmware symbol is unavailable. This happened
twice this week:

 - My proposed change to rework the QCOM_SCM firmware symbol
   broke on ppc64 and others.

 - The cs_dsp firmware patch added a device-specific firmware loader
   into drivers/firmware, which broke on the same set of
   architectures.

We should probably do the same thing for other subsystems as well,
but fix this one first as this is a dependency for other patches
getting merged.
Signed-off-by: Arnd Bergmann
Reviewed-by: Bjorn Andersson
Reviewed-by: Charles Keepax
Acked-by: Will Deacon
Acked-by: Bjorn Andersson
Cc: Mark Brown
Cc: Liam Girdwood
Cc: Charles Keepax
Cc: Simon Trimmer
Cc: Arnd Bergmann
Cc: Michael Ellerman
Reviewed-by: Mark Brown
Signed-off-by: Arnd Bergmann
---
 arch/arm/Kconfig    | 2 --
 arch/arm64/Kconfig  | 2 --
 arch/ia64/Kconfig   | 2 --
 arch/mips/Kconfig   | 2 --
 arch/parisc/Kconfig | 2 --
 arch/riscv/Kconfig  | 2 --
 arch/x86/Kconfig    | 2 --
 drivers/Kconfig     | 2 ++
 8 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fc196421b2ced..59baf6c132a74 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1989,8 +1989,6 @@ config ARCH_HIBERNATION_POSSIBLE
 
 endmenu
 
-source "drivers/firmware/Kconfig"
-
 if CRYPTO
 source "arch/arm/crypto/Kconfig"
 endif
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 077f2ec4eeb23..407b4addea361 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1931,8 +1931,6 @@ source "drivers/cpufreq/Kconfig"
 
 endmenu
 
-source "drivers/firmware/Kconfig"
-
 source "drivers/acpi/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 045792cde4811..1e33666fa679b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -388,8 +388,6 @@ config CRASH_DUMP
 	help
 	  Generate crash dump after being started by kexec.
 
-source "drivers/firmware/Kconfig"
-
 endmenu
 
 menu "Power management and ACPI options"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 771ca53af06d2..6b8f591c5054c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -3316,8 +3316,6 @@ source "drivers/cpuidle/Kconfig"
 
 endmenu
 
-source "drivers/firmware/Kconfig"
-
 source "arch/mips/kvm/Kconfig"
 
 source "arch/mips/vdso/Kconfig"
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 4742b6f169b72..27a8b49af11fc 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -384,6 +384,4 @@ config KEXEC_FILE
 
 endmenu
 
-source "drivers/firmware/Kconfig"
-
 source "drivers/parisc/Kconfig"
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c3f3fd583e04b..8bc71ab143e38 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -561,5 +561,3 @@ menu "Power management options"
 source "kernel/power/Kconfig"
 
 endmenu
-
-source "drivers/firmware/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4e001bbbb4256..4dca39744ee9d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2828,8 +2828,6 @@ config HAVE_ATOMIC_IOMAP
 	def_bool y
 	depends on X86_32
 
-source "drivers/firmware/Kconfig"
-
 source "arch/x86/kvm/Kconfig"
 
 source "arch/x86/Kconfig.assembler"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 30d2db37cc873..0d399ddaa185a 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -17,6 +17,8 @@ source "drivers/bus/Kconfig"
 
 source "drivers/connector/Kconfig"
 
+source "drivers/firmware/Kconfig"
+
 source "drivers/gnss/Kconfig"
 
 source "drivers/mtd/Kconfig"

From 7efbbe6e141466dbe022b39fafbc81d17a8ed8be Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Tue, 28 Sep 2021 09:50:27 +0200
Subject: [PATCH 0164/1202] qcom_scm: hide Kconfig symbol

Now that SCM can be a loadable module, we have to add another
dependency to avoid link failures when ipa or adreno-gpu are
built-in:

aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe':
ipa_main.c:(.text+0xfc4): undefined reference to `qcom_scm_is_available'

ld.lld: error: undefined symbol: qcom_scm_is_available
>>> referenced by adreno_gpu.c
>>>               gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in archive
drivers/built-in.a This can happen when CONFIG_ARCH_QCOM is disabled and we don't select QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd use a similar dependency here to what we have for QCOM_RPROC_COMMON, but that causes dependency loops from other things selecting QCOM_SCM. This appears to be an endless problem, so try something different this time: - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on' but that is simply selected by all of its users - All the stubs in include/linux/qcom_scm.h can go away - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to allow compile-testing QCOM_SCM on all architectures. - To avoid a circular dependency chain involving RESET_CONTROLLER and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement. According to my testing this still builds fine, and the QCOM platform selects this symbol already. Acked-by: Kalle Valo Acked-by: Alex Elder Signed-off-by: Arnd Bergmann --- drivers/firmware/Kconfig | 5 +- drivers/gpu/drm/msm/Kconfig | 4 +- drivers/iommu/Kconfig | 3 +- drivers/iommu/arm/arm-smmu/Makefile | 3 +- drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +- drivers/media/platform/Kconfig | 2 +- drivers/mmc/host/Kconfig | 2 +- drivers/net/ipa/Kconfig | 1 + drivers/net/wireless/ath/ath10k/Kconfig | 2 +- drivers/pinctrl/qcom/Kconfig | 3 +- include/linux/arm-smccc.h | 10 +++ include/linux/qcom_scm.h | 71 ---------------------- 12 files changed, 24 insertions(+), 85 deletions(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 220a58cf0a441..cda7d7162cbbd 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -203,10 +203,7 @@ config INTEL_STRATIX10_RSU Say Y here if you want Intel RSU support. config QCOM_SCM - tristate "Qcom SCM driver" - depends on ARM || ARM64 - depends on HAVE_ARM_SMCCC - select RESET_CONTROLLER + tristate config QCOM_SCM_DOWNLOAD_MODE_DEFAULT bool "Qualcomm download mode enabled by default" diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index e9c6af78b1d7c..3ddf739a6f9b8 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -17,7 +17,7 @@ config DRM_MSM select DRM_SCHED select SHMEM select TMPFS - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select WANT_DEV_COREDUMP select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE @@ -55,7 +55,7 @@ config DRM_MSM_GPU_SUDO config DRM_MSM_HDMI_HDCP bool "Enable HDMI HDCP support in MSM DRM driver" - depends on DRM_MSM && QCOM_SCM + depends on DRM_MSM default y help Choose this option to enable HDCP state machine diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 124c41adeca18..c5c71b7ab7e83 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -308,7 +308,6 @@ config APPLE_DART config ARM_SMMU tristate "ARM Ltd. System MMU (SMMU) Support" depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -438,7 +437,7 @@ config QCOM_IOMMU # Note: iommu drivers cannot (yet?) 
be built as modules bool "Qualcomm IOMMU Support" depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on QCOM_SCM=y + select QCOM_SCM select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile index e240a7bcf3107..b0cc01aa20c98 100644 --- a/drivers/iommu/arm/arm-smmu/Makefile +++ b/drivers/iommu/arm/arm-smmu/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o obj-$(CONFIG_ARM_SMMU) += arm_smmu.o -arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o +arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o +arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c index 9f465e146799f..2c25cce380603 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c @@ -215,7 +215,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu) of_device_is_compatible(np, "nvidia,tegra186-smmu")) return nvidia_smmu_impl_init(smmu); - smmu = qcom_smmu_impl_init(smmu); + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM)) + smmu = qcom_smmu_impl_init(smmu); if (of_device_is_compatible(np, "marvell,ap806-smmu-500")) smmu->impl = &mrvl_mmu500_impl; diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index 157c924686e4b..80321e03809ac 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -565,7 +565,7 @@ config VIDEO_QCOM_VENUS depends on VIDEO_DEV && VIDEO_V4L2 && QCOM_SMEM depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST select QCOM_MDT_LOADER if ARCH_QCOM - select QCOM_SCM if ARCH_QCOM + select QCOM_SCM select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV help diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 71313961cc54d..95b3511b05605 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -547,7 +547,7 @@ config MMC_SDHCI_MSM depends on MMC_SDHCI_PLTFM select MMC_SDHCI_IO_ACCESSORS select MMC_CQHCI - select QCOM_SCM if MMC_CRYPTO && ARCH_QCOM + select QCOM_SCM if MMC_CRYPTO help This selects the Secure Digital Host Controller Interface (SDHCI) support present in Qualcomm SOCs. 
The controller supports diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 8f99cfa14680a..d037682fb7adb 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -4,6 +4,7 @@ config QCOM_IPA depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) select QCOM_MDT_LOADER if ARCH_QCOM + select QCOM_SCM select QCOM_QMI_HELPERS help Choose Y or M here to include support for the Qualcomm diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig index 741289e385d59..ca007b800f756 100644 --- a/drivers/net/wireless/ath/ath10k/Kconfig +++ b/drivers/net/wireless/ath/ath10k/Kconfig @@ -44,7 +44,7 @@ config ATH10K_SNOC tristate "Qualcomm ath10k SNOC support" depends on ATH10K depends on ARCH_QCOM || COMPILE_TEST - depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y + select QCOM_SCM select QCOM_QMI_HELPERS help This module adds support for integrated WCN3990 chip connected diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig index 32ea2a8ec02b5..5ff4207df66e1 100644 --- a/drivers/pinctrl/qcom/Kconfig +++ b/drivers/pinctrl/qcom/Kconfig @@ -3,7 +3,8 @@ if (ARCH_QCOM || COMPILE_TEST) config PINCTRL_MSM tristate "Qualcomm core pin controller driver" - depends on GPIOLIB && (QCOM_SCM || !QCOM_SCM) #if QCOM_SCM=m this can't be =y + depends on GPIOLIB + select QCOM_SCM select PINMUX select PINCONF select GENERIC_PINCONF diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 7d1cabe152622..63ccb52521902 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); * from register 0 to 3 on return from the SMC instruction. An optional * quirk structure provides vendor specific behavior. 
*/ +#ifdef CONFIG_HAVE_ARM_SMCCC asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); +#else +static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1, + unsigned long a2, unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long a6, unsigned long a7, + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk) +{ + *res = (struct arm_smccc_res){}; +} +#endif /** * __arm_smccc_hvc() - make HVC calls diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index c0475d1c98850..81cad9e1e4120 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) -#if IS_ENABLED(CONFIG_QCOM_SCM) extern bool qcom_scm_is_available(void); extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); @@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, extern int qcom_scm_lmh_profile_change(u32 profile_id); extern bool qcom_scm_lmh_dcvsh_available(void); -#else - -#include - -static inline bool qcom_scm_is_available(void) { return false; } - -static inline int qcom_scm_set_cold_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline int qcom_scm_set_warm_boot_addr(void *entry, - const cpumask_t *cpus) { return -ENODEV; } -static inline void qcom_scm_cpu_power_down(u32 flags) {} -static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) - { return -ENODEV; } - -static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size) { return -ENODEV; } -static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size) { return -ENODEV; } -static inline int qcom_scm_pas_auth_and_reset(u32 peripheral) - { return -ENODEV; } -static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; } -static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; } - -static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) - { return -ENODEV; } -static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) - { return -ENODEV; } - -static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; } -static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) - { return -ENODEV; } -static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) - { return -ENODEV; } -extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size) - { return -ENODEV; } -static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt) { return -ENODEV; } - -static inline bool qcom_scm_ocmem_lock_available(void) { return false; } -static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode) { return -ENODEV; } -static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, - u32 offset, u32 size) { return -ENODEV; } - -static inline bool qcom_scm_ice_available(void) { return false; } -static inline int 
qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; } -static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size) { return -ENODEV; } - -static inline bool qcom_scm_hdcp_available(void) { return false; } -static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp) { return -ENODEV; } - -static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en) - { return -ENODEV; } - -static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version) - { return -ENODEV; } - -static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; } - -static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; } -#endif #endif From 1c4d17a5267bcfa5741ffd93fdb540e87a0d6cdc Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 15:51:24 +0200 Subject: [PATCH 0165/1202] drm/tegra: Implement correct DMA-BUF semantics DMA-BUF requires that each device that accesses a DMA-BUF attaches to it separately. To do so the host1x_bo_pin() and host1x_bo_unpin() functions need to be reimplemented so that they can return a mapping, which either represents an attachment or a map of the driver's own GEM object. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/gem.c | 163 +++++++++++++++++++++------------ drivers/gpu/drm/tegra/plane.c | 60 +++--------- drivers/gpu/drm/tegra/plane.h | 2 +- drivers/gpu/drm/tegra/submit.c | 62 +++++++++---- drivers/gpu/drm/tegra/uapi.c | 68 +++++--------- drivers/gpu/drm/tegra/uapi.h | 5 +- drivers/gpu/host1x/job.c | 160 ++++++++++++-------------------- drivers/gpu/host1x/job.h | 6 +- include/linux/host1x.h | 30 ++++-- 9 files changed, 268 insertions(+), 288 deletions(-) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 6ec598f5d5b3e..f0646a036d7e0 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -20,86 +20,96 @@ #include "drm.h" #include "gem.h" -static void tegra_bo_put(struct host1x_bo *bo) +static unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents) { - struct tegra_bo *obj = host1x_to_tegra_bo(bo); + dma_addr_t next = ~(dma_addr_t)0; + unsigned int count = 0, i; + struct scatterlist *s; - drm_gem_object_put(&obj->gem); -} + for_each_sg(sgl, s, nents, i) { + /* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */ + if (!sg_dma_len(s)) + continue; -/* XXX move this into lib/scatterlist.c? 
*/ -static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg, - unsigned int nents, gfp_t gfp_mask) -{ - struct scatterlist *dst; - unsigned int i; - int err; + if (sg_dma_address(s) != next) { + next = sg_dma_address(s) + sg_dma_len(s); + count++; + } + } - err = sg_alloc_table(sgt, nents, gfp_mask); - if (err < 0) - return err; + return count; +} - dst = sgt->sgl; +static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt) +{ + return sg_dma_count_chunks(sgt->sgl, sgt->nents); +} - for (i = 0; i < nents; i++) { - sg_set_page(dst, sg_page(sg), sg->length, 0); - dst = sg_next(dst); - sg = sg_next(sg); - } +static void tegra_bo_put(struct host1x_bo *bo) +{ + struct tegra_bo *obj = host1x_to_tegra_bo(bo); - return 0; + drm_gem_object_put(&obj->gem); } -static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys) +static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction direction) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); - struct sg_table *sgt; + struct drm_gem_object *gem = &obj->gem; + struct host1x_bo_mapping *map; int err; + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + /* - * If we've manually mapped the buffer object through the IOMMU, make - * sure to return the IOVA address of our mapping. - * - * Similarly, for buffers that have been allocated by the DMA API the - * physical address can be used for devices that are not attached to - * an IOMMU. For these devices, callers must pass a valid pointer via - * the @phys argument. - * - * Imported buffers were also already mapped at import time, so the - * existing mapping can be reused. + * Imported buffers need special treatment to satisfy the semantics of DMA-BUF. */ - if (phys) { - *phys = obj->iova; - return NULL; + if (gem->import_attach) { + struct dma_buf *buf = gem->import_attach->dmabuf; + + map->attach = dma_buf_attach(buf, dev); + if (IS_ERR(map->attach)) { + err = PTR_ERR(map->attach); + goto free; + } + + map->sgt = dma_buf_map_attachment(map->attach, direction); + if (IS_ERR(map->sgt)) { + dma_buf_detach(buf, map->attach); + err = PTR_ERR(map->sgt); + goto free; + } + + err = sgt_dma_count_chunks(map->sgt); + map->size = gem->size; + + goto out; } /* * If we don't have a mapping for this buffer yet, return an SG table * so that host1x can do the mapping for us via the DMA API. */ - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; + } if (obj->pages) { /* * If the buffer object was allocated from the explicit IOMMU * API code paths, construct an SG table from the pages. */ - err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages, - 0, obj->gem.size, GFP_KERNEL); - if (err < 0) - goto free; - } else if (obj->sgt) { - /* - * If the buffer object already has an SG table but no pages - * were allocated for it, it means the buffer was imported and - * the SG table needs to be copied to avoid overwriting any - * other potential users of the original SG table. 
- */ - err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, - obj->sgt->orig_nents, GFP_KERNEL); + err = sg_alloc_table_from_pages(map->sgt, obj->pages, obj->num_pages, 0, gem->size, + GFP_KERNEL); if (err < 0) goto free; } else { @@ -108,25 +118,56 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, * not imported, it had to be allocated with the DMA API, so * the DMA API helper can be used. */ - err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova, - obj->gem.size); + err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova, gem->size); if (err < 0) goto free; } - return sgt; + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + +out: + /* + * If we've manually mapped the buffer object through the IOMMU, make sure to return the + * existing IOVA address of our mapping. + */ + if (!obj->mm) { + map->phys = sg_dma_address(map->sgt->sgl); + map->chunks = err; + } else { + map->phys = obj->iova; + map->chunks = 1; + } + + map->size = gem->size; + return map; + +free_sgt: + sg_free_table(map->sgt); free: - kfree(sgt); + kfree(map->sgt); + kfree(map); return ERR_PTR(err); } -static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt) +static void tegra_bo_unpin(struct host1x_bo_mapping *map) { - if (sgt) { - sg_free_table(sgt); - kfree(sgt); + if (!map) + return; + + if (map->attach) { + dma_buf_unmap_attachment(map->attach, map->sgt, map->direction); + dma_buf_detach(map->attach->dmabuf, map->attach); + } else { + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); } + + host1x_bo_put(map->bo); + kfree(map); } static void *tegra_bo_mmap(struct host1x_bo *bo) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index e00ec3f40ec84..56806034371b4 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -74,7 +74,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) for (i = 0; i < 3; i++) { copy->iova[i] = DMA_MAPPING_ERROR; - copy->sgt[i] = NULL; + copy->map[i] = NULL; } return ©->base; @@ -138,55 +138,37 @@ const struct drm_plane_funcs tegra_plane_funcs = { static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev); unsigned int i; int err; for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - dma_addr_t phys_addr, *phys; - struct sg_table *sgt; + struct host1x_bo_mapping *map; - /* - * If we're not attached to a domain, we already stored the - * physical address when the buffer was allocated. If we're - * part of a group that's shared between all display - * controllers, we've also already mapped the framebuffer - * through the SMMU. In both cases we can short-circuit the - * code below and retrieve the stored IOV address. - */ - if (!domain || dc->client.group) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(dc->dev, &bo->base, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); + map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE); + if (IS_ERR(map)) { + err = PTR_ERR(map); goto unpin; } - if (sgt) { - err = dma_map_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - if (err) - goto unpin; - + if (!dc->client.group) { /* * The display controller needs contiguous memory, so * fail if the buffer is discontiguous and we fail to * map its SG table to a single contiguous chunk of * I/O virtual memory. 
*/ - if (sgt->nents > 1) { + if (map->chunks > 1) { err = -EINVAL; goto unpin; } - state->iova[i] = sg_dma_address(sgt->sgl); - state->sgt[i] = sgt; + state->iova[i] = map->phys; } else { - state->iova[i] = phys_addr; + state->iova[i] = bo->iova; } + + state->map[i] = map; } return 0; @@ -195,15 +177,9 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) dev_err(dc->dev, "failed to map plane %u: %d\n", i, err); while (i--) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - struct sg_table *sgt = state->sgt[i]; - - if (sgt) - dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - - host1x_bo_unpin(dc->dev, &bo->base, sgt); + host1x_bo_unpin(state->map[i]); state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; + state->map[i] = NULL; } return err; @@ -214,15 +190,9 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) unsigned int i; for (i = 0; i < state->base.fb->format->num_planes; i++) { - struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); - struct sg_table *sgt = state->sgt[i]; - - if (sgt) - dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0); - - host1x_bo_unpin(dc->dev, &bo->base, sgt); + host1x_bo_unpin(state->map[i]); state->iova[i] = DMA_MAPPING_ERROR; - state->sgt[i] = NULL; + state->map[i] = NULL; } } diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index d9470780c8039..dfb20712fbd77 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -43,7 +43,7 @@ struct tegra_plane_legacy_blending_state { struct tegra_plane_state { struct drm_plane_state base; - struct sg_table *sgt[3]; + struct host1x_bo_mapping *map[3]; dma_addr_t iova[3]; struct tegra_bo_tiling tiling; diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 776f825df52fa..6b950388527b6 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -64,33 +64,61 @@ static void gather_bo_put(struct host1x_bo *host_bo) kref_put(&bo->ref, gather_bo_release); } -static struct sg_table * -gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys) +static struct host1x_bo_mapping * +gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction) { - struct gather_bo *bo = container_of(host_bo, struct gather_bo, base); - struct sg_table *sgt; + struct gather_bo *gather = container_of(bo, struct gather_bo, base); + struct host1x_bo_mapping *map; int err; - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) return ERR_PTR(-ENOMEM); - err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma, - bo->gather_data_words * 4); - if (err) { - kfree(sgt); - return ERR_PTR(err); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; } - return sgt; + err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma, + gather->gather_data_words * 4); + if (err) + goto free_sgt; + + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + + map->phys = sg_dma_address(map->sgt->sgl); + map->size = gather->gather_data_words * 4; + map->chunks = err; + + return map; + +free_sgt: + sg_free_table(map->sgt); + kfree(map->sgt); +free: + kfree(map); + return ERR_PTR(err); } -static void gather_bo_unpin(struct device *dev, struct sg_table *sgt) +static void 
gather_bo_unpin(struct host1x_bo_mapping *map) { - if (sgt) { - sg_free_table(sgt); - kfree(sgt); - } + if (!map) + return; + + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); + host1x_bo_put(map->bo); + + kfree(map); } static void *gather_bo_mmap(struct host1x_bo *host_bo) diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index 690a339c52ec6..d31df8e129c5e 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -17,11 +17,7 @@ static void tegra_drm_mapping_release(struct kref *ref) struct tegra_drm_mapping *mapping = container_of(ref, struct tegra_drm_mapping, ref); - if (mapping->sgt) - dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - - host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_unpin(mapping->map); host1x_bo_put(mapping->bo); kfree(mapping); @@ -159,6 +155,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f struct drm_tegra_channel_map *args = data; struct tegra_drm_mapping *mapping; struct tegra_drm_context *context; + enum dma_data_direction direction; int err = 0; if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE) @@ -180,68 +177,53 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f kref_init(&mapping->ref); - mapping->dev = context->client->base.dev; mapping->bo = tegra_gem_lookup(file, args->handle); if (!mapping->bo) { err = -EINVAL; - goto unlock; + goto free; } - if (context->client->base.group) { - /* IOMMU domain managed directly using IOMMU API */ - host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova); - } else { - switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { - case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: - mapping->direction = DMA_BIDIRECTIONAL; - break; - - case DRM_TEGRA_CHANNEL_MAP_WRITE: - mapping->direction = DMA_FROM_DEVICE; - break; - - case DRM_TEGRA_CHANNEL_MAP_READ: - mapping->direction = DMA_TO_DEVICE; - break; + switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) { + case DRM_TEGRA_CHANNEL_MAP_READ_WRITE: + direction = DMA_BIDIRECTIONAL; + break; - default: - return -EINVAL; - } + case DRM_TEGRA_CHANNEL_MAP_WRITE: + direction = DMA_FROM_DEVICE; + break; - mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL); - if (IS_ERR(mapping->sgt)) { - err = PTR_ERR(mapping->sgt); - goto put_gem; - } + case DRM_TEGRA_CHANNEL_MAP_READ: + direction = DMA_TO_DEVICE; + break; - err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - if (err) - goto unpin; + default: + err = -EINVAL; + goto put_gem; + } - mapping->iova = sg_dma_address(mapping->sgt->sgl); + mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction); + if (IS_ERR(mapping->map)) { + err = PTR_ERR(mapping->map); + goto put_gem; } + mapping->iova = mapping->map->phys; mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->gem.size; err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), GFP_KERNEL); if (err < 0) - goto unmap; + goto unpin; mutex_unlock(&fpriv->lock); return 0; -unmap: - if (mapping->sgt) { - dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction, - DMA_ATTR_SKIP_CPU_SYNC); - } unpin: - host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt); + host1x_bo_unpin(mapping->map); put_gem: host1x_bo_put(mapping->bo); +free: kfree(mapping); unlock: mutex_unlock(&fpriv->lock); diff --git a/drivers/gpu/drm/tegra/uapi.h 
b/drivers/gpu/drm/tegra/uapi.h index 12adad770ad3f..92ff1e44ff150 100644 --- a/drivers/gpu/drm/tegra/uapi.h +++ b/drivers/gpu/drm/tegra/uapi.h @@ -27,10 +27,9 @@ struct tegra_drm_file { struct tegra_drm_mapping { struct kref ref; - struct device *dev; + struct host1x_bo_mapping *map; struct host1x_bo *bo; - struct sg_table *sgt; - enum dma_data_direction direction; + dma_addr_t iova; dma_addr_t iova_end; }; diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 0eef6df7c89ec..64bcc3d83efc5 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -134,20 +134,20 @@ EXPORT_SYMBOL(host1x_job_add_wait); static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; struct host1x_client *client = job->client; struct device *dev = client->dev; struct host1x_job_gather *g; - struct iommu_domain *domain; - struct sg_table *sgt; unsigned int i; int err; - domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; - dma_addr_t phys_addr, *phys; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -155,64 +155,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - /* - * If the client device is not attached to an IOMMU, the - * physical address of the buffer object can be used. - * - * Similarly, when an IOMMU domain is shared between all - * host1x clients, the IOVA is already available, so no - * need to map the buffer object again. - * - * XXX Note that this isn't always safe to do because it - * relies on an assumption that no cache maintenance is - * needed on the buffer objects. - */ - if (!domain || client->group) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(dev, reloc->target.bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + bo = reloc->target.bo; - if (sgt) { - unsigned long mask = HOST1X_RELOC_READ | - HOST1X_RELOC_WRITE; - enum dma_data_direction dir; - - switch (reloc->flags & mask) { - case HOST1X_RELOC_READ: - dir = DMA_TO_DEVICE; - break; + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; - case HOST1X_RELOC_WRITE: - dir = DMA_FROM_DEVICE; - break; + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; - case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: - dir = DMA_BIDIRECTIONAL; - break; + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; - default: - err = -EINVAL; - goto unpin; - } + default: + err = -EINVAL; + goto unpin; + } - err = dma_map_sgtable(dev, sgt, dir, 0); - if (err) - goto unpin; + map = host1x_bo_pin(dev, bo, direction); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } - job->unpins[job->num_unpins].dev = dev; - job->unpins[job->num_unpins].dir = dir; - phys_addr = sg_dma_address(sgt->sgl); + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. 
+ */ + if (map->chunks > 1) { + err = -EINVAL; + goto unpin; } - job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target.bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; } @@ -224,12 +204,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) return 0; for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; size_t gather_size = 0; struct scatterlist *sg; - dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; - dma_addr_t *phys; unsigned int j; if (job->cmds[i].is_wait) @@ -243,25 +222,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - /** - * If the host1x is not attached to an IOMMU, there is no need - * to map the buffer object for the host1x, since the physical - * address can simply be used. - */ - if (!iommu_get_domain_for_dev(host->dev)) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(host->dev, g->bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto put; + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; } if (host->domain) { - for_each_sgtable_sg(sgt, sg, j) + for_each_sgtable_sg(map->sgt, sg, j) gather_size += sg->length; + gather_size = iova_align(&host->iova, gather_size); shift = iova_shift(&host->iova); @@ -272,33 +242,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto put; } - err = iommu_map_sgtable(host->domain, - iova_dma_addr(&host->iova, alloc), - sgt, IOMMU_READ); + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); if (err == 0) { __free_iova(&host->iova, alloc); err = -EINVAL; goto put; } - job->unpins[job->num_unpins].size = gather_size; - phys_addr = iova_dma_addr(&host->iova, alloc); - } else if (sgt) { - err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0); - if (err) - goto put; - - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; - job->unpins[job->num_unpins].dev = host->dev; - phys_addr = sg_dma_address(sgt->sgl); + map->phys = iova_dma_addr(&host->iova, alloc); + map->size = gather_size; } - job->addr_phys[job->num_unpins] = phys_addr; - job->gather_addr_phys[i] = phys_addr; - - job->unpins[job->num_unpins].bo = g->bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; } return 0; @@ -690,22 +650,16 @@ void host1x_job_unpin(struct host1x_job *job) unsigned int i; for (i = 0; i < job->num_unpins; i++) { - struct host1x_job_unpin_data *unpin = &job->unpins[i]; - struct device *dev = unpin->dev ?: host->dev; - struct sg_table *sgt = unpin->sgt; - - if (!job->enable_firewall && unpin->size && host->domain) { - iommu_unmap(host->domain, job->addr_phys[i], - unpin->size); - free_iova(&host->iova, - iova_pfn(&host->iova, job->addr_phys[i])); - } + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; - if (unpin->dev && sgt) - dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0); + if (!job->enable_firewall && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); + } - host1x_bo_unpin(dev, unpin->bo, sgt); - host1x_bo_put(unpin->bo); + host1x_bo_unpin(map); + host1x_bo_put(bo); } job->num_unpins = 
0; diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index b4428c5495c93..dad5a1946693d 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -35,11 +35,7 @@ struct host1x_job_cmd { }; struct host1x_job_unpin_data { - struct host1x_bo *bo; - struct sg_table *sgt; - struct device *dev; - size_t size; - enum dma_data_direction dir; + struct host1x_bo_mapping *map; }; /* diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 7bccf589aba7d..157326074df86 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -7,6 +7,7 @@ #define __LINUX_HOST1X_H #include +#include #include enum host1x_class { @@ -82,12 +83,23 @@ struct host1x_client { struct host1x_bo; struct sg_table; +struct host1x_bo_mapping { + struct dma_buf_attachment *attach; + enum dma_data_direction direction; + struct host1x_bo *bo; + struct sg_table *sgt; + unsigned int chunks; + struct device *dev; + dma_addr_t phys; + size_t size; +}; + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); - struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo, - dma_addr_t *phys); - void (*unpin)(struct device *dev, struct sg_table *sgt); + struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir); + void (*unpin)(struct host1x_bo_mapping *map); void *(*mmap)(struct host1x_bo *bo); void (*munmap)(struct host1x_bo *bo, void *addr); }; @@ -112,17 +124,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct sg_table *host1x_bo_pin(struct device *dev, - struct host1x_bo *bo, - dma_addr_t *phys) +static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir) { - return bo->ops->pin(dev, bo, phys); + return bo->ops->pin(dev, bo, dir); } -static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo, - struct sg_table *sgt) +static inline void host1x_bo_unpin(struct host1x_bo_mapping *map) { - bo->ops->unpin(dev, sgt); + map->bo->ops->unpin(map); } static inline void *host1x_bo_mmap(struct host1x_bo *bo) From e3166698a8a049a3e94e549d41012b400f15d1e6 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 7 Feb 2020 16:50:52 +0100 Subject: [PATCH 0166/1202] drm/tegra: Implement buffer object cache This cache is used to avoid mapping and unmapping buffer objects unnecessarily. Mappings are cached per client and stay hot until the buffer object is destroyed. 
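As a rough usage sketch (illustrative only, not part of this patch:
example_submit() and the hardware-programming step are hypothetical,
while host1x_bo_pin(), host1x_bo_unpin() and the per-client cache are
the interfaces added here):

	static int example_submit(struct host1x_client *client,
				  struct host1x_bo *bo)
	{
		struct host1x_bo_mapping *map;

		/*
		 * Pinning the same buffer object again for the same device
		 * and direction hits the cache and merely takes another
		 * reference on the existing mapping.
		 */
		map = host1x_bo_pin(client->dev, bo, DMA_TO_DEVICE,
				    &client->cache);
		if (IS_ERR(map))
			return PTR_ERR(map);

		/* program map->phys into the hardware here */

		/* Drops this reference; the cached mapping stays hot. */
		host1x_bo_unpin(map);

		return 0;
	}
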
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/gem.c | 14 ++++-- drivers/gpu/drm/tegra/plane.c | 2 +- drivers/gpu/drm/tegra/submit.c | 1 + drivers/gpu/drm/tegra/uapi.c | 2 +- drivers/gpu/host1x/bus.c | 78 ++++++++++++++++++++++++++++++++++ drivers/gpu/host1x/dev.c | 2 + drivers/gpu/host1x/dev.h | 2 + drivers/gpu/host1x/job.c | 4 +- include/linux/host1x.h | 53 ++++++++++++++++++----- 9 files changed, 141 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index f0646a036d7e0..62fc7e8429d43 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -64,6 +64,7 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_ if (!map) return ERR_PTR(-ENOMEM); + kref_init(&map->ref); map->bo = host1x_bo_get(bo); map->direction = direction; map->dev = dev; @@ -154,9 +155,6 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_ static void tegra_bo_unpin(struct host1x_bo_mapping *map) { - if (!map) - return; - if (map->attach) { dma_buf_unmap_attachment(map->attach, map->sgt, map->direction); dma_buf_detach(map->attach->dmabuf, map->attach); @@ -490,8 +488,18 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm, void tegra_bo_free_object(struct drm_gem_object *gem) { struct tegra_drm *tegra = gem->dev->dev_private; + struct host1x_bo_mapping *mapping, *tmp; struct tegra_bo *bo = to_tegra_bo(gem); + /* remove all mappings of this buffer object from any caches */ + list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) { + if (mapping->cache) + host1x_bo_unpin(mapping); + else + dev_err(gem->dev->dev, "mapping %p stale for device %s\n", mapping, + dev_name(mapping->dev)); + } + if (tegra->domain) tegra_bo_iommu_unmap(tegra, bo); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 56806034371b4..298f5831c9005 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -145,7 +145,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); struct host1x_bo_mapping *map; - map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE); + map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE, &dc->client.cache); if (IS_ERR(map)) { err = PTR_ERR(map); goto unpin; diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 6b950388527b6..c32698404e36a 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -75,6 +75,7 @@ gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction if (!map) return ERR_PTR(-ENOMEM); + kref_init(&map->ref); map->bo = host1x_bo_get(bo); map->direction = direction; map->dev = dev; diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c index d31df8e129c5e..9ab9179d20262 100644 --- a/drivers/gpu/drm/tegra/uapi.c +++ b/drivers/gpu/drm/tegra/uapi.c @@ -201,7 +201,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f goto put_gem; } - mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction); + mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction, NULL); if (IS_ERR(mapping->map)) { err = PTR_ERR(mapping->map); goto put_gem; diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 218e3718fd68c..0b67d894470d3 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -742,6 +742,7 @@ 
EXPORT_SYMBOL(host1x_driver_unregister); */ void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key) { + host1x_bo_cache_init(&client->cache); INIT_LIST_HEAD(&client->list); __mutex_init(&client->lock, "host1x client lock", key); client->usecount = 0; @@ -830,6 +831,8 @@ int host1x_client_unregister(struct host1x_client *client) mutex_unlock(&clients_lock); + host1x_bo_cache_destroy(&client->cache); + return 0; } EXPORT_SYMBOL(host1x_client_unregister); @@ -904,3 +907,78 @@ int host1x_client_resume(struct host1x_client *client) return err; } EXPORT_SYMBOL(host1x_client_resume); + +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache) +{ + struct host1x_bo_mapping *mapping; + + if (cache) { + mutex_lock(&cache->lock); + + list_for_each_entry(mapping, &cache->mappings, entry) { + if (mapping->bo == bo && mapping->direction == dir) { + kref_get(&mapping->ref); + goto unlock; + } + } + } + + mapping = bo->ops->pin(dev, bo, dir); + if (IS_ERR(mapping)) + goto unlock; + + spin_lock(&mapping->bo->lock); + list_add_tail(&mapping->list, &bo->mappings); + spin_unlock(&mapping->bo->lock); + + if (cache) { + INIT_LIST_HEAD(&mapping->entry); + mapping->cache = cache; + + list_add_tail(&mapping->entry, &cache->mappings); + + /* bump reference count to track the copy in the cache */ + kref_get(&mapping->ref); + } + +unlock: + if (cache) + mutex_unlock(&cache->lock); + + return mapping; +} +EXPORT_SYMBOL(host1x_bo_pin); + +static void __host1x_bo_unpin(struct kref *ref) +{ + struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref); + + /* + * When the last reference of the mapping goes away, make sure to remove the mapping from + * the cache. 
+ */ + if (mapping->cache) + list_del(&mapping->entry); + + spin_lock(&mapping->bo->lock); + list_del(&mapping->list); + spin_unlock(&mapping->bo->lock); + + mapping->bo->ops->unpin(mapping); +} + +void host1x_bo_unpin(struct host1x_bo_mapping *mapping) +{ + struct host1x_bo_cache *cache = mapping->cache; + + if (cache) + mutex_lock(&cache->lock); + + kref_put(&mapping->ref, __host1x_bo_unpin); + + if (cache) + mutex_unlock(&cache->lock); +} +EXPORT_SYMBOL(host1x_bo_unpin); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index fbb6447b8659e..85eb3d19bbdb6 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -386,6 +386,7 @@ static int host1x_probe(struct platform_device *pdev) if (syncpt_irq < 0) return syncpt_irq; + host1x_bo_cache_init(&host->cache); mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); INIT_LIST_HEAD(&host->list); @@ -512,6 +513,7 @@ static int host1x_remove(struct platform_device *pdev) reset_control_assert(host->rst); clk_disable_unprepare(host->clk); host1x_iommu_exit(host); + host1x_bo_cache_destroy(&host->cache); return 0; } diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index fa6d4bc46e981..5b7fdea5d1699 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -149,6 +149,8 @@ struct host1x { struct list_head list; struct device_dma_parameters dma_parms; + + struct host1x_bo_cache cache; }; void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 64bcc3d83efc5..5e8c183167b7d 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -175,7 +175,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - map = host1x_bo_pin(dev, bo, direction); + map = host1x_bo_pin(dev, bo, direction, &client->cache); if (IS_ERR(map)) { err = PTR_ERR(map); goto unpin; @@ -222,7 +222,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE); + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache); if (IS_ERR(map)) { err = PTR_ERR(map); goto unpin; diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 157326074df86..d0bb16cdd005d 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -8,6 +8,7 @@ #include #include +#include #include enum host1x_class { @@ -24,6 +25,28 @@ struct iommu_group; u64 host1x_get_dma_mask(struct host1x *host1x); +/** + * struct host1x_bo_cache - host1x buffer object cache + * @mappings: list of mappings + * @lock: synchronizes accesses to the list of mappings + */ +struct host1x_bo_cache { + struct list_head mappings; + struct mutex lock; +}; + +static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache) +{ + INIT_LIST_HEAD(&cache->mappings); + mutex_init(&cache->lock); +} + +static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache) +{ + /* XXX warn if not empty? 
*/ + mutex_destroy(&cache->lock); +} + /** * struct host1x_client_ops - host1x client operations * @early_init: host1x client early initialization code @@ -74,6 +97,8 @@ struct host1x_client { struct host1x_client *parent; unsigned int usecount; struct mutex lock; + + struct host1x_bo_cache cache; }; /* @@ -84,16 +109,26 @@ struct host1x_bo; struct sg_table; struct host1x_bo_mapping { + struct kref ref; struct dma_buf_attachment *attach; enum dma_data_direction direction; + struct list_head list; struct host1x_bo *bo; struct sg_table *sgt; unsigned int chunks; struct device *dev; dma_addr_t phys; size_t size; + + struct host1x_bo_cache *cache; + struct list_head entry; }; +static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref) +{ + return container_of(ref, struct host1x_bo_mapping, ref); +} + struct host1x_bo_ops { struct host1x_bo *(*get)(struct host1x_bo *bo); void (*put)(struct host1x_bo *bo); @@ -106,11 +141,15 @@ struct host1x_bo_ops { struct host1x_bo { const struct host1x_bo_ops *ops; + struct list_head mappings; + spinlock_t lock; }; static inline void host1x_bo_init(struct host1x_bo *bo, const struct host1x_bo_ops *ops) { + INIT_LIST_HEAD(&bo->mappings); + spin_lock_init(&bo->lock); bo->ops = ops; } @@ -124,16 +163,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo) bo->ops->put(bo); } -static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, - enum dma_data_direction dir) -{ - return bo->ops->pin(dev, bo, dir); -} - -static inline void host1x_bo_unpin(struct host1x_bo_mapping *map) -{ - map->bo->ops->unpin(map); -} +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache); +void host1x_bo_unpin(struct host1x_bo_mapping *map); static inline void *host1x_bo_mmap(struct host1x_bo *bo) { From c8696fa00635bcd39e1119d4acc0aa040ab81ece Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 14:08:24 +0200 Subject: [PATCH 0167/1202] drm/tegra: Do not reference tegra_plane_funcs directly Instead of referencing the tegra_plane_funcs struct directly, use each plane's vtable instead. This makes it more future-proof in case any of the planes ever use a different set of functions. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index a29d64f875635..a73fd6abc9a61 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1267,9 +1267,9 @@ static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm, err = PTR_ERR(planes[i]); while (i--) - tegra_plane_funcs.destroy(planes[i]); + planes[i]->funcs->destroy(planes[i]); - tegra_plane_funcs.destroy(primary); + primary->funcs->destroy(primary); return ERR_PTR(err); } } From 953018ca991f374d413a4e602b1db1efa9f44f54 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 14:10:23 +0200 Subject: [PATCH 0168/1202] drm/tegra: Propagate errors from drm_gem_plane_helper_prepare_fb() Currently this function doesn't return an error, but that may change in the future, so make sure to propagate any error codes that it might return. 
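For background, the atomic helpers only invoke ->cleanup_fb for planes whose ->prepare_fb succeeded, so a failing ->prepare_fb must not leave anything behind. A rough sketch of the resulting pattern (illustrative only; tegra_dc_pin() is assumed to undo its own partial work when it fails):

	err = drm_gem_plane_helper_prepare_fb(plane, state);
	if (err < 0)
		return err;	/* nothing pinned yet, safe to bail */

	return tegra_dc_pin(dc, to_tegra_plane_state(state));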
Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/plane.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 298f5831c9005..0a938b2ed1f69 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -200,11 +200,14 @@ int tegra_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { struct tegra_dc *dc = to_tegra_dc(state->crtc); + int err; if (!state->fb) return 0; - drm_gem_plane_helper_prepare_fb(plane, state); + err = drm_gem_plane_helper_prepare_fb(plane, state); + if (err < 0) + return err; return tegra_dc_pin(dc, to_tegra_plane_state(state)); } From 8de4e9a62b1794c0ffb58cec79c1f0abb943cc3f Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 9 Sep 2021 14:07:57 +0200 Subject: [PATCH 0169/1202] drm/tegra: Support asynchronous commits for cursor This adds support for asynchronously updating the cursor plane, which enables support for the legacy cursor IOCTLs. Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 80 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index a73fd6abc9a61..6d1c8cc8b507e 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -890,11 +890,9 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane, return 0; } -static void tegra_cursor_atomic_update(struct drm_plane *plane, - struct drm_atomic_state *state) +static void __tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_plane_state *new_state) { - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); struct tegra_dc *dc = to_tegra_dc(new_state->crtc); struct tegra_drm *tegra = plane->dev->dev_private; @@ -990,6 +988,14 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION); } +static void tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + + __tegra_cursor_atomic_update(plane, new_state); +} + static void tegra_cursor_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -1009,12 +1015,78 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane, tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); } +static int tegra_cursor_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + struct drm_crtc_state *crtc_state; + int min_scale, max_scale; + int err; + + crtc_state = drm_atomic_get_existing_crtc_state(state, new_state->crtc); + if (WARN_ON(!crtc_state)) + return -EINVAL; + + if (!crtc_state->active) + return -EINVAL; + + if (plane->state->crtc != new_state->crtc || + plane->state->src_w != new_state->src_w || + plane->state->src_h != new_state->src_h || + plane->state->crtc_w != new_state->crtc_w || + plane->state->crtc_h != new_state->crtc_h || + plane->state->fb != new_state->fb || + plane->state->fb == NULL) + return -EINVAL; + + min_scale = (1 << 16) / 8; + max_scale = (8 << 16) / 1; + + err = drm_atomic_helper_check_plane_state(new_state, crtc_state, min_scale, max_scale, + true, true); + if (err < 0) + return err; + + if (new_state->visible != plane->state->visible) + return 
-EINVAL; + + return 0; +} + +static void tegra_cursor_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); + struct tegra_dc *dc = to_tegra_dc(new_state->crtc); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + + if (new_state->visible) { + struct tegra_plane *p = to_tegra_plane(plane); + u32 value; + + __tegra_cursor_atomic_update(plane, new_state); + + value = (WIN_A_ACT_REQ << p->index) << 8 | GENERAL_UPDATE; + tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); + + value = (WIN_A_ACT_REQ << p->index) | GENERAL_ACT_REQ; + tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL); + (void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL); + } +} + static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = { .prepare_fb = tegra_plane_prepare_fb, .cleanup_fb = tegra_plane_cleanup_fb, .atomic_check = tegra_cursor_atomic_check, .atomic_update = tegra_cursor_atomic_update, .atomic_disable = tegra_cursor_atomic_disable, + .atomic_async_check = tegra_cursor_atomic_async_check, + .atomic_async_update = tegra_cursor_atomic_async_update, }; static const uint64_t linear_modifiers[] = { From e76599df354df9f0e919d97dfea80590c24d9abb Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 16 Sep 2021 17:55:17 +0300 Subject: [PATCH 0170/1202] drm/tegra: Add NVDEC driver Add support for booting and using NVDEC on Tegra210, Tegra186 and Tegra194 to the Host1x and TegraDRM drivers. Booting in secure mode is not currently supported. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/Makefile | 3 +- drivers/gpu/drm/tegra/drm.c | 4 + drivers/gpu/drm/tegra/drm.h | 1 + drivers/gpu/drm/tegra/nvdec.c | 464 +++++++++++++++++++++++++++++++++ drivers/gpu/host1x/dev.c | 18 ++ include/linux/host1x.h | 2 + 6 files changed, 491 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/tegra/nvdec.c diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile index d801909182cf3..df6cc986aeba0 100644 --- a/drivers/gpu/drm/tegra/Makefile +++ b/drivers/gpu/drm/tegra/Makefile @@ -23,7 +23,8 @@ tegra-drm-y := \ gr2d.o \ gr3d.o \ falcon.o \ - vic.o + vic.o \ + nvdec.o tegra-drm-y += trace.o diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 8d37d6b00562a..2e7da2a7505d5 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1344,15 +1344,18 @@ static const struct of_device_id host1x_drm_subdevs[] = { { .compatible = "nvidia,tegra210-sor", }, { .compatible = "nvidia,tegra210-sor1", }, { .compatible = "nvidia,tegra210-vic", }, + { .compatible = "nvidia,tegra210-nvdec", }, { .compatible = "nvidia,tegra186-display", }, { .compatible = "nvidia,tegra186-dc", }, { .compatible = "nvidia,tegra186-sor", }, { .compatible = "nvidia,tegra186-sor1", }, { .compatible = "nvidia,tegra186-vic", }, + { .compatible = "nvidia,tegra186-nvdec", }, { .compatible = "nvidia,tegra194-display", }, { .compatible = "nvidia,tegra194-dc", }, { .compatible = "nvidia,tegra194-sor", }, { .compatible = "nvidia,tegra194-vic", }, + { .compatible = "nvidia,tegra194-nvdec", }, { /* sentinel */ } }; @@ -1376,6 +1379,7 @@ static struct platform_driver * const drivers[] = { &tegra_gr2d_driver, &tegra_gr3d_driver, &tegra_vic_driver, + &tegra_nvdec_driver, }; static int __init 
host1x_drm_init(void) diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 8b28327c931c1..fc0a19554eac7 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -202,5 +202,6 @@ extern struct platform_driver tegra_sor_driver; extern struct platform_driver tegra_gr2d_driver; extern struct platform_driver tegra_gr3d_driver; extern struct platform_driver tegra_vic_driver; +extern struct platform_driver tegra_nvdec_driver; #endif /* HOST1X_DRM_H */ diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c new file mode 100644 index 0000000000000..c3b6fe7fb4547 --- /dev/null +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2015-2021, NVIDIA Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "drm.h" +#include "falcon.h" +#include "vic.h" + +struct nvdec_config { + const char *firmware; + unsigned int version; + bool supports_sid; +}; + +struct nvdec { + struct falcon falcon; + + void __iomem *regs; + struct tegra_drm_client client; + struct host1x_channel *channel; + struct device *dev; + struct clk *clk; + + /* Platform configuration */ + const struct nvdec_config *config; +}; + +static inline struct nvdec *to_nvdec(struct tegra_drm_client *client) +{ + return container_of(client, struct nvdec, client); +} + +static void nvdec_writel(struct nvdec *nvdec, u32 value, unsigned int offset) +{ + writel(value, nvdec->regs + offset); +} + +static int nvdec_boot(struct nvdec *nvdec) +{ +#ifdef CONFIG_IOMMU_API + struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev); +#endif + int err; + +#ifdef CONFIG_IOMMU_API + if (nvdec->config->supports_sid && spec) { + u32 value; + + value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW); + nvdec_writel(nvdec, value, VIC_TFBIF_TRANSCFG); + + if (spec->num_ids > 0) { + value = spec->ids[0] & 0xffff; + + nvdec_writel(nvdec, value, VIC_THI_STREAMID0); + nvdec_writel(nvdec, value, VIC_THI_STREAMID1); + } + } +#endif + + err = falcon_boot(&nvdec->falcon); + if (err < 0) + return err; + + err = falcon_wait_idle(&nvdec->falcon); + if (err < 0) { + dev_err(nvdec->dev, "falcon boot timed out\n"); + return err; + } + + return 0; +} + +static int nvdec_init(struct host1x_client *client) +{ + struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct drm_device *dev = dev_get_drvdata(client->host); + struct tegra_drm *tegra = dev->dev_private; + struct nvdec *nvdec = to_nvdec(drm); + int err; + + err = host1x_client_iommu_attach(client); + if (err < 0 && err != -ENODEV) { + dev_err(nvdec->dev, "failed to attach to domain: %d\n", err); + return err; + } + + nvdec->channel = host1x_channel_request(client); + if (!nvdec->channel) { + err = -ENOMEM; + goto detach; + } + + client->syncpts[0] = host1x_syncpt_request(client, 0); + if (!client->syncpts[0]) { + err = -ENOMEM; + goto free_channel; + } + + err = tegra_drm_register_client(tegra, drm); + if (err < 0) + goto free_syncpt; + + /* + * Inherit the DMA parameters (such as maximum segment size) from the + * parent host1x device. 
+ */ + client->dev->dma_parms = client->host->dma_parms; + + return 0; + +free_syncpt: + host1x_syncpt_put(client->syncpts[0]); +free_channel: + host1x_channel_put(nvdec->channel); +detach: + host1x_client_iommu_detach(client); + + return err; +} + +static int nvdec_exit(struct host1x_client *client) +{ + struct tegra_drm_client *drm = host1x_to_drm_client(client); + struct drm_device *dev = dev_get_drvdata(client->host); + struct tegra_drm *tegra = dev->dev_private; + struct nvdec *nvdec = to_nvdec(drm); + int err; + + /* avoid a dangling pointer just in case this disappears */ + client->dev->dma_parms = NULL; + + err = tegra_drm_unregister_client(tegra, drm); + if (err < 0) + return err; + + host1x_syncpt_put(client->syncpts[0]); + host1x_channel_put(nvdec->channel); + host1x_client_iommu_detach(client); + + if (client->group) { + dma_unmap_single(nvdec->dev, nvdec->falcon.firmware.phys, + nvdec->falcon.firmware.size, DMA_TO_DEVICE); + tegra_drm_free(tegra, nvdec->falcon.firmware.size, + nvdec->falcon.firmware.virt, + nvdec->falcon.firmware.iova); + } else { + dma_free_coherent(nvdec->dev, nvdec->falcon.firmware.size, + nvdec->falcon.firmware.virt, + nvdec->falcon.firmware.iova); + } + + return 0; +} + +static const struct host1x_client_ops nvdec_client_ops = { + .init = nvdec_init, + .exit = nvdec_exit, +}; + +static int nvdec_load_firmware(struct nvdec *nvdec) +{ + struct host1x_client *client = &nvdec->client.base; + struct tegra_drm *tegra = nvdec->client.drm; + dma_addr_t iova; + size_t size; + void *virt; + int err; + + if (nvdec->falcon.firmware.virt) + return 0; + + err = falcon_read_firmware(&nvdec->falcon, nvdec->config->firmware); + if (err < 0) + return err; + + size = nvdec->falcon.firmware.size; + + if (!client->group) { + virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL); + + err = dma_mapping_error(nvdec->dev, iova); + if (err < 0) + return err; + } else { + virt = tegra_drm_alloc(tegra, size, &iova); + } + + nvdec->falcon.firmware.virt = virt; + nvdec->falcon.firmware.iova = iova; + + err = falcon_load_firmware(&nvdec->falcon); + if (err < 0) + goto cleanup; + + /* + * In this case we have received an IOVA from the shared domain, so we + * need to make sure to get the physical address so that the DMA API + * knows what memory pages to flush the cache for. 
+ */ + if (client->group) { + dma_addr_t phys; + + phys = dma_map_single(nvdec->dev, virt, size, DMA_TO_DEVICE); + + err = dma_mapping_error(nvdec->dev, phys); + if (err < 0) + goto cleanup; + + nvdec->falcon.firmware.phys = phys; + } + + return 0; + +cleanup: + if (!client->group) + dma_free_coherent(nvdec->dev, size, virt, iova); + else + tegra_drm_free(tegra, size, virt, iova); + + return err; +} + + +static int nvdec_runtime_resume(struct device *dev) +{ + struct nvdec *nvdec = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(nvdec->clk); + if (err < 0) + return err; + + usleep_range(10, 20); + + err = nvdec_load_firmware(nvdec); + if (err < 0) + goto disable; + + err = nvdec_boot(nvdec); + if (err < 0) + goto disable; + + return 0; + +disable: + clk_disable_unprepare(nvdec->clk); + return err; +} + +static int nvdec_runtime_suspend(struct device *dev) +{ + struct nvdec *nvdec = dev_get_drvdata(dev); + + clk_disable_unprepare(nvdec->clk); + + return 0; +} + +static int nvdec_open_channel(struct tegra_drm_client *client, + struct tegra_drm_context *context) +{ + struct nvdec *nvdec = to_nvdec(client); + int err; + + err = pm_runtime_get_sync(nvdec->dev); + if (err < 0) { + pm_runtime_put(nvdec->dev); + return err; + } + + context->channel = host1x_channel_get(nvdec->channel); + if (!context->channel) { + pm_runtime_put(nvdec->dev); + return -ENOMEM; + } + + return 0; +} + +static void nvdec_close_channel(struct tegra_drm_context *context) +{ + struct nvdec *nvdec = to_nvdec(context->client); + + host1x_channel_put(context->channel); + pm_runtime_put(nvdec->dev); +} + +static const struct tegra_drm_client_ops nvdec_ops = { + .open_channel = nvdec_open_channel, + .close_channel = nvdec_close_channel, + .submit = tegra_drm_submit, +}; + +#define NVIDIA_TEGRA_210_NVDEC_FIRMWARE "nvidia/tegra210/nvdec.bin" + +static const struct nvdec_config nvdec_t210_config = { + .firmware = NVIDIA_TEGRA_210_NVDEC_FIRMWARE, + .version = 0x21, + .supports_sid = false, +}; + +#define NVIDIA_TEGRA_186_NVDEC_FIRMWARE "nvidia/tegra186/nvdec.bin" + +static const struct nvdec_config nvdec_t186_config = { + .firmware = NVIDIA_TEGRA_186_NVDEC_FIRMWARE, + .version = 0x18, + .supports_sid = true, +}; + +#define NVIDIA_TEGRA_194_NVDEC_FIRMWARE "nvidia/tegra194/nvdec.bin" + +static const struct nvdec_config nvdec_t194_config = { + .firmware = NVIDIA_TEGRA_194_NVDEC_FIRMWARE, + .version = 0x19, + .supports_sid = true, +}; + +static const struct of_device_id tegra_nvdec_of_match[] = { + { .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config }, + { .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config }, + { .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match); + +static int nvdec_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct host1x_syncpt **syncpts; + struct nvdec *nvdec; + u32 host_class; + int err; + + /* inherit DMA mask from host1x parent */ + err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + nvdec = devm_kzalloc(dev, sizeof(*nvdec), GFP_KERNEL); + if (!nvdec) + return -ENOMEM; + + nvdec->config = of_device_get_match_data(dev); + + syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); + if (!syncpts) + return -ENOMEM; + + nvdec->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(nvdec->regs)) + return PTR_ERR(nvdec->regs); + + 
nvdec->clk = devm_clk_get(dev, NULL); + if (IS_ERR(nvdec->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + return PTR_ERR(nvdec->clk); + } + + err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class); + if (err < 0) + host_class = HOST1X_CLASS_NVDEC; + + nvdec->falcon.dev = dev; + nvdec->falcon.regs = nvdec->regs; + + err = falcon_init(&nvdec->falcon); + if (err < 0) + return err; + + platform_set_drvdata(pdev, nvdec); + + INIT_LIST_HEAD(&nvdec->client.base.list); + nvdec->client.base.ops = &nvdec_client_ops; + nvdec->client.base.dev = dev; + nvdec->client.base.class = host_class; + nvdec->client.base.syncpts = syncpts; + nvdec->client.base.num_syncpts = 1; + nvdec->dev = dev; + + INIT_LIST_HEAD(&nvdec->client.list); + nvdec->client.version = nvdec->config->version; + nvdec->client.ops = &nvdec_ops; + + err = host1x_client_register(&nvdec->client.base); + if (err < 0) { + dev_err(dev, "failed to register host1x client: %d\n", err); + goto exit_falcon; + } + + pm_runtime_enable(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, 500); + pm_runtime_use_autosuspend(&pdev->dev); + + return 0; + +exit_falcon: + falcon_exit(&nvdec->falcon); + + return err; +} + +static int nvdec_remove(struct platform_device *pdev) +{ + struct nvdec *nvdec = platform_get_drvdata(pdev); + int err; + + err = host1x_client_unregister(&nvdec->client.base); + if (err < 0) { + dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", + err); + return err; + } + + if (pm_runtime_enabled(&pdev->dev)) + pm_runtime_disable(&pdev->dev); + else + nvdec_runtime_suspend(&pdev->dev); + + falcon_exit(&nvdec->falcon); + + return 0; +} + +static const struct dev_pm_ops nvdec_pm_ops = { + SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL) +}; + +struct platform_driver tegra_nvdec_driver = { + .driver = { + .name = "tegra-nvdec", + .of_match_table = tegra_nvdec_of_match, + .pm = &nvdec_pm_ops + }, + .probe = nvdec_probe, + .remove = nvdec_remove, +}; + +#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_210_NVDEC_FIRMWARE); +#endif +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_186_NVDEC_FIRMWARE); +#endif +#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC) +MODULE_FIRMWARE(NVIDIA_TEGRA_194_NVDEC_FIRMWARE); +#endif diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 85eb3d19bbdb6..3d4cabdbc78db 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -132,6 +132,12 @@ static const struct host1x_sid_entry tegra186_sid_table[] = { .offset = 0x30, .limit = 0x34 }, + { + /* NVDEC */ + .base = 0x1b00, + .offset = 0x30, + .limit = 0x34 + }, }; static const struct host1x_info host1x06_info = { @@ -156,6 +162,18 @@ static const struct host1x_sid_entry tegra194_sid_table[] = { .offset = 0x30, .limit = 0x34 }, + { + /* NVDEC */ + .base = 0x1b00, + .offset = 0x30, + .limit = 0x34 + }, + { + /* NVDEC1 */ + .base = 0x1bc0, + .offset = 0x30, + .limit = 0x34 + }, }; static const struct host1x_info host1x07_info = { diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d0bb16cdd005d..2ca53d7ed7ca4 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -17,6 +17,8 @@ enum host1x_class { HOST1X_CLASS_GR2D_SB = 0x52, HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, + HOST1X_CLASS_NVDEC = 0xF0, + HOST1X_CLASS_NVDEC1 = 0xF5, }; struct host1x; From ab4de22c216adb1ce720984f8052acc94f0ba384 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 16 Sep 2021 18:09:20 +0300 Subject: [PATCH 
0171/1202] drm/tegra: Bump VIC/NVDEC clock rates to Fmax To get full performance out of these engines, bump their clock rates to maximum. In the future we may want something smarter but this should be fine for now. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/nvdec.c | 6 ++++++ drivers/gpu/drm/tegra/vic.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index c3b6fe7fb4547..48c90e26e90a4 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -374,6 +374,12 @@ static int nvdec_probe(struct platform_device *pdev) return PTR_ERR(nvdec->clk); } + err = clk_set_rate(nvdec->clk, ULONG_MAX); + if (err < 0) { + dev_err(&pdev->dev, "failed to set clock rate\n"); + return err; + } + err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class); if (err < 0) host_class = HOST1X_CLASS_NVDEC; diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index c02010ff2b7f2..dec5e56f67804 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -441,6 +441,12 @@ static int vic_probe(struct platform_device *pdev) return PTR_ERR(vic->clk); } + err = clk_set_rate(vic->clk, ULONG_MAX); + if (err < 0) { + dev_err(&pdev->dev, "failed to set clock rate\n"); + return err; + } + if (!dev->pm_domain) { vic->rst = devm_reset_control_get(dev, "vic"); if (IS_ERR(vic->rst)) { From 3028956349e151ece0bacd3ebce1ec222e48e423 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Sep 2021 01:28:04 +0300 Subject: [PATCH 0172/1202] drm/tegra: dc: rgb: Move PCLK shifter programming to CRTC Asus TF700T tablet uses TC358768 DPI->DSI bridge that sits between Tegra's DPI output and display panel input. Bridge requires to have stable PCLK output before RGB encoder is enabled because it uses PCLK by itself to clock internal logic and bridge is programmed before Tegra's encoder is enabled. Hence the PCLK clock shifter must be programmed when CRTC is enabled, otherwise clock is unstable and bridge hangs because of it. Move the shifter programming from RGB encoder into CRTC. Tested-by: Maxim Schwalm #TF700T Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 6 ++++++ drivers/gpu/drm/tegra/rgb.c | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 6d1c8cc8b507e..e62b091b93f22 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2179,6 +2179,12 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW); } + if (dc->rgb) { + /* XXX: parameterize? */ + value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE; + tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS); + } + tegra_dc_commit(dc); drm_crtc_vblank_on(crtc); diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 606c78a2b988f..933e14e4609f0 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -116,10 +116,6 @@ static void tegra_rgb_encoder_enable(struct drm_encoder *encoder) DISP_ORDER_RED_BLUE; tegra_dc_writel(rgb->dc, value, DC_DISP_DISP_INTERFACE_CONTROL); - /* XXX: parameterize? 
*/ - value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE; - tegra_dc_writel(rgb->dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS); - tegra_dc_commit(rgb->dc); } From 0c4b6757e15a56da3d0abb779ba73ddf746535aa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 15 Sep 2021 16:23:03 +0200 Subject: [PATCH 0173/1202] EXP timers/nohz: Last resort update jiffies on nohz_full IRQ entry When at least one CPU runs in nohz_full mode, a dedicated timekeeper CPU is guaranteed to stay online and to never stop its tick. Except that this timekeeper could be spinning with interrupts disabled for an extended period of time, for example, when in the midst of a stop-machine operation. This means that jiffies are no longer being updated, which in turn means that a nohz_full CPU can endlessly program its next tick in the past, which results in an tick storm. This situation can arise as follows: 0) CPU 0 is the timekeeper and CPU 1 a nohz_full CPU. 1) A stop-machine callback is queued. 2) CPU 0 reaches MULTI_STOP_DISABLE_IRQ while CPU 1 is still in MULTI_STOP_PREPARE. Hence CPU 0 can't do its timekeeping duty. CPU 1 can still take IRQs. 3) CPU 1 receives an IRQ which queues a timer callback one jiffy forward. 4) On IRQ exit, CPU 1 schedules the tick one jiffy forward, taking last_jiffies_update as a base. But last_jiffies_update hasn't been updated for 2 jiffies since the timekeeper has interrupts disabled. 5) clockevents_program_event(), which relies on ktime_get(), observes that the expiration is in the past and therefore programs the min delta event on the clock. 6) The tick fires immediately, goto 3), tick storm! 7) The nohz_full CPU 1 makes no forward progress until such time as integer overflow causes the tick's expiration to actually be in the future, which after the better part of an hour finally allows CPU 1 to reach MULTI_STOP_DISABLE_IRQ. Solve this by unconditionally updating jiffies if the value is stale on nohz_full IRQ entry. IRQs and other disturbances are expected to be rare enough on nohz_full for the unconditional call to ktime_get() to incur negligible overhead. Reported-and-tested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/softirq.c | 3 ++- kernel/time/tick-sched.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 322b65d456767..41f470929e991 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -595,7 +595,8 @@ void irq_enter_rcu(void) { __irq_enter_raw(); - if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)) + if (tick_nohz_full_cpu(smp_processor_id()) || + (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))) tick_irq_enter(); account_hardirq_enter(current); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6bffe5af8cb11..17a283ce2b20f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1375,6 +1375,13 @@ static inline void tick_nohz_irq_enter(void) now = ktime_get(); if (ts->idle_active) tick_nohz_stop_idle(ts, now); + /* + * If all CPUs are idle. We may need to update a stale jiffies value. + * Note nohz_full is a special case: a timekeeper is guaranteed to stay + * alive but it might be busy looping with interrupts disabled in some + * rare case (typically stop machine). So we must make sure we have a + * last resort. 
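+ * The unconditional ktime_get() that this path now performs on
+ * nohz_full CPUs is expected to be negligible, since IRQs are rare
+ * there by construction.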
+ */ if (ts->tick_stopped) tick_nohz_update_jiffies(now); } From f00b479e6e47e4f71a66805ce045007617eaeedc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Oct 2021 22:37:26 -0700 Subject: [PATCH 0174/1202] gpu: host1x: Drop excess kernel-doc entry @key Fix kernel-doc warning in host1x: ../drivers/gpu/host1x/bus.c:774: warning: Excess function parameter 'key' description in '__host1x_client_register' Fixes: 0cfe5a6e758f ("gpu: host1x: Split up client initalization and registration") Signed-off-by: Randy Dunlap Cc: Thierry Reding Cc: dri-devel@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Thierry Reding --- drivers/gpu/host1x/bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 0b67d894470d3..0d81eede12170 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -762,7 +762,6 @@ EXPORT_SYMBOL(host1x_client_exit); /** * __host1x_client_register() - register a host1x client * @client: host1x client - * @key: lock class key for the client-specific mutex * * Registers a host1x client with each host1x controller instance. Note that * each client will only match their parent host1x controller and will only be From 426c60a8b103403ae7c9ac307b3436360a07bb73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Sep 2021 11:36:59 +0200 Subject: [PATCH 0175/1202] gpu: host1x: select CONFIG_DMA_SHARED_BUFFER Linking fails when dma-buf is disabled: ld.lld: error: undefined symbol: dma_fence_release >>> referenced by fence.c >>> gpu/host1x/fence.o:(host1x_syncpt_fence_enable_signaling) in archive drivers/built-in.a >>> referenced by fence.c >>> gpu/host1x/fence.o:(host1x_fence_signal) in archive drivers/built-in.a >>> referenced by fence.c >>> gpu/host1x/fence.o:(do_fence_timeout) in archive drivers/built-in.a Fixes: 687db2207b1b ("gpu: host1x: Add DMA fence implementation") Signed-off-by: Arnd Bergmann Reviewed-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index 6dab94adf25e5..6815b4db17c1b 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -2,6 +2,7 @@ config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + select DMA_SHARED_BUFFER select IOMMU_IOVA help Driver for the NVIDIA Tegra host1x hardware. From 6a01cd3f00ae6b5c4ec6285bf2c0e662c17f688e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 8 Jul 2021 16:37:36 +0200 Subject: [PATCH 0176/1202] drm/tegra: gr2d: Explicitly control module reset As of commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks"), module resets are no longer automatically deasserted when the module clock is enabled. To make sure that the gr2d module continues to work, we need to explicitly control the module reset. 
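The ordering matters here: the module clock is enabled first and given a moment to stabilize before the reset line is released. A sketch of the probe-side sequence this patch adds (error handling inlined for brevity; the delays match the patch below):

	err = clk_prepare_enable(gr2d->clk);
	if (err < 0)
		return err;

	usleep_range(2000, 4000);	/* let the module clock stabilize */

	err = reset_control_deassert(gr2d->rst);
	if (err < 0) {
		clk_disable_unprepare(gr2d->clk);
		return err;
	}

Removal does the inverse: assert the reset, wait, then gate the clock.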
Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks") Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/gr2d.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index de288cba39055..ba3722f1b8651 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -4,9 +4,11 @@ */ #include +#include #include #include #include +#include #include "drm.h" #include "gem.h" @@ -19,6 +21,7 @@ struct gr2d_soc { struct gr2d { struct tegra_drm_client client; struct host1x_channel *channel; + struct reset_control *rst; struct clk *clk; const struct gr2d_soc *soc; @@ -208,6 +211,12 @@ static int gr2d_probe(struct platform_device *pdev) if (!syncpts) return -ENOMEM; + gr2d->rst = devm_reset_control_get(dev, NULL); + if (IS_ERR(gr2d->rst)) { + dev_err(dev, "cannot get reset\n"); + return PTR_ERR(gr2d->rst); + } + gr2d->clk = devm_clk_get(dev, NULL); if (IS_ERR(gr2d->clk)) { dev_err(dev, "cannot get clock\n"); @@ -220,6 +229,14 @@ static int gr2d_probe(struct platform_device *pdev) return err; } + usleep_range(2000, 4000); + + err = reset_control_deassert(gr2d->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + INIT_LIST_HEAD(&gr2d->client.base.list); gr2d->client.base.ops = &gr2d_client_ops; gr2d->client.base.dev = dev; @@ -234,8 +251,7 @@ static int gr2d_probe(struct platform_device *pdev) err = host1x_client_register(&gr2d->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - clk_disable_unprepare(gr2d->clk); - return err; + goto assert_rst; } /* initialize address register map */ @@ -245,6 +261,13 @@ static int gr2d_probe(struct platform_device *pdev) platform_set_drvdata(pdev, gr2d); return 0; + +assert_rst: + (void)reset_control_assert(gr2d->rst); +disable_clk: + clk_disable_unprepare(gr2d->clk); + + return err; } static int gr2d_remove(struct platform_device *pdev) @@ -259,6 +282,12 @@ static int gr2d_remove(struct platform_device *pdev) return err; } + err = reset_control_assert(gr2d->rst); + if (err < 0) + dev_err(&pdev->dev, "failed to assert reset: %d\n", err); + + usleep_range(2000, 4000); + clk_disable_unprepare(gr2d->clk); return 0; From 810ee5bd7472999189f5d93d158c37299bebdc74 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 11 Aug 2021 13:59:54 +0300 Subject: [PATCH 0177/1202] drm/tegra: vic: Use autosuspend When going idle, it's not unlikely that more work will follow. As such, use autosuspend with a 500ms suspend delay. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/vic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index dec5e56f67804..b58e2b99f81a6 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -488,6 +488,8 @@ static int vic_probe(struct platform_device *pdev) if (err < 0) goto unregister_client; } + pm_runtime_set_autosuspend_delay(&pdev->dev, 500); + pm_runtime_use_autosuspend(&pdev->dev); return 0; From fa6b2d32e6224aa3b460f00d1ace74a8ceaafda1 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 19 Apr 2021 19:18:26 +0800 Subject: [PATCH 0178/1202] drm/tegra: Remove duplicate struct declaration struct tegra_dc is declared at 13rd line. The declaration here is unnecessary. Remove it. 
Signed-off-by: Wan Jiabing Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hub.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h index 3efa1be07ff88..23c4b2115ed1e 100644 --- a/drivers/gpu/drm/tegra/hub.h +++ b/drivers/gpu/drm/tegra/hub.h @@ -72,7 +72,6 @@ to_tegra_display_hub_state(struct drm_private_state *priv) return container_of(priv, struct tegra_display_hub_state, base); } -struct tegra_dc; struct tegra_plane; int tegra_display_hub_prepare(struct tegra_display_hub *hub); From 51a837bdf0c0038402a4175a16e163047e5fb598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Aug 2021 15:19:34 +0100 Subject: [PATCH 0179/1202] nfs: Move to using the alternate fallback fscache I/O API Move NFS to using the alternate fallback fscache I/O API instead of the old upstream I/O API as that is about to be deleted. The alternate API will also be deleted at some point in the future as it's dangerous (as is the old API) and can lead to data corruption if the backing filesystem can insert/remove bridging blocks of zeros into its extent list[1]. The alternate API reads and writes pages synchronously, with the intention of allowing removal of the operation management framework and thence the object management framework from fscache. The preferred change would be to use the netfs lib, but the new I/O API can be used directly. It's just that as the cache now needs to track data for itself, caching blocks may exceed page size... Changes ======= ver #3: - Merged and adjusted a fix from Dave Wysochanski[3] so as not to return 1 from the nfs cache readpage(s) functions. ver #2: - Changed "deprecated" to "fallback" in the new function names[2]. Signed-off-by: David Howells Tested-by: Dave Wysochanski cc: Trond Myklebust cc: Anna Schumaker cc: linux-nfs@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1] Link: https://lore.kernel.org/r/CAHk-=wiVK+1CyEjW8u71zVPK8msea=qPpznX35gnX+s8sXnJTg@mail.gmail.com/ [2] Link: https://listman.redhat.com/archives/linux-cachefs/2021-October/msg00011.html [3] Link: https://lore.kernel.org/r/163162771421.438332.11563297618174948818.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189108292.2509237.12615909591150927232.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/nfs/file.c | 14 ++-- fs/nfs/fscache.c | 170 +++++------------------------------------------ fs/nfs/fscache.h | 85 ++++-------------------- fs/nfs/read.c | 25 +++---- fs/nfs/write.c | 7 +- 5 files changed, 52 insertions(+), 249 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index aa353fd582404..209dac208477b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -416,7 +416,7 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset, /* Cancel any unstarted writes on this page */ nfs_wb_page_cancel(page_file_mapping(page)->host, page); - nfs_fscache_invalidate_page(page, page->mapping->host); + wait_on_page_fscache(page); } /* @@ -432,7 +432,12 @@ static int nfs_release_page(struct page *page, gfp_t gfp) /* If PagePrivate() is set, then the page is not freeable */ if (PagePrivate(page)) return 0; - return nfs_fscache_release_page(page, gfp); + if (PageFsCache(page)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) + return false; + wait_on_page_fscache(page); + } + return true; } static void nfs_check_dirty_writeback(struct page *page, @@ -475,12 +480,11 @@ static void nfs_check_dirty_writeback(struct page *page, static int nfs_launder_page(struct 
page *page) { struct inode *inode = page_file_mapping(page)->host; - struct nfs_inode *nfsi = NFS_I(inode); dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", inode->i_ino, (long long)page_offset(page)); - nfs_fscache_wait_on_page_write(nfsi, page); + wait_on_page_fscache(page); return nfs_wb_page(inode, page); } @@ -555,7 +559,7 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(inode), page); + wait_on_page_fscache(page); wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING, nfs_wait_bit_killable, TASK_KILLABLE); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index d743629e05e12..68e266a376757 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -317,7 +317,6 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); clear_bit(NFS_INO_FSCACHE, &nfsi->flags); fscache_disable_cookie(cookie, &auxdata, true); - fscache_uncache_all_inode_pages(cookie, inode); } else { dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi); fscache_enable_cookie(cookie, &auxdata, nfsi->vfs_inode.i_size, @@ -328,79 +327,10 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(nfs_fscache_open_file); -/* - * Release the caching state associated with a page, if the page isn't busy - * interacting with the cache. - * - Returns true (can release page) or false (page busy). - */ -int nfs_fscache_release_page(struct page *page, gfp_t gfp) -{ - if (PageFsCache(page)) { - struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host); - - BUG_ON(!cookie); - dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", - cookie, page, NFS_I(page->mapping->host)); - - if (!fscache_maybe_release_page(cookie, page, gfp)) - return 0; - - nfs_inc_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED); - } - - return 1; -} - -/* - * Release the caching state associated with a page if undergoing complete page - * invalidation. - */ -void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) -{ - struct fscache_cookie *cookie = nfs_i_fscache(inode); - - BUG_ON(!cookie); - - dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", - cookie, page, NFS_I(inode)); - - fscache_wait_on_page_write(cookie, page); - - BUG_ON(!PageLocked(page)); - fscache_uncache_page(cookie, page); - nfs_inc_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED); -} - -/* - * Handle completion of a page being read from the cache. - * - Called in process (keventd) context. - */ -static void nfs_readpage_from_fscache_complete(struct page *page, - void *context, - int error) -{ - dfprintk(FSCACHE, - "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", - page, context, error); - - /* - * If the read completes with an error, mark the page with PG_checked, - * unlock the page, and let the VM reissue the readpage. 
- */ - if (!error) - SetPageUptodate(page); - else - SetPageChecked(page); - unlock_page(page); -} - /* * Retrieve a page from fscache */ -int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, struct page *page) +int __nfs_readpage_from_fscache(struct inode *inode, struct page *page) { int ret; @@ -409,112 +339,48 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, nfs_i_fscache(inode), page, page->index, page->flags, inode); if (PageChecked(page)) { + dfprintk(FSCACHE, "NFS: readpage_from_fscache: PageChecked\n"); ClearPageChecked(page); return 1; } - ret = fscache_read_or_alloc_page(nfs_i_fscache(inode), - page, - nfs_readpage_from_fscache_complete, - ctx, - GFP_KERNEL); - - switch (ret) { - case 0: /* read BIO submitted (page in fscache) */ - dfprintk(FSCACHE, - "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); - return ret; - - case -ENOBUFS: /* inode not in cache */ - case -ENODATA: /* page not in cache */ + ret = fscache_fallback_read_page(nfs_i_fscache(inode), page); + if (ret < 0) { nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, - "NFS: readpage_from_fscache %d\n", ret); - return 1; - - default: - dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); - } - return ret; -} - -/* - * Retrieve a set of pages from fscache - */ -int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - unsigned npages = *nr_pages; - int ret; - - dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", - nfs_i_fscache(inode), npages, inode); - - ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode), - mapping, pages, nr_pages, - nfs_readpage_from_fscache_complete, - ctx, - mapping_gfp_mask(mapping)); - if (*nr_pages < npages) - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, - npages); - if (*nr_pages > 0) - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, - *nr_pages); - - switch (ret) { - case 0: /* read submitted to the cache for all pages */ - BUG_ON(!list_empty(pages)); - BUG_ON(*nr_pages != 0); - dfprintk(FSCACHE, - "NFS: nfs_getpages_from_fscache: submitted\n"); - + "NFS: readpage_from_fscache failed %d\n", ret); + SetPageChecked(page); return ret; - - case -ENOBUFS: /* some pages aren't cached and can't be */ - case -ENODATA: /* some pages aren't cached */ - dfprintk(FSCACHE, - "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); - return 1; - - default: - dfprintk(FSCACHE, - "NFS: nfs_getpages_from_fscache: ret %d\n", ret); } - return ret; + /* Read completed synchronously */ + dfprintk(FSCACHE, "NFS: readpage_from_fscache: read successful\n"); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); + SetPageUptodate(page); + return 0; } /* * Store a newly fetched page in fscache - * - PG_fscache must be set on the page */ -void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) +void __nfs_readpage_to_fscache(struct inode *inode, struct page *page) { int ret; dfprintk(FSCACHE, - "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", - nfs_i_fscache(inode), page, page->index, page->flags, sync); + "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx))\n", + nfs_i_fscache(inode), page, page->index, page->flags); + + ret = fscache_fallback_write_page(nfs_i_fscache(inode), page); - ret = 
fscache_write_page(nfs_i_fscache(inode), page, - inode->i_size, GFP_KERNEL); dfprintk(FSCACHE, "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", page, page->index, page->flags, ret); if (ret != 0) { - fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_inc_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_inc_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 6118cdd2e1d72..679055720dae6 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -11,7 +11,7 @@ #include #include #include -#define FSCACHE_USE_OLD_IO_API +#define FSCACHE_USE_FALLBACK_IO_API #include #ifdef CONFIG_NFS_FSCACHE @@ -94,61 +94,19 @@ extern void nfs_fscache_init_inode(struct inode *); extern void nfs_fscache_clear_inode(struct inode *); extern void nfs_fscache_open_file(struct inode *, struct file *); -extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); -extern int nfs_fscache_release_page(struct page *, gfp_t); - -extern int __nfs_readpage_from_fscache(struct nfs_open_context *, - struct inode *, struct page *); -extern int __nfs_readpages_from_fscache(struct nfs_open_context *, - struct inode *, struct address_space *, - struct list_head *, unsigned *); -extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); - -/* - * wait for a page to complete writing to the cache - */ -static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, - struct page *page) -{ - if (PageFsCache(page)) - fscache_wait_on_page_write(nfsi->fscache, page); -} - -/* - * release the caching state associated with a page if undergoing complete page - * invalidation - */ -static inline void nfs_fscache_invalidate_page(struct page *page, - struct inode *inode) -{ - if (PageFsCache(page)) - __nfs_fscache_invalidate_page(page, inode); -} +extern int __nfs_readpage_from_fscache(struct inode *, struct page *); +extern void __nfs_read_completion_to_fscache(struct nfs_pgio_header *hdr, + unsigned long bytes); +extern void __nfs_readpage_to_fscache(struct inode *, struct page *); /* * Retrieve a page from an inode data storage object. */ -static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, +static inline int nfs_readpage_from_fscache(struct inode *inode, struct page *page) { if (NFS_I(inode)->fscache) - return __nfs_readpage_from_fscache(ctx, inode, page); - return -ENOBUFS; -} - -/* - * Retrieve a set of pages from an inode data storage object. - */ -static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - if (NFS_I(inode)->fscache) - return __nfs_readpages_from_fscache(ctx, inode, mapping, pages, - nr_pages); + return __nfs_readpage_from_fscache(inode, page); return -ENOBUFS; } @@ -157,11 +115,10 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, * in the cache. 
*/ static inline void nfs_readpage_to_fscache(struct inode *inode, - struct page *page, - int sync) + struct page *page) { - if (PageFsCache(page)) - __nfs_readpage_to_fscache(inode, page, sync); + if (NFS_I(inode)->fscache) + __nfs_readpage_to_fscache(inode, page); } /* @@ -204,31 +161,13 @@ static inline void nfs_fscache_clear_inode(struct inode *inode) {} static inline void nfs_fscache_open_file(struct inode *inode, struct file *filp) {} -static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) -{ - return 1; /* True: may release page */ -} -static inline void nfs_fscache_invalidate_page(struct page *page, - struct inode *inode) {} -static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, - struct page *page) {} - -static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, +static inline int nfs_readpage_from_fscache(struct inode *inode, struct page *page) { return -ENOBUFS; } -static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, - struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return -ENOBUFS; -} static inline void nfs_readpage_to_fscache(struct inode *inode, - struct page *page, int sync) {} + struct page *page) {} static inline void nfs_fscache_invalidate(struct inode *inode) {} diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 08d6cc57cbc37..06ed827a67e82 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -123,7 +123,7 @@ static void nfs_readpage_release(struct nfs_page *req, int error) struct address_space *mapping = page_file_mapping(page); if (PageUptodate(page)) - nfs_readpage_to_fscache(inode, page, 0); + nfs_readpage_to_fscache(inode, page); else if (!PageError(page) && !PagePrivate(page)) generic_error_remove_page(mapping, page); unlock_page(page); @@ -305,6 +305,12 @@ readpage_async_filler(void *data, struct page *page) aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE); + if (!IS_SYNC(page->mapping->host)) { + error = nfs_readpage_from_fscache(page->mapping->host, page); + if (error == 0) + goto out_unlock; + } + new = nfs_create_request(desc->ctx, page, 0, aligned_len); if (IS_ERR(new)) goto out_error; @@ -320,6 +326,7 @@ readpage_async_filler(void *data, struct page *page) return 0; out_error: error = PTR_ERR(new); +out_unlock: unlock_page(page); out: return error; @@ -367,12 +374,6 @@ int nfs_readpage(struct file *file, struct page *page) desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); xchg(&desc.ctx->error, 0); - if (!IS_SYNC(inode)) { - ret = nfs_readpage_from_fscache(desc.ctx, inode, page); - if (ret == 0) - goto out_wait; - } - nfs_pageio_init_read(&desc.pgio, inode, false, &nfs_async_read_completion_ops); @@ -382,7 +383,6 @@ int nfs_readpage(struct file *file, struct page *page) nfs_pageio_complete_read(&desc.pgio); ret = desc.pgio.pg_error < 0 ? 
desc.pgio.pg_error : 0; -out_wait: if (!ret) { ret = wait_on_page_locked_killable(page); if (!PageUptodate(page) && !ret) @@ -421,14 +421,6 @@ int nfs_readpages(struct file *file, struct address_space *mapping, } else desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); - /* attempt to read as many of the pages as possible from the cache - * - this returns -ENOBUFS immediately if the cookie is negative - */ - ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, - pages, &nr_pages); - if (ret == 0) - goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&desc.pgio, inode, false, &nfs_async_read_completion_ops); @@ -436,7 +428,6 @@ int nfs_readpages(struct file *file, struct address_space *mapping, nfs_pageio_complete_read(&desc.pgio); -read_complete: put_nfs_open_context(desc.ctx); out: return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index eae9bf1140417..466266a96b2a5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -2124,8 +2124,11 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (PagePrivate(page)) return -EBUSY; - if (!nfs_fscache_release_page(page, GFP_KERNEL)) - return -EBUSY; + if (PageFsCache(page)) { + if (mode == MIGRATE_ASYNC) + return -EBUSY; + wait_on_page_fscache(page); + } return migrate_page(mapping, newpage, page, mode); } From f431040be6e9c7e0d73c74365b9bc5c412ee3c16 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Nov 2020 09:06:42 +0000 Subject: [PATCH 0180/1202] 9p: Convert to using the netfs helper lib to do reads and caching Convert the 9p filesystem to use the netfs helper lib to handle readpage, readahead and write_begin, converting those into a common issue_op for the filesystem itself to handle. The netfs helper lib also handles reading from fscache if a cache is available, and interleaving reads from both sources. This change also switches from the old fscache I/O API to the new one, meaning that fscache no longer keeps track of netfs pages and instead does async DIO between the backing files and the 9p file pagecache. As a part of this change, the handling of PG_fscache changes. It now just means that the cache has a write I/O operation in progress on a page (PG_locked is used for a read I/O op). Note that this is a cut-down version of the fscache rewrite and does not change any of the cookie and cache coherency handling. Changes ======= ver #3: - v9fs_req_issue_op() needs to terminate the subrequest. - v9fs_write_end() needs to call SetPageUptodate() a bit more often. - It's not CONFIG_{AFS,V9FS}_FSCACHE[1] - v9fs_init_rreq() should take a ref on the p9_fid and the cleanup should drop it [from Dominique Martinet]. 
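For orientation, a netfs-based filesystem only supplies the raw read primitive and the library drives the rest, including interleaving reads from fscache. A minimal sketch of the shape such an issue_op takes under the netfs API of this series (illustrative only; the name mirrors the v9fs_req_issue_op() mentioned above, but the body is a simplification of the real implementation):

	static void v9fs_req_issue_op(struct netfs_read_subrequest *subreq)
	{
		struct netfs_read_request *rreq = subreq->rreq;
		struct p9_fid *fid = rreq->netfs_priv;
		struct iov_iter to;
		loff_t pos = subreq->start + subreq->transferred;
		size_t len = subreq->len - subreq->transferred;
		int total, err;

		/* read from the server straight into the pagecache pages */
		iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len);

		total = p9_client_read(fid, pos, &to, &err);

		/* a short read past EOF is zero-filled, not retried */
		__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);

		netfs_subreq_terminated(subreq, err ?: total, false);
	}

The fscache interleaving happens inside the library, so the filesystem never needs to know which ranges were satisfied from the cache.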
Signed-off-by: David Howells Reviewed-and-tested-by: Dominique Martinet cc: v9fs-developer@lists.sourceforge.net cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YUm+xucHxED+1MJp@codewreck.org/ [1] Link: https://lore.kernel.org/r/163162772646.438332.16323773205855053535.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189109885.2509237.7153668924503399173.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/9p/Kconfig | 1 + fs/9p/cache.c | 137 --------------------------------- fs/9p/cache.h | 99 +----------------------- fs/9p/v9fs.h | 9 +++ fs/9p/vfs_addr.c | 195 +++++++++++++++++++++++------------------------ fs/9p/vfs_file.c | 21 ++++- 6 files changed, 127 insertions(+), 335 deletions(-) diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 09fd4a185fd21..d7bc93447c85f 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -2,6 +2,7 @@ config 9P_FS tristate "Plan 9 Resource Sharing Support (9P2000)" depends on INET && NET_9P + select NETFS_SUPPORT help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 1769a44f48192..077f0a40aa016 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -199,140 +199,3 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) mutex_unlock(&v9inode->fscache_lock); } - -int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) -{ - struct inode *inode = page->mapping->host; - struct v9fs_inode *v9inode = V9FS_I(inode); - - BUG_ON(!v9inode->fscache); - - return fscache_maybe_release_page(v9inode->fscache, page, gfp); -} - -void __v9fs_fscache_invalidate_page(struct page *page) -{ - struct inode *inode = page->mapping->host; - struct v9fs_inode *v9inode = V9FS_I(inode); - - BUG_ON(!v9inode->fscache); - - if (PageFsCache(page)) { - fscache_wait_on_page_write(v9inode->fscache, page); - BUG_ON(!PageLocked(page)); - fscache_uncache_page(v9inode->fscache, page); - } -} - -static void v9fs_vfs_readpage_complete(struct page *page, void *data, - int error) -{ - if (!error) - SetPageUptodate(page); - - unlock_page(page); -} - -/* - * __v9fs_readpage_from_fscache - read a page from cache - * - * Returns 0 if the pages are in cache and a BIO is submitted, - * 1 if the pages are not in cache and -error otherwise. - */ - -int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - if (!v9inode->fscache) - return -ENOBUFS; - - ret = fscache_read_or_alloc_page(v9inode->fscache, - page, - v9fs_vfs_readpage_complete, - NULL, - GFP_KERNEL); - switch (ret) { - case -ENOBUFS: - case -ENODATA: - p9_debug(P9_DEBUG_FSC, "page/inode not in cache %d\n", ret); - return 1; - case 0: - p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); - return ret; - default: - p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); - return ret; - } -} - -/* - * __v9fs_readpages_from_fscache - read multiple pages from cache - * - * Returns 0 if the pages are in cache and a BIO is submitted, - * 1 if the pages are not in cache and -error otherwise. 
- */ - -int __v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p pages %u\n", inode, *nr_pages); - if (!v9inode->fscache) - return -ENOBUFS; - - ret = fscache_read_or_alloc_pages(v9inode->fscache, - mapping, pages, nr_pages, - v9fs_vfs_readpage_complete, - NULL, - mapping_gfp_mask(mapping)); - switch (ret) { - case -ENOBUFS: - case -ENODATA: - p9_debug(P9_DEBUG_FSC, "pages/inodes not in cache %d\n", ret); - return 1; - case 0: - BUG_ON(!list_empty(pages)); - BUG_ON(*nr_pages != 0); - p9_debug(P9_DEBUG_FSC, "BIO submitted\n"); - return ret; - default: - p9_debug(P9_DEBUG_FSC, "ret %d\n", ret); - return ret; - } -} - -/* - * __v9fs_readpage_to_fscache - write a page to the cache - * - */ - -void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) -{ - int ret; - const struct v9fs_inode *v9inode = V9FS_I(inode); - - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - ret = fscache_write_page(v9inode->fscache, page, - i_size_read(&v9inode->vfs_inode), GFP_KERNEL); - p9_debug(P9_DEBUG_FSC, "ret = %d\n", ret); - if (ret != 0) - v9fs_uncache_page(inode, page); -} - -/* - * wait for a page to complete writing to the cache - */ -void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) -{ - const struct v9fs_inode *v9inode = V9FS_I(inode); - p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page); - if (PageFsCache(page)) - fscache_wait_on_page_write(v9inode->fscache, page); -} diff --git a/fs/9p/cache.h b/fs/9p/cache.h index c7e74776ce90f..cfafa89b972c2 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -7,10 +7,11 @@ #ifndef _9P_CACHE_H #define _9P_CACHE_H -#ifdef CONFIG_9P_FSCACHE -#define FSCACHE_USE_OLD_IO_API + +#define FSCACHE_USE_NEW_IO_API #include -#include + +#ifdef CONFIG_9P_FSCACHE extern struct fscache_netfs v9fs_cache_netfs; extern const struct fscache_cookie_def v9fs_cache_session_index_def; @@ -28,64 +29,6 @@ extern void v9fs_cache_inode_reset_cookie(struct inode *inode); extern int __v9fs_cache_register(void); extern void __v9fs_cache_unregister(void); -extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp); -extern void __v9fs_fscache_invalidate_page(struct page *page); -extern int __v9fs_readpage_from_fscache(struct inode *inode, - struct page *page); -extern int __v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages); -extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); -extern void __v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page); - -static inline int v9fs_fscache_release_page(struct page *page, - gfp_t gfp) -{ - return __v9fs_fscache_release_page(page, gfp); -} - -static inline void v9fs_fscache_invalidate_page(struct page *page) -{ - __v9fs_fscache_invalidate_page(page); -} - -static inline int v9fs_readpage_from_fscache(struct inode *inode, - struct page *page) -{ - return __v9fs_readpage_from_fscache(inode, page); -} - -static inline int v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return __v9fs_readpages_from_fscache(inode, mapping, pages, - nr_pages); -} - -static inline void v9fs_readpage_to_fscache(struct inode *inode, - struct page *page) -{ - if (PageFsCache(page)) - __v9fs_readpage_to_fscache(inode, page); -} 
- -static inline void v9fs_uncache_page(struct inode *inode, struct page *page) -{ - struct v9fs_inode *v9inode = V9FS_I(inode); - fscache_uncache_page(v9inode->fscache, page); - BUG_ON(PageFsCache(page)); -} - -static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) -{ - return __v9fs_fscache_wait_on_page_write(inode, page); -} - #else /* CONFIG_9P_FSCACHE */ static inline void v9fs_cache_inode_get_cookie(struct inode *inode) @@ -100,39 +43,5 @@ static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file { } -static inline int v9fs_fscache_release_page(struct page *page, - gfp_t gfp) { - return 1; -} - -static inline void v9fs_fscache_invalidate_page(struct page *page) {} - -static inline int v9fs_readpage_from_fscache(struct inode *inode, - struct page *page) -{ - return -ENOBUFS; -} - -static inline int v9fs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return -ENOBUFS; -} - -static inline void v9fs_readpage_to_fscache(struct inode *inode, - struct page *page) -{} - -static inline void v9fs_uncache_page(struct inode *inode, struct page *page) -{} - -static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) -{ - return; -} - #endif /* CONFIG_9P_FSCACHE */ #endif /* _9P_CACHE_H */ diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 4ca56c5dd6372..92124b235a6dd 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -124,6 +124,15 @@ static inline struct v9fs_inode *V9FS_I(const struct inode *inode) return container_of(inode, struct v9fs_inode, vfs_inode); } +static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) +{ +#ifdef CONFIG_9P_FSCACHE + return v9inode->fscache; +#else + return NULL; +#endif +} + extern int v9fs_show_options(struct seq_file *m, struct dentry *root); struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 1c4f1b39cc950..cff99f5c05e33 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -29,88 +29,97 @@ #include "fid.h" /** - * v9fs_fid_readpage - read an entire page in from 9P - * @data: Opaque pointer to the fid being read - * @page: structure to page - * + * v9fs_req_issue_op - Issue a read from 9P + * @subreq: The read to make */ -static int v9fs_fid_readpage(void *data, struct page *page) +static void v9fs_req_issue_op(struct netfs_read_subrequest *subreq) { - struct p9_fid *fid = data; - struct inode *inode = page->mapping->host; - struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE}; + struct netfs_read_request *rreq = subreq->rreq; + struct p9_fid *fid = rreq->netfs_priv; struct iov_iter to; - int retval, err; + loff_t pos = subreq->start + subreq->transferred; + size_t len = subreq->len - subreq->transferred; + int total, err; - p9_debug(P9_DEBUG_VFS, "\n"); + iov_iter_xarray(&to, READ, &rreq->mapping->i_pages, pos, len); - BUG_ON(!PageLocked(page)); + total = p9_client_read(fid, pos, &to, &err); + netfs_subreq_terminated(subreq, err ?: total, false); +} - retval = v9fs_readpage_from_fscache(inode, page); - if (retval == 0) - return retval; +/** + * v9fs_init_rreq - Initialise a read request + * @rreq: The read request + * @file: The file being read from + */ +static void v9fs_init_rreq(struct netfs_read_request *rreq, struct file *file) +{ + struct p9_fid *fid = file->private_data; - iov_iter_bvec(&to, 
READ, &bvec, 1, PAGE_SIZE); + refcount_inc(&fid->count); + rreq->netfs_priv = fid; +} - retval = p9_client_read(fid, page_offset(page), &to, &err); - if (err) { - v9fs_uncache_page(inode, page); - retval = err; - goto done; - } +/** + * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_rreq + * @mapping: unused mapping of request to cleanup + * @priv: private data to cleanup, a fid, guaranteed non-null. + */ +static void v9fs_req_cleanup(struct address_space *mapping, void *priv) +{ + struct p9_fid *fid = priv; - zero_user(page, retval, PAGE_SIZE - retval); - flush_dcache_page(page); - SetPageUptodate(page); + p9_client_clunk(fid); +} - v9fs_readpage_to_fscache(inode, page); - retval = 0; +/** + * v9fs_is_cache_enabled - Determine if caching is enabled for an inode + * @inode: The inode to check + */ +static bool v9fs_is_cache_enabled(struct inode *inode) +{ + struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(inode)); -done: - unlock_page(page); - return retval; + return fscache_cookie_enabled(cookie) && !hlist_empty(&cookie->backing_objects); +} + +/** + * v9fs_begin_cache_operation - Begin a cache operation for a read + * @rreq: The read request + */ +static int v9fs_begin_cache_operation(struct netfs_read_request *rreq) +{ + struct fscache_cookie *cookie = v9fs_inode_cookie(V9FS_I(rreq->inode)); + + return fscache_begin_read_operation(&rreq->cache_resources, cookie); } +static const struct netfs_read_request_ops v9fs_req_ops = { + .init_rreq = v9fs_init_rreq, + .is_cache_enabled = v9fs_is_cache_enabled, + .begin_cache_operation = v9fs_begin_cache_operation, + .issue_op = v9fs_req_issue_op, + .cleanup = v9fs_req_cleanup, +}; + /** * v9fs_vfs_readpage - read an entire page in from 9P - * - * @filp: file being read + * @file: file being read * @page: structure to page * */ - -static int v9fs_vfs_readpage(struct file *filp, struct page *page) +static int v9fs_vfs_readpage(struct file *file, struct page *page) { - return v9fs_fid_readpage(filp->private_data, page); + return netfs_readpage(file, page, &v9fs_req_ops, NULL); } /** - * v9fs_vfs_readpages - read a set of pages from 9P - * - * @filp: file being read - * @mapping: the address space - * @pages: list of pages to read - * @nr_pages: count of pages to read - * + * v9fs_vfs_readahead - read a set of pages from 9P + * @ractl: The readahead parameters */ - -static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void v9fs_vfs_readahead(struct readahead_control *ractl) { - int ret = 0; - struct inode *inode; - - inode = mapping->host; - p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); - - ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); - if (ret == 0) - return ret; - - ret = read_cache_pages(mapping, pages, v9fs_fid_readpage, - filp->private_data); - p9_debug(P9_DEBUG_VFS, " = %d\n", ret); - return ret; + netfs_readahead(ractl, &v9fs_req_ops, NULL); } /** @@ -125,7 +134,14 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) { if (PagePrivate(page)) return 0; - return v9fs_fscache_release_page(page, gfp); +#ifdef CONFIG_9P_FSCACHE + if (PageFsCache(page)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) + return 0; + wait_on_page_fscache(page); + } +#endif + return 1; } /** @@ -138,21 +154,16 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) static void v9fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { - /* - * If called with zero offset, we should 
release - * the private state assocated with the page - */ - if (offset == 0 && length == PAGE_SIZE) - v9fs_fscache_invalidate_page(page); + wait_on_page_fscache(page); } static int v9fs_vfs_writepage_locked(struct page *page) { struct inode *inode = page->mapping->host; struct v9fs_inode *v9inode = V9FS_I(inode); + loff_t start = page_offset(page); loff_t size = i_size_read(inode); struct iov_iter from; - struct bio_vec bvec; int err, len; if (page->index == size >> PAGE_SHIFT) @@ -160,17 +171,14 @@ static int v9fs_vfs_writepage_locked(struct page *page) else len = PAGE_SIZE; - bvec.bv_page = page; - bvec.bv_offset = 0; - bvec.bv_len = len; - iov_iter_bvec(&from, WRITE, &bvec, 1, len); + iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len); /* We should have writeback_fid always set */ BUG_ON(!v9inode->writeback_fid); set_page_writeback(page); - p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err); + p9_client_write(v9inode->writeback_fid, start, &from, &err); end_page_writeback(page); return err; @@ -208,14 +216,13 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) static int v9fs_launder_page(struct page *page) { int retval; - struct inode *inode = page->mapping->host; - v9fs_fscache_wait_on_page_write(inode, page); if (clear_page_dirty_for_io(page)) { retval = v9fs_vfs_writepage_locked(page); if (retval) return retval; } + wait_on_page_fscache(page); return 0; } @@ -260,35 +267,24 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int retval = 0; + int retval; struct page *page; - struct v9fs_inode *v9inode; - pgoff_t index = pos >> PAGE_SHIFT; - struct inode *inode = mapping->host; - + struct v9fs_inode *v9inode = V9FS_I(mapping->host); p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping); - v9inode = V9FS_I(inode); -start: - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - retval = -ENOMEM; - goto out; - } BUG_ON(!v9inode->writeback_fid); - if (PageUptodate(page)) - goto out; - if (len == PAGE_SIZE) - goto out; + /* Prefetch area to be written into the cache if we're caching this + * file. We need to do this before we get a lock on the page in case + * there's more than one writer competing for the same cache block. + */ + retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata, + &v9fs_req_ops, NULL); + if (retval < 0) + return retval; - retval = v9fs_fid_readpage(v9inode->writeback_fid, page); - put_page(page); - if (!retval) - goto start; -out: - *pagep = page; + *pagep = find_subpage(page, pos / PAGE_SIZE); return retval; } @@ -305,10 +301,11 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, if (unlikely(copied < len)) { copied = 0; goto out; - } else if (len == PAGE_SIZE) { - SetPageUptodate(page); } + + SetPageUptodate(page); } + /* * No need to use i_size_read() here, the i_size * cannot change under us because we hold the i_mutex. 
@@ -328,7 +325,7 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping, const struct address_space_operations v9fs_addr_operations = { .readpage = v9fs_vfs_readpage, - .readpages = v9fs_vfs_readpages, + .readahead = v9fs_vfs_readahead, .set_page_dirty = __set_page_dirty_nobuffers, .writepage = v9fs_vfs_writepage, .write_begin = v9fs_write_begin, diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 246235ebdb70a..80052497f00ff 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -537,14 +537,27 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf) p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", page, (unsigned long)filp->private_data); + v9inode = V9FS_I(inode); + + /* Wait for the page to be written to the cache before we allow it to + * be modified. We then assume the entire page will need writing back. + */ +#ifdef CONFIG_9P_FSCACHE + if (PageFsCache(page) && + wait_on_page_bit_killable(page, PG_fscache) < 0) + return VM_FAULT_RETRY; +#endif + + if (PageWriteback(page) && + wait_on_page_bit_killable(page, PG_writeback) < 0) + return VM_FAULT_RETRY; + /* Update file times before taking page lock */ file_update_time(filp); - v9inode = V9FS_I(inode); - /* make sure the cache has finished storing the page */ - v9fs_fscache_wait_on_page_write(inode, page); BUG_ON(!v9inode->writeback_fid); - lock_page(page); + if (lock_page_killable(page) < 0) + return VM_FAULT_RETRY; if (page->mapping != inode->i_mapping) goto out_unlock; wait_for_stable_page(page); From 770f46bae74a8c3fca41262afe745c099e1ab549 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Sep 2021 11:25:47 +0100 Subject: [PATCH 0181/1202] cifs: (untested) Move to using the alternate fallback fscache I/O API Move cifs/smb to using the alternate fallback fscache I/O API instead of the old upstream I/O API as that is about to be deleted. The alternate API will also be deleted at some point in the future as it's dangerous (as is the old API) and can lead to data corruption if the backing filesystem can insert/remove bridging blocks of zeros into its extent list[1]. The alternate API reads and writes pages synchronously, with the intention of allowing removal of the operation management framework and thence the object management framework from fscache. The preferred change would be to use the netfs lib, but the new I/O API can be used directly. It's just that as the cache now needs to track data for itself, caching blocks may exceed page size... Changes ======= ver #2: - Changed "deprecated" to "fallback" in the new function names[2]. 
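As a rough illustration of what "reads and writes pages synchronously"
means here — not actual cifs code; example_read_from_server() is a
hypothetical stand-in for the transport path — a fallback-API readpage
boils down to:

    static int example_readpage(struct file *file, struct page *page)
    {
            struct inode *inode = page->mapping->host;
            int ret;

            /* Try the cache first; 0 means the page was filled. */
            ret = fscache_fallback_read_page(cifs_inode_cookie(inode), page);
            if (ret == 0)
                    return 0;

            /* Cache miss (e.g. -ENOBUFS/-ENODATA): read from the server
             * and, on success, push the page back into the cache, again
             * synchronously.
             */
            ret = example_read_from_server(file, page);
            if (ret == 0)
                    fscache_fallback_write_page(cifs_inode_cookie(inode),
                                                page);
            return ret;
    }

Because each call completes before returning, no operation/object state
needs to be kept in fscache for it, which is what permits the framework
removal mentioned above.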
Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: linux-cifs@vger.kernel.org cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/YO17ZNOcq+9PajfQ@mit.edu [1] Link: https://lore.kernel.org/r/CAHk-=wiVK+1CyEjW8u71zVPK8msea=qPpznX35gnX+s8sXnJTg@mail.gmail.com/ [2] Link: https://lore.kernel.org/r/163162773867.438332.3585429891151112562.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189112708.2509237.17528578040344723638.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/cifs/file.c | 64 +++++++++++++++------------- fs/cifs/fscache.c | 105 ++-------------------------------------------- fs/cifs/fscache.h | 75 +++------------------------------ 3 files changed, 43 insertions(+), 201 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 13f3182cf7969..02894e999c566 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4175,11 +4175,8 @@ static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct file *file = vmf->vma->vm_file; - struct inode *inode = file_inode(file); - - cifs_fscache_wait_on_page_write(inode, page); + wait_on_page_fscache(page); lock_page(page); return VM_FAULT_LOCKED; } @@ -4252,8 +4249,6 @@ cifs_readv_complete(struct work_struct *work) if (rdata->result == 0 || (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); - else - cifs_fscache_uncache_page(rdata->mapping->host, page); got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes); @@ -4376,6 +4371,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, INIT_LIST_HEAD(tmplist); +again: page = lru_to_page(page_list); /* @@ -4393,6 +4389,18 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, return rc; } + rc = cifs_readpage_from_fscache(mapping->host, page); + if (rc == 0) { + list_del_init(&page->lru); + SetPageUptodate(page); + lru_cache_add(page); + unlock_page(page); + put_page(page); + if (list_empty(page_list)) + return 0; + goto again; + } + /* move first page to the tmplist */ *offset = (loff_t)page->index << PAGE_SHIFT; *bytes = PAGE_SIZE; @@ -4416,10 +4424,20 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, __ClearPageLocked(page); break; } - list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_SIZE; + + rc = cifs_readpage_from_fscache(mapping->host, page); + if (rc == 0) { + list_del_init(&page->lru); + SetPageUptodate(page); + lru_cache_add(page); + unlock_page(page); + put_page(page); + } else { + list_move_tail(&page->lru, tmplist); + (*bytes) += PAGE_SIZE; + (*nr_pages)++; + } expected_index++; - (*nr_pages)++; } return rc; } @@ -4437,19 +4455,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, unsigned int xid; xid = get_xid(); - /* - * Reads as many pages as possible from fscache. Returns -ENOBUFS - * immediately if the cookie is negative - * - * After this point, every page in the list might have PG_fscache set, - * so we will need to clean that up off of every page we don't use. - */ - rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, - &num_pages); - if (rc == 0) { - free_xid(xid); - return rc; - } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -4574,7 +4579,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * the pagecache must be uncached before they get returned to the * allocator. 
*/ - cifs_fscache_readpages_cancel(mapping->host, page_list); free_xid(xid); return rc; } @@ -4778,17 +4782,19 @@ static int cifs_release_page(struct page *page, gfp_t gfp) { if (PagePrivate(page)) return 0; - - return cifs_fscache_release_page(page, gfp); + if (PageFsCache(page)) { + if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) + return false; + wait_on_page_fscache(page); + } + return true; } static void cifs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { - struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); - if (offset == 0 && length == PAGE_SIZE) - cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); + wait_on_page_fscache(page); } static int cifs_launder_page(struct page *page) @@ -4808,7 +4814,7 @@ static int cifs_launder_page(struct page *page) if (clear_page_dirty_for_io(page)) rc = cifs_writepage_locked(page, &wbc); - cifs_fscache_invalidate_page(page, page->mapping->host); + wait_on_page_fscache(page); return rc; } diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 8eedd20c44abd..d6ff668c268ad 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -223,30 +223,6 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) } } -int cifs_fscache_release_page(struct page *page, gfp_t gfp) -{ - if (PageFsCache(page)) { - struct inode *inode = page->mapping->host; - struct cifsInodeInfo *cifsi = CIFS_I(inode); - - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", - __func__, page, cifsi->fscache); - if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) - return 0; - } - - return 1; -} - -static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, - int error) -{ - cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error); - if (!error) - SetPageUptodate(page); - unlock_page(page); -} - /* * Retrieve a page from FS-Cache */ @@ -256,12 +232,9 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", __func__, CIFS_I(inode)->fscache, page, inode); - ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, - cifs_readpage_from_fscache_complete, - NULL, - GFP_KERNEL); - switch (ret) { + ret = fscache_fallback_read_page(cifs_inode_cookie(inode), page); + switch (ret) { case 0: /* page found in fscache, read submitted */ cifs_dbg(FYI, "%s: submitted\n", __func__); return ret; @@ -276,86 +249,14 @@ int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) return ret; } -/* - * Retrieve a set of pages from FS-Cache - */ -int __cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - int ret; - - cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n", - __func__, CIFS_I(inode)->fscache, *nr_pages, inode); - ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, - pages, nr_pages, - cifs_readpage_from_fscache_complete, - NULL, - mapping_gfp_mask(mapping)); - switch (ret) { - case 0: /* read submitted to the cache for all pages */ - cifs_dbg(FYI, "%s: submitted\n", __func__); - return ret; - - case -ENOBUFS: /* some pages are not cached and can't be */ - case -ENODATA: /* some pages are not cached */ - cifs_dbg(FYI, "%s: no page\n", __func__); - return 1; - - default: - cifs_dbg(FYI, "unknown error ret = %d\n", ret); - } - - return ret; -} - void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) { struct cifsInodeInfo *cifsi = CIFS_I(inode); - int ret; WARN_ON(!cifsi->fscache); cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n", __func__, cifsi->fscache, page, 
inode); - ret = fscache_write_page(cifsi->fscache, page, - cifsi->vfs_inode.i_size, GFP_KERNEL); - if (ret != 0) - fscache_uncache_page(cifsi->fscache, page); -} - -void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages) -{ - cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n", - __func__, CIFS_I(inode)->fscache, inode); - fscache_readpages_cancel(CIFS_I(inode)->fscache, pages); -} - -void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) -{ - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; - - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_wait_on_page_write(cookie, page); - fscache_uncache_page(cookie, page); -} - -void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page) -{ - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; - - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_wait_on_page_write(cookie, page); -} - -void __cifs_fscache_uncache_page(struct inode *inode, struct page *page) -{ - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_uncache_page(cookie, page); + fscache_fallback_write_page(cifs_inode_cookie(inode), page); } diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 729b51100a6d1..081481645b772 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -9,7 +9,7 @@ #ifndef _CIFS_FSCACHE_H #define _CIFS_FSCACHE_H -#define FSCACHE_USE_OLD_IO_API +#define FSCACHE_USE_FALLBACK_IO_API #include #include "cifsglob.h" @@ -60,40 +60,9 @@ extern void cifs_fscache_update_inode_cookie(struct inode *inode); extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *); extern void cifs_fscache_reset_inode_cookie(struct inode *); -extern void __cifs_fscache_invalidate_page(struct page *, struct inode *); -extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page); -extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page); -extern int cifs_fscache_release_page(struct page *page, gfp_t gfp); extern int __cifs_readpage_from_fscache(struct inode *, struct page *); -extern int __cifs_readpages_from_fscache(struct inode *, - struct address_space *, - struct list_head *, - unsigned *); -extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *); - extern void __cifs_readpage_to_fscache(struct inode *, struct page *); -static inline void cifs_fscache_invalidate_page(struct page *page, - struct inode *inode) -{ - if (PageFsCache(page)) - __cifs_fscache_invalidate_page(page, inode); -} - -static inline void cifs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) -{ - if (PageFsCache(page)) - __cifs_fscache_wait_on_page_write(inode, page); -} - -static inline void cifs_fscache_uncache_page(struct inode *inode, - struct page *page) -{ - if (PageFsCache(page)) - __cifs_fscache_uncache_page(inode, page); -} - static inline int cifs_readpage_from_fscache(struct inode *inode, struct page *page) { @@ -103,17 +72,6 @@ static inline int cifs_readpage_from_fscache(struct inode *inode, return -ENOBUFS; } -static inline int cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - if (CIFS_I(inode)->fscache) - return __cifs_readpages_from_fscache(inode, mapping, pages, - nr_pages); - return -ENOBUFS; -} - static inline 
void cifs_readpage_to_fscache(struct inode *inode, struct page *page) { @@ -121,11 +79,9 @@ static inline void cifs_readpage_to_fscache(struct inode *inode, __cifs_readpage_to_fscache(inode, page); } -static inline void cifs_fscache_readpages_cancel(struct inode *inode, - struct list_head *pages) +static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { - if (CIFS_I(inode)->fscache) - return __cifs_fscache_readpages_cancel(inode, pages); + return CIFS_I(inode)->fscache; } #else /* CONFIG_CIFS_FSCACHE */ @@ -145,17 +101,7 @@ static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {} static inline void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) {} static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {} -static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp) -{ - return 1; /* May release page */ -} -static inline void cifs_fscache_invalidate_page(struct page *page, - struct inode *inode) {} -static inline void cifs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) {} -static inline void cifs_fscache_uncache_page(struct inode *inode, - struct page *page) {} static inline int cifs_readpage_from_fscache(struct inode *inode, struct page *page) @@ -163,21 +109,10 @@ cifs_readpage_from_fscache(struct inode *inode, struct page *page) return -ENOBUFS; } -static inline int cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) -{ - return -ENOBUFS; -} - static inline void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {} - -static inline void cifs_fscache_readpages_cancel(struct inode *inode, - struct list_head *pages) -{ -} +static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) +{ return NULL; } #endif /* CONFIG_CIFS_FSCACHE */ From 3070d2fe6175c91c17b982ba1a8092a7692973dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 6 Feb 2020 14:22:21 +0000 Subject: [PATCH 0182/1202] fscache: Remove the old I/O API Remove the old fscache I/O API. There's no point trying to transform it as the new one bears no similarities to the old one. 
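To make the "no similarities" point concrete, a schematic contrast built
only from identifiers that appear elsewhere in this series (illustrative
sketches, not code from this patch):

    /* Old model, deleted below: one page per call, completion via
     * callback, and fscache itself tracks every page in flight.
     */
    static int old_style_read(struct fscache_cookie *cookie,
                              struct page *page,
                              fscache_rw_complete_t end_io_func)
    {
            return fscache_read_or_alloc_page(cookie, page, end_io_func,
                                              NULL, GFP_KERNEL);
    }

    /* New model: attach cache resources to a whole read request; the
     * cache then does async DIO into an iov_iter spanning the
     * pagecache, so no per-page state lives in fscache.
     */
    static int new_style_read(struct netfs_read_request *rreq,
                              struct fscache_cookie *cookie)
    {
            return fscache_begin_read_operation(&rreq->cache_resources,
                                                cookie);
    }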
Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/158861213535.340223.11422037188513966130.stgit@warthog.procyon.org.uk/ # fscache rewrite patchset rfc Link: https://lore.kernel.org/r/159465772152.1376105.17485634094767962215.stgit@warthog.procyon.org.uk/ # fscache rewrite cleanup patch-subset Link: https://lore.kernel.org/r/160588462237.3465195.5911430241577283684.stgit@warthog.procyon.org.uk/ # fscache modernisation patchset Link: https://lore.kernel.org/r/163162775443.438332.258337773271765659.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189113658.2509237.9999467212589051557.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/cachefiles/Makefile | 1 - fs/cachefiles/interface.c | 15 - fs/cachefiles/internal.h | 38 -- fs/cachefiles/main.c | 1 - fs/cachefiles/rdwr.c | 972 ------------------------------ fs/fscache/cache.c | 6 - fs/fscache/cookie.c | 10 - fs/fscache/internal.h | 27 - fs/fscache/object.c | 2 - fs/fscache/page.c | 1067 --------------------------------- fs/fscache/stats.c | 6 +- fs/nfs/fscache-index.c | 26 - include/linux/fscache-cache.h | 131 ---- include/linux/fscache.h | 358 ----------- 14 files changed, 2 insertions(+), 2658 deletions(-) delete mode 100644 fs/cachefiles/rdwr.c diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile index 02fd177317697..714e84b3ca248 100644 --- a/fs/cachefiles/Makefile +++ b/fs/cachefiles/Makefile @@ -11,7 +11,6 @@ cachefiles-y := \ key.o \ main.o \ namei.o \ - rdwr.o \ security.o \ xattr.o diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 8a7755b86c59e..83671488a3232 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -540,14 +540,6 @@ static void cachefiles_invalidate_object(struct fscache_operation *op) _leave(""); } -/* - * dissociate a cache from all the pages it was backing - */ -static void cachefiles_dissociate_pages(struct fscache_cache *cache) -{ - _enter(""); -} - const struct fscache_cache_ops cachefiles_cache_ops = { .name = "cachefiles", .alloc_object = cachefiles_alloc_object, @@ -560,13 +552,6 @@ const struct fscache_cache_ops cachefiles_cache_ops = { .put_object = cachefiles_put_object, .sync_cache = cachefiles_sync_cache, .attr_changed = cachefiles_attr_changed, - .read_or_alloc_page = cachefiles_read_or_alloc_page, - .read_or_alloc_pages = cachefiles_read_or_alloc_pages, - .allocate_page = cachefiles_allocate_page, - .allocate_pages = cachefiles_allocate_pages, - .write_page = cachefiles_write_page, - .uncache_page = cachefiles_uncache_page, - .dissociate_pages = cachefiles_dissociate_pages, .check_consistency = cachefiles_check_consistency, .begin_operation = cachefiles_begin_operation, }; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 994f90ff12ac7..de982f4f513fb 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -43,7 +43,6 @@ struct cachefiles_object { atomic_t usage; /* object usage count */ uint8_t type; /* object type */ uint8_t new; /* T if object new */ - spinlock_t work_lock; struct rb_node active_node; /* link in active tree (dentry is key) */ }; @@ -89,28 +88,6 @@ struct cachefiles_cache { char *tag; /* cache binding tag */ }; -/* - * backing file read tracking - */ -struct cachefiles_one_read { - wait_queue_entry_t monitor; /* link into monitored waitqueue */ - struct page *back_page; /* backing file page we're waiting for */ - struct page *netfs_page; /* netfs page we're going to fill */ - struct fscache_retrieval *op; /* retrieval op covering this */ - 
struct list_head op_link; /* link in op's todo list */ -}; - -/* - * backing file write tracking - */ -struct cachefiles_one_write { - struct page *netfs_page; /* netfs page to copy */ - struct cachefiles_object *object; - struct list_head obj_link; /* link in object's lists */ - fscache_rw_complete_t end_io_func; - void *context; -}; - /* * auxiliary data xattr buffer */ @@ -180,21 +157,6 @@ extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); -/* - * rdwr.c - */ -extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, - struct page *, gfp_t); -extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, - gfp_t); -extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, - gfp_t); -extern int cachefiles_allocate_pages(struct fscache_retrieval *, - struct list_head *, unsigned *, gfp_t); -extern int cachefiles_write_page(struct fscache_storage *, struct page *); -extern void cachefiles_uncache_page(struct fscache_object *, struct page *); - /* * rdwr2.c */ diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index 9c8d34c49b125..d3115106b22b0 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -42,7 +42,6 @@ static void cachefiles_object_init_once(void *_object) struct cachefiles_object *object = _object; memset(object, 0, sizeof(*object)); - spin_lock_init(&object->work_lock); } /* diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c deleted file mode 100644 index 8ffc40e84a594..0000000000000 --- a/fs/cachefiles/rdwr.c +++ /dev/null @@ -1,972 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* Storage object read/write - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#include -#include -#include -#include -#include "internal.h" - -/* - * detect wake up events generated by the unlocking of pages in which we're - * interested - * - we use this to detect read completion of backing pages - * - the caller holds the waitqueue lock - */ -static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, - int sync, void *_key) -{ - struct cachefiles_one_read *monitor = - container_of(wait, struct cachefiles_one_read, monitor); - struct cachefiles_object *object; - struct fscache_retrieval *op = monitor->op; - struct wait_page_key *key = _key; - struct page *page = wait->private; - - ASSERT(key); - - _enter("{%lu},%u,%d,{%p,%u}", - monitor->netfs_page->index, mode, sync, - key->page, key->bit_nr); - - if (key->page != page || key->bit_nr != PG_locked) - return 0; - - _debug("--- monitor %p %lx ---", page, page->flags); - - if (!PageUptodate(page) && !PageError(page)) { - /* unlocked, not uptodate and not erronous? */ - _debug("page probably truncated"); - } - - /* remove from the waitqueue */ - list_del(&wait->entry); - - /* move onto the action list and queue for FS-Cache thread pool */ - ASSERT(op); - - /* We need to temporarily bump the usage count as we don't own a ref - * here otherwise cachefiles_read_copier() may free the op between the - * monitor being enqueued on the op->to_do list and the op getting - * enqueued on the work queue. 
- */ - fscache_get_retrieval(op); - - object = container_of(op->op.object, struct cachefiles_object, fscache); - spin_lock(&object->work_lock); - list_add_tail(&monitor->op_link, &op->to_do); - fscache_enqueue_retrieval(op); - spin_unlock(&object->work_lock); - - fscache_put_retrieval(op); - return 0; -} - -/* - * handle a probably truncated page - * - check to see if the page is still relevant and reissue the read if - * possible - * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we - * must wait again and 0 if successful - */ -static int cachefiles_read_reissue(struct cachefiles_object *object, - struct cachefiles_one_read *monitor) -{ - struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; - struct page *backpage = monitor->back_page, *backpage2; - int ret; - - _enter("{ino=%lx},{%lx,%lx}", - d_backing_inode(object->backer)->i_ino, - backpage->index, backpage->flags); - - /* skip if the page was truncated away completely */ - if (backpage->mapping != bmapping) { - _leave(" = -ENODATA [mapping]"); - return -ENODATA; - } - - backpage2 = find_get_page(bmapping, backpage->index); - if (!backpage2) { - _leave(" = -ENODATA [gone]"); - return -ENODATA; - } - - if (backpage != backpage2) { - put_page(backpage2); - _leave(" = -ENODATA [different]"); - return -ENODATA; - } - - /* the page is still there and we already have a ref on it, so we don't - * need a second */ - put_page(backpage2); - - INIT_LIST_HEAD(&monitor->op_link); - add_page_wait_queue(backpage, &monitor->monitor); - - if (trylock_page(backpage)) { - ret = -EIO; - if (PageError(backpage)) - goto unlock_discard; - ret = 0; - if (PageUptodate(backpage)) - goto unlock_discard; - - _debug("reissue read"); - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto discard; - } - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - /* it'll reappear on the todo list */ - _leave(" = -EINPROGRESS"); - return -EINPROGRESS; - -unlock_discard: - unlock_page(backpage); -discard: - spin_lock_irq(&object->work_lock); - list_del(&monitor->op_link); - spin_unlock_irq(&object->work_lock); - _leave(" = %d", ret); - return ret; -} - -/* - * copy data from backing pages to netfs pages to complete a read operation - * - driven by FS-Cache's thread pool - */ -static void cachefiles_read_copier(struct fscache_operation *_op) -{ - struct cachefiles_one_read *monitor; - struct cachefiles_object *object; - struct fscache_retrieval *op; - int error, max; - - op = container_of(_op, struct fscache_retrieval, op); - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino); - - max = 8; - spin_lock_irq(&object->work_lock); - - while (!list_empty(&op->to_do)) { - monitor = list_entry(op->to_do.next, - struct cachefiles_one_read, op_link); - list_del(&monitor->op_link); - - spin_unlock_irq(&object->work_lock); - - _debug("- copy {%lu}", monitor->back_page->index); - - recheck: - if (test_bit(FSCACHE_COOKIE_INVALIDATING, - &object->fscache.cookie->flags)) { - error = -ESTALE; - } else if (PageUptodate(monitor->back_page)) { - copy_highpage(monitor->netfs_page, monitor->back_page); - fscache_mark_page_cached(monitor->op, - monitor->netfs_page); - error = 0; - } else if 
(!PageError(monitor->back_page)) { - /* the page has probably been truncated */ - error = cachefiles_read_reissue(object, monitor); - if (error == -EINPROGRESS) - goto next; - goto recheck; - } else { - cachefiles_io_error_obj( - object, - "Readpage failed on backing file %lx", - (unsigned long) monitor->back_page->flags); - error = -EIO; - } - - put_page(monitor->back_page); - - fscache_end_io(op, monitor->netfs_page, error); - put_page(monitor->netfs_page); - fscache_retrieval_complete(op, 1); - fscache_put_retrieval(op); - kfree(monitor); - - next: - /* let the thread pool have some air occasionally */ - max--; - if (max < 0 || need_resched()) { - if (!list_empty(&op->to_do)) - fscache_enqueue_retrieval(op); - _leave(" [maxed out]"); - return; - } - - spin_lock_irq(&object->work_lock); - } - - spin_unlock_irq(&object->work_lock); - _leave(""); -} - -/* - * read the corresponding page to the given set from the backing file - * - an uncertain page is simply discarded, to be tried again another time - */ -static int cachefiles_read_backing_file_one(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct page *netpage) -{ - struct cachefiles_one_read *monitor; - struct address_space *bmapping; - struct page *newpage, *backpage; - int ret; - - _enter(""); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->netfs_page = netpage; - monitor->op = fscache_get_retrieval(op); - - init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); - - /* attempt to get hold of the backing page */ - bmapping = d_backing_inode(object->backer)->i_mapping; - newpage = NULL; - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = __page_cache_alloc(cachefiles_gfp); - if (!newpage) - goto nomem_monitor; - } - - ret = add_to_page_cache_lru(newpage, bmapping, - netpage->index, cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem_page; - } - - /* we've installed a new backing page, so now we need to start - * it reading */ -installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - -read_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* set the monitor to transfer the data across */ -monitor_backing_page: - _debug("- monitor add"); - - /* install the monitor */ - get_page(monitor->netfs_page); - get_page(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was installed, so - * the monitor may miss the event - so we have to ensure that we do get - * one in such a case */ - if (trylock_page(backpage)) { - _debug("jumpstart %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - goto success; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ -backing_page_already_present: - _debug("- present"); - - if (newpage) { - put_page(newpage); - newpage = NULL; - } - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - if (!trylock_page(backpage)) - goto monitor_backing_page; - _debug("read %p {%lx}", backpage, 
backpage->flags); - goto read_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ -backing_page_already_uptodate: - _debug("- uptodate"); - - fscache_mark_page_cached(op, netpage); - - copy_highpage(netpage, backpage); - fscache_end_io(op, netpage, 0); - fscache_retrieval_complete(op, 1); - -success: - _debug("success"); - ret = 0; - -out: - if (backpage) - put_page(backpage); - if (monitor) { - fscache_put_retrieval(monitor->op); - kfree(monitor); - } - _leave(" = %d", ret); - return ret; - -read_error: - _debug("read error %d", ret); - if (ret == -ENOMEM) { - fscache_retrieval_complete(op, 1); - goto out; - } -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - fscache_retrieval_complete(op, 1); - ret = -ENOBUFS; - goto out; - -nomem_page: - put_page(newpage); -nomem_monitor: - fscache_put_retrieval(monitor->op); - kfree(monitor); -nomem: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOMEM"); - return -ENOMEM; -} - -/* - * read a page from the cache or allocate a block in which to store it - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - if the page is backed by a block in the cache: - * - a read will be started which will call the callback on completion - * - 0 will be returned - * - else if the page is unbacked: - * - the metadata will be retained - * - -ENODATA will be returned - */ -int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct inode *inode; - sector_t block; - unsigned shift; - int ret, ret2; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{%p},{%lx},,,", object, page->index); - - if (!object->backer) - goto enobufs; - - inode = d_backing_inode(object->backer); - ASSERT(S_ISREG(inode->i_mode)); - - /* calculate the shift required to use bmap */ - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - /* we assume the absence or presence of the first block is a good - * enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually good - * enough for this as it doesn't indicate errors, but it's all we've - * got for the moment - */ - block = page->index; - block <<= shift; - - ret2 = bmap(inode, &block); - ASSERT(ret2 == 0); - - _debug("%llx -> %llx", - (unsigned long long) (page->index << shift), - (unsigned long long) block); - - if (block) { - /* submit the apparently valid page to the backing fs to be - * read from disk */ - ret = cachefiles_read_backing_file_one(object, op, page); - } else if (cachefiles_has_space(cache, 0, 1) == 0) { - /* there's space in the cache we can use */ - fscache_mark_page_cached(op, page); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - goto enobufs; - } - - _leave(" = %d", ret); - return ret; - -enobufs: - fscache_retrieval_complete(op, 1); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} - -/* - * read the corresponding pages to the given set from the backing file - * - any uncertain pages are simply 
discarded, to be tried again another time - */ -static int cachefiles_read_backing_file(struct cachefiles_object *object, - struct fscache_retrieval *op, - struct list_head *list) -{ - struct cachefiles_one_read *monitor = NULL; - struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping; - struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; - int ret = 0; - - _enter(""); - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - - _debug("read back %p{%lu,%d}", - netpage, netpage->index, page_count(netpage)); - - if (!monitor) { - monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); - if (!monitor) - goto nomem; - - monitor->op = fscache_get_retrieval(op); - init_waitqueue_func_entry(&monitor->monitor, - cachefiles_read_waiter); - } - - for (;;) { - backpage = find_get_page(bmapping, netpage->index); - if (backpage) - goto backing_page_already_present; - - if (!newpage) { - newpage = __page_cache_alloc(cachefiles_gfp); - if (!newpage) - goto nomem; - } - - ret = add_to_page_cache_lru(newpage, bmapping, - netpage->index, - cachefiles_gfp); - if (ret == 0) - goto installed_new_backing_page; - if (ret != -EEXIST) - goto nomem; - } - - /* we've installed a new backing page, so now we need - * to start it reading */ - installed_new_backing_page: - _debug("- new %p", newpage); - - backpage = newpage; - newpage = NULL; - - reread_backing_page: - ret = bmapping->a_ops->readpage(NULL, backpage); - if (ret < 0) - goto read_error; - - /* add the netfs page to the pagecache and LRU, and set the - * monitor to transfer the data across */ - monitor_backing_page: - _debug("- monitor add"); - - ret = add_to_page_cache_lru(netpage, op->mapping, - netpage->index, cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - put_page(backpage); - backpage = NULL; - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - /* install a monitor */ - get_page(netpage); - monitor->netfs_page = netpage; - - get_page(backpage); - monitor->back_page = backpage; - monitor->monitor.private = backpage; - add_page_wait_queue(backpage, &monitor->monitor); - monitor = NULL; - - /* but the page may have been read before the monitor was - * installed, so the monitor may miss the event - so we have to - * ensure that we do get one in such a case */ - if (trylock_page(backpage)) { - _debug("2unlock %p {%lx}", backpage, backpage->flags); - unlock_page(backpage); - } - - put_page(backpage); - backpage = NULL; - - put_page(netpage); - netpage = NULL; - continue; - - /* if the backing page is already present, it can be in one of - * three states: read in progress, read failed or read okay */ - backing_page_already_present: - _debug("- present %p", backpage); - - if (PageError(backpage)) - goto io_error; - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate; - - _debug("- not ready %p{%lx}", backpage, backpage->flags); - - if (!trylock_page(backpage)) - goto monitor_backing_page; - - if (PageError(backpage)) { - _debug("error %lx", backpage->flags); - unlock_page(backpage); - goto io_error; - } - - if (PageUptodate(backpage)) - goto backing_page_already_uptodate_unlock; - - /* we've locked a page that's neither up to date nor erroneous, - * so we need to attempt to read it again */ - goto reread_backing_page; - - /* the backing page is already up to date, attach the netfs - * page to the pagecache and LRU and copy the data across */ - backing_page_already_uptodate_unlock: - _debug("uptodate %lx", 
backpage->flags); - unlock_page(backpage); - backing_page_already_uptodate: - _debug("- uptodate"); - - ret = add_to_page_cache_lru(netpage, op->mapping, - netpage->index, cachefiles_gfp); - if (ret < 0) { - if (ret == -EEXIST) { - put_page(backpage); - backpage = NULL; - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - goto nomem; - } - - copy_highpage(netpage, backpage); - - put_page(backpage); - backpage = NULL; - - fscache_mark_page_cached(op, netpage); - - /* the netpage is unlocked and marked up to date here */ - fscache_end_io(op, netpage, 0); - put_page(netpage); - netpage = NULL; - fscache_retrieval_complete(op, 1); - continue; - } - - netpage = NULL; - - _debug("out"); - -out: - /* tidy up */ - if (newpage) - put_page(newpage); - if (netpage) - put_page(netpage); - if (backpage) - put_page(backpage); - if (monitor) { - fscache_put_retrieval(op); - kfree(monitor); - } - - list_for_each_entry_safe(netpage, _n, list, lru) { - list_del(&netpage->lru); - put_page(netpage); - fscache_retrieval_complete(op, 1); - } - - _leave(" = %d", ret); - return ret; - -nomem: - _debug("nomem"); - ret = -ENOMEM; - goto record_page_complete; - -read_error: - _debug("read error %d", ret); - if (ret == -ENOMEM) - goto record_page_complete; -io_error: - cachefiles_io_error_obj(object, "Page read error on backing file"); - ret = -ENOBUFS; -record_page_complete: - fscache_retrieval_complete(op, 1); - goto out; -} - -/* - * read a list of pages from the cache or allocate blocks in which to store - * them - */ -int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct list_head backpages; - struct pagevec pagevec; - struct inode *inode; - struct page *page, *_n; - unsigned shift, nrbackpages; - int ret, ret2, space; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("{OBJ%x,%d},,%d,,", - object->fscache.debug_id, atomic_read(&op->op.usage), - *nr_pages); - - if (!object->backer) - goto all_enobufs; - - space = 1; - if (cachefiles_has_space(cache, 0, *nr_pages) < 0) - space = 0; - - inode = d_backing_inode(object->backer); - ASSERT(S_ISREG(inode->i_mode)); - - /* calculate the shift required to use bmap */ - shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; - - pagevec_init(&pagevec); - - op->op.flags &= FSCACHE_OP_KEEP_FLAGS; - op->op.flags |= FSCACHE_OP_ASYNC; - op->op.processor = cachefiles_read_copier; - - INIT_LIST_HEAD(&backpages); - nrbackpages = 0; - - ret = space ? 
-ENODATA : -ENOBUFS; - list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block; - - /* we assume the absence or presence of the first block is a - * good enough indication for the page as a whole - * - TODO: don't use bmap() for this as it is _not_ actually - * good enough for this as it doesn't indicate errors, but - * it's all we've got for the moment - */ - block = page->index; - block <<= shift; - - ret2 = bmap(inode, &block); - ASSERT(ret2 == 0); - - _debug("%llx -> %llx", - (unsigned long long) (page->index << shift), - (unsigned long long) block); - - if (block) { - /* we have data - add it to the list to give to the - * backing fs */ - list_move(&page->lru, &backpages); - (*nr_pages)--; - nrbackpages++; - } else if (space && pagevec_add(&pagevec, page) == 0) { - fscache_mark_pages_cached(op, &pagevec); - fscache_retrieval_complete(op, 1); - ret = -ENODATA; - } else { - fscache_retrieval_complete(op, 1); - } - } - - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - - if (list_empty(pages)) - ret = 0; - - /* submit the apparently valid pages to the backing fs to be read from - * disk */ - if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages); - if (ret2 == -ENOMEM || ret2 == -EINTR) - ret = ret2; - } - - _leave(" = %d [nr=%u%s]", - ret, *nr_pages, list_empty(pages) ? " empty" : ""); - return ret; - -all_enobufs: - fscache_retrieval_complete(op, *nr_pages); - return -ENOBUFS; -} - -/* - * allocate a block in the cache in which to store a page - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if no buffers can be made available - * - returns -ENOBUFS if page is beyond EOF - * - otherwise: - * - the metadata will be retained - * - 0 will be returned - */ -int cachefiles_allocate_page(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,{%lx},", object, page->index); - - ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) - fscache_mark_page_cached(op, page); - else - ret = -ENOBUFS; - - fscache_retrieval_complete(op, 1); - _leave(" = %d", ret); - return ret; -} - -/* - * allocate blocks in the cache in which to store a set of pages - * - cache withdrawal is prevented by the caller - * - returns -EINTR if interrupted - * - returns -ENOMEM if ran out of memory - * - returns -ENOBUFS if some buffers couldn't be made available - * - returns -ENOBUFS if some pages are beyond EOF - * - otherwise: - * - -ENODATA will be returned - * - metadata will be retained for any page marked - */ -int cachefiles_allocate_pages(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct pagevec pagevec; - struct page *page; - int ret; - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - _enter("%p,,,%d,", object, *nr_pages); - - ret = cachefiles_has_space(cache, 0, *nr_pages); - if (ret == 0) { - pagevec_init(&pagevec); - - list_for_each_entry(page, pages, lru) { - if (pagevec_add(&pagevec, page) == 0) - fscache_mark_pages_cached(op, &pagevec); - } 
- - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - ret = -ENODATA; - } else { - ret = -ENOBUFS; - } - - fscache_retrieval_complete(op, *nr_pages); - _leave(" = %d", ret); - return ret; -} - -/* - * request a page be stored in the cache - * - cache withdrawal is prevented by the caller - * - this request may be ignored if there's no cache block available, in which - * case -ENOBUFS will be returned - * - if the op is in progress, 0 will be returned - */ -int cachefiles_write_page(struct fscache_storage *op, struct page *page) -{ - struct cachefiles_object *object; - struct cachefiles_cache *cache; - struct file *file; - struct path path; - loff_t pos, eof; - size_t len; - void *data; - int ret = -ENOBUFS; - - ASSERT(op != NULL); - ASSERT(page != NULL); - - object = container_of(op->op.object, - struct cachefiles_object, fscache); - - _enter("%p,%p{%lx},,,", object, page, page->index); - - if (!object->backer) { - _leave(" = -ENOBUFS"); - return -ENOBUFS; - } - - ASSERT(d_is_reg(object->backer)); - - cache = container_of(object->fscache.cache, - struct cachefiles_cache, cache); - - pos = (loff_t)page->index << PAGE_SHIFT; - - /* We mustn't write more data than we have, so we have to beware of a - * partial page at EOF. - */ - eof = object->fscache.store_limit_l; - if (pos >= eof) - goto error; - - /* write the page to the backing filesystem and let it store it in its - * own time */ - path.mnt = cache->mnt; - path.dentry = object->backer; - file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto error_2; - } - - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } - } - - data = kmap(page); - ret = kernel_write(file, data, len, &pos); - kunmap(page); - fput(file); - if (ret != len) - goto error_eio; - - _leave(" = 0"); - return 0; - -error_eio: - ret = -EIO; -error_2: - if (ret == -EIO) - cachefiles_io_error_obj(object, - "Write page to backing file failed"); -error: - _leave(" = -ENOBUFS [%d]", ret); - return -ENOBUFS; -} - -/* - * detach a backing block from a page - * - cache withdrawal is prevented by the caller - */ -void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) - __releases(&object->fscache.cookie->lock) -{ - struct cachefiles_object *object; - - object = container_of(_object, struct cachefiles_object, fscache); - - _enter("%p,{%lu}", object, page->index); - - spin_unlock(&object->fscache.cookie->lock); -} diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index bd4f44c1cce03..cfa60c2faf686 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -381,12 +381,6 @@ void fscache_withdraw_cache(struct fscache_cache *cache) cache->ops->sync_cache(cache); fscache_stat_d(&fscache_n_cop_sync_cache); - /* dissociate all the netfs pages backed by this cache from the block - * mappings in the cache */ - fscache_stat(&fscache_n_cop_dissociate_pages); - cache->ops->dissociate_pages(cache); - fscache_stat_d(&fscache_n_cop_dissociate_pages); - /* we now have to destroy all the active objects pertaining to this * cache - which we do by passing them off to thread pool to be * disposed of */ diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index cd42be646ed3b..8a850c3d07753 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -179,13 +179,8 @@ struct fscache_cookie *fscache_alloc_cookie( cookie->flags = (1 << 
FSCACHE_COOKIE_NO_DATA_YET); cookie->type = def->type; spin_lock_init(&cookie->lock); - spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); - /* radix tree insertion won't use the preallocation pool unless it's - * told it may not wait */ - INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); - write_lock(&fscache_cookies_lock); list_add_tail(&cookie->proc_link, &fscache_cookies); write_unlock(&fscache_cookies_lock); @@ -771,10 +766,6 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, !atomic_read(&cookie->n_active)); } - /* Make sure any pending writes are cancelled. */ - if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) - fscache_invalidate_writes(cookie); - /* Reset the cookie state if it wasn't relinquished */ if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { atomic_inc(&cookie->n_active); @@ -823,7 +814,6 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, /* Clear pointers back to the netfs */ cookie->netfs_data = NULL; cookie->def = NULL; - BUG_ON(!radix_tree_empty(&cookie->stores)); if (cookie->parent) { ASSERTCMP(refcount_read(&cookie->parent->ref), >, 0); diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 1d1046408311d..5281fa1894cd9 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -116,11 +116,6 @@ extern int fscache_wait_for_operation_activation(struct fscache_object *, struct fscache_operation *, atomic_t *, atomic_t *); -extern void fscache_invalidate_writes(struct fscache_cookie *); -struct fscache_retrieval *fscache_alloc_retrieval(struct fscache_cookie *cookie, - struct address_space *mapping, - fscache_rw_complete_t end_io_func, - void *context); /* * proc.c @@ -253,7 +248,6 @@ extern atomic_t fscache_n_cop_allocate_page; extern atomic_t fscache_n_cop_allocate_pages; extern atomic_t fscache_n_cop_write_page; extern atomic_t fscache_n_cop_uncache_page; -extern atomic_t fscache_n_cop_dissociate_pages; extern atomic_t fscache_n_cache_no_space_reject; extern atomic_t fscache_n_cache_stale_objects; @@ -298,27 +292,6 @@ static inline void fscache_raise_event(struct fscache_object *object, fscache_enqueue_object(object); } -/* - * get an extra reference to a netfs retrieval context - */ -static inline -void *fscache_get_context(struct fscache_cookie *cookie, void *context) -{ - if (cookie->def->get_context) - cookie->def->get_context(cookie->netfs_data, context); - return context; -} - -/* - * release a reference to a netfs retrieval context - */ -static inline -void fscache_put_context(struct fscache_cookie *cookie, void *context) -{ - if (cookie->def->put_context) - cookie->def->put_context(cookie->netfs_data, context); -} - /* * Update the auxiliary data on a cookie. */ diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 6a675652129b2..86ad941726f72 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -965,14 +965,12 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj * retire the object instead. */ if (!fscache_use_cookie(object)) { - ASSERT(radix_tree_empty(&object->cookie->stores)); set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); _leave(" [no cookie]"); return transit_to(KILL_OBJECT); } /* Reject any new read/write ops and abort any that are pending. 
*/ - fscache_invalidate_writes(cookie); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); fscache_cancel_all_ops(object); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ed41a00b861c0..3fd6a2b45fedc 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -7,181 +7,12 @@ #define FSCACHE_DEBUG_LEVEL PAGE #include -#define FSCACHE_USE_OLD_IO_API #include #include #include #include #include "internal.h" -/* - * check to see if a page is being written to the cache - */ -bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) -{ - void *val; - - rcu_read_lock(); - val = radix_tree_lookup(&cookie->stores, page->index); - rcu_read_unlock(); - trace_fscache_check_page(cookie, page, val, 0); - - return val != NULL; -} -EXPORT_SYMBOL(__fscache_check_page_write); - -/* - * wait for a page to finish being written to the cache - */ -void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page) -{ - wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); - - trace_fscache_page(cookie, page, fscache_page_write_wait); - - wait_event(*wq, !__fscache_check_page_write(cookie, page)); -} -EXPORT_SYMBOL(__fscache_wait_on_page_write); - -/* - * wait for a page to finish being written to the cache. Put a timeout here - * since we might be called recursively via parent fs. - */ -static -bool release_page_wait_timeout(struct fscache_cookie *cookie, struct page *page) -{ - wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); - - return wait_event_timeout(*wq, !__fscache_check_page_write(cookie, page), - HZ); -} - -/* - * decide whether a page can be released, possibly by cancelling a store to it - * - we're allowed to sleep if __GFP_DIRECT_RECLAIM is flagged - */ -bool __fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - struct page *xpage; - void *val; - - _enter("%p,%p,%x", cookie, page, gfp); - - trace_fscache_page(cookie, page, fscache_page_maybe_release); - -try_again: - rcu_read_lock(); - val = radix_tree_lookup(&cookie->stores, page->index); - if (!val) { - rcu_read_unlock(); - fscache_stat(&fscache_n_store_vmscan_not_storing); - __fscache_uncache_page(cookie, page); - return true; - } - - /* see if the page is actually undergoing storage - if so we can't get - * rid of it till the cache has finished with it */ - if (radix_tree_tag_get(&cookie->stores, page->index, - FSCACHE_COOKIE_STORING_TAG)) { - rcu_read_unlock(); - goto page_busy; - } - - /* the page is pending storage, so we attempt to cancel the store and - * discard the store request so that the page can be reclaimed */ - spin_lock(&cookie->stores_lock); - rcu_read_unlock(); - - if (radix_tree_tag_get(&cookie->stores, page->index, - FSCACHE_COOKIE_STORING_TAG)) { - /* the page started to undergo storage whilst we were looking, - * so now we can only wait or return */ - spin_unlock(&cookie->stores_lock); - goto page_busy; - } - - xpage = radix_tree_delete(&cookie->stores, page->index); - trace_fscache_page(cookie, page, fscache_page_radix_delete); - spin_unlock(&cookie->stores_lock); - - if (xpage) { - fscache_stat(&fscache_n_store_vmscan_cancelled); - fscache_stat(&fscache_n_store_radix_deletes); - ASSERTCMP(xpage, ==, page); - } else { - fscache_stat(&fscache_n_store_vmscan_gone); - } - - wake_up_bit(&cookie->flags, 0); - trace_fscache_wake_cookie(cookie); - if (xpage) - put_page(xpage); - __fscache_uncache_page(cookie, page); - return true; - -page_busy: - /* We will wait here if we're allowed to, but that could deadlock the - * 
allocator as the work threads writing to the cache may all end up - * sleeping on memory allocation, so we may need to impose a timeout - * too. */ - if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS)) { - fscache_stat(&fscache_n_store_vmscan_busy); - return false; - } - - fscache_stat(&fscache_n_store_vmscan_wait); - if (!release_page_wait_timeout(cookie, page)) - _debug("fscache writeout timeout page: %p{%lx}", - page, page->index); - - gfp &= ~__GFP_DIRECT_RECLAIM; - goto try_again; -} -EXPORT_SYMBOL(__fscache_maybe_release_page); - -/* - * note that a page has finished being written to the cache - */ -static void fscache_end_page_write(struct fscache_object *object, - struct page *page) -{ - struct fscache_cookie *cookie; - struct page *xpage = NULL, *val; - - spin_lock(&object->lock); - cookie = object->cookie; - if (cookie) { - /* delete the page from the tree if it is now no longer - * pending */ - spin_lock(&cookie->stores_lock); - radix_tree_tag_clear(&cookie->stores, page->index, - FSCACHE_COOKIE_STORING_TAG); - trace_fscache_page(cookie, page, fscache_page_radix_clear_store); - if (!radix_tree_tag_get(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG)) { - fscache_stat(&fscache_n_store_radix_deletes); - xpage = radix_tree_delete(&cookie->stores, page->index); - trace_fscache_page(cookie, page, fscache_page_radix_delete); - trace_fscache_page(cookie, page, fscache_page_write_end); - - val = radix_tree_lookup(&cookie->stores, page->index); - trace_fscache_check_page(cookie, page, val, 1); - } else { - trace_fscache_page(cookie, page, fscache_page_write_end_pend); - } - spin_unlock(&cookie->stores_lock); - wake_up_bit(&cookie->flags, 0); - trace_fscache_wake_cookie(cookie); - } else { - trace_fscache_page(cookie, page, fscache_page_write_end_noc); - } - spin_unlock(&object->lock); - if (xpage) - put_page(xpage); -} - /* * actually apply the changed attributes to a cache object */ @@ -266,74 +97,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) } EXPORT_SYMBOL(__fscache_attr_changed); -/* - * Handle cancellation of a pending retrieval op - */ -static void fscache_do_cancel_retrieval(struct fscache_operation *_op) -{ - struct fscache_retrieval *op = - container_of(_op, struct fscache_retrieval, op); - - atomic_set(&op->n_pages, 0); -} - -/* - * release a retrieval op reference - */ -static void fscache_release_retrieval_op(struct fscache_operation *_op) -{ - struct fscache_retrieval *op = - container_of(_op, struct fscache_retrieval, op); - - _enter("{OP%x}", op->op.debug_id); - - ASSERTIFCMP(op->op.state != FSCACHE_OP_ST_INITIALISED, - atomic_read(&op->n_pages), ==, 0); - - if (op->context) - fscache_put_context(op->cookie, op->context); - - _leave(""); -} - -/* - * allocate a retrieval op - */ -struct fscache_retrieval *fscache_alloc_retrieval( - struct fscache_cookie *cookie, - struct address_space *mapping, - fscache_rw_complete_t end_io_func, - void *context) -{ - struct fscache_retrieval *op; - - /* allocate a retrieval operation and attempt to submit it */ - op = kzalloc(sizeof(*op), GFP_NOIO); - if (!op) { - fscache_stat(&fscache_n_retrievals_nomem); - return NULL; - } - - fscache_operation_init(cookie, &op->op, NULL, - fscache_do_cancel_retrieval, - fscache_release_retrieval_op); - op->op.flags = FSCACHE_OP_MYTHREAD | - (1UL << FSCACHE_OP_WAITING) | - (1UL << FSCACHE_OP_UNUSE_COOKIE); - op->cookie = cookie; - op->mapping = mapping; - op->end_io_func = end_io_func; - op->context = context; - INIT_LIST_HEAD(&op->to_do); - - /* Pin the netfs read 
context in case we need to do the actual netfs - * read because we've encountered a cache read failure. - */ - if (context) - fscache_get_context(op->cookie, context); - return op; -} - /* * wait for a deferred lookup to complete */ @@ -411,833 +174,3 @@ int fscache_wait_for_operation_activation(struct fscache_object *object, } return 0; } - -/* - * read a page from the cache or allocate a block in which to store it - * - we return: - * -ENOMEM - out of memory, nothing done - * -ERESTARTSYS - interrupted - * -ENOBUFS - no backing object available in which to cache the block - * -ENODATA - no data available in the backing object for this block - * 0 - dispatched a read - it'll call end_io_func() when finished - */ -int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - struct fscache_retrieval *op; - struct fscache_object *object; - bool wake_cookie = false; - int ret; - - _enter("%p,%p,,,", cookie, page); - - fscache_stat(&fscache_n_retrievals); - - if (hlist_empty(&cookie->backing_objects)) - goto nobufs; - - if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { - _leave(" = -ENOBUFS [invalidating]"); - return -ENOBUFS; - } - - ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); - ASSERTCMP(page, !=, NULL); - - if (fscache_wait_for_deferred_lookup(cookie) < 0) - return -ERESTARTSYS; - - op = fscache_alloc_retrieval(cookie, page->mapping, - end_io_func, context); - if (!op) { - _leave(" = -ENOMEM"); - return -ENOMEM; - } - atomic_set(&op->n_pages, 1); - trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_retr_one); - - spin_lock(&cookie->lock); - - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) - goto nobufs_unlock; - object = hlist_entry(cookie->backing_objects.first, - struct fscache_object, cookie_link); - - ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)); - - __fscache_use_cookie(cookie); - atomic_inc(&object->n_reads); - __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); - - if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock_dec; - spin_unlock(&cookie->lock); - - fscache_stat(&fscache_n_retrieval_ops); - - /* we wait for the operation to become active, and then process it - * *here*, in this thread, and not in the thread pool */ - ret = fscache_wait_for_operation_activation( - object, &op->op, - __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead)); - if (ret < 0) - goto error; - - /* ask the cache to honour the operation */ - if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { - fscache_stat(&fscache_n_cop_allocate_page); - ret = object->cache->ops->allocate_page(op, page, gfp); - fscache_stat_d(&fscache_n_cop_allocate_page); - if (ret == 0) - ret = -ENODATA; - } else { - fscache_stat(&fscache_n_cop_read_or_alloc_page); - ret = object->cache->ops->read_or_alloc_page(op, page, gfp); - fscache_stat_d(&fscache_n_cop_read_or_alloc_page); - } - -error: - if (ret == -ENOMEM) - fscache_stat(&fscache_n_retrievals_nomem); - else if (ret == -ERESTARTSYS) - fscache_stat(&fscache_n_retrievals_intr); - else if (ret == -ENODATA) - fscache_stat(&fscache_n_retrievals_nodata); - else if (ret < 0) - fscache_stat(&fscache_n_retrievals_nobufs); - else - fscache_stat(&fscache_n_retrievals_ok); - - fscache_put_retrieval(op); - _leave(" = %d", ret); - return ret; - -nobufs_unlock_dec: - atomic_dec(&object->n_reads); - wake_cookie = 
__fscache_unuse_cookie(cookie); -nobufs_unlock: - spin_unlock(&cookie->lock); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); - fscache_put_retrieval(op); -nobufs: - fscache_stat(&fscache_n_retrievals_nobufs); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} -EXPORT_SYMBOL(__fscache_read_or_alloc_page); - -/* - * read a list of pages from the cache or allocate blocks in which to store - * them - * - we return: - * -ENOMEM - out of memory, some pages may be being read - * -ERESTARTSYS - interrupted, some pages may be being read - * -ENOBUFS - no backing object or space available in which to cache any - * pages not being read - * -ENODATA - no data available in the backing object for some or all of - * the pages - * 0 - dispatched a read on all pages - * - * end_io_func() will be called for each page read from the cache as it - * finishes being read - * - * any pages for which a read is dispatched will be removed from pages and - * nr_pages - */ -int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - struct fscache_retrieval *op; - struct fscache_object *object; - bool wake_cookie = false; - int ret; - - _enter("%p,,%d,,,", cookie, *nr_pages); - - fscache_stat(&fscache_n_retrievals); - - if (hlist_empty(&cookie->backing_objects)) - goto nobufs; - - if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { - _leave(" = -ENOBUFS [invalidating]"); - return -ENOBUFS; - } - - ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); - ASSERTCMP(*nr_pages, >, 0); - ASSERT(!list_empty(pages)); - - if (fscache_wait_for_deferred_lookup(cookie) < 0) - return -ERESTARTSYS; - - op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context); - if (!op) - return -ENOMEM; - atomic_set(&op->n_pages, *nr_pages); - trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi); - - spin_lock(&cookie->lock); - - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) - goto nobufs_unlock; - object = hlist_entry(cookie->backing_objects.first, - struct fscache_object, cookie_link); - - __fscache_use_cookie(cookie); - atomic_inc(&object->n_reads); - __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); - - if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock_dec; - spin_unlock(&cookie->lock); - - fscache_stat(&fscache_n_retrieval_ops); - - /* we wait for the operation to become active, and then process it - * *here*, in this thread, and not in the thread pool */ - ret = fscache_wait_for_operation_activation( - object, &op->op, - __fscache_stat(&fscache_n_retrieval_op_waits), - __fscache_stat(&fscache_n_retrievals_object_dead)); - if (ret < 0) - goto error; - - /* ask the cache to honour the operation */ - if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { - fscache_stat(&fscache_n_cop_allocate_pages); - ret = object->cache->ops->allocate_pages( - op, pages, nr_pages, gfp); - fscache_stat_d(&fscache_n_cop_allocate_pages); - } else { - fscache_stat(&fscache_n_cop_read_or_alloc_pages); - ret = object->cache->ops->read_or_alloc_pages( - op, pages, nr_pages, gfp); - fscache_stat_d(&fscache_n_cop_read_or_alloc_pages); - } - -error: - if (ret == -ENOMEM) - fscache_stat(&fscache_n_retrievals_nomem); - else if (ret == -ERESTARTSYS) - fscache_stat(&fscache_n_retrievals_intr); - else if (ret == -ENODATA) - fscache_stat(&fscache_n_retrievals_nodata); - else if (ret < 0) - 
fscache_stat(&fscache_n_retrievals_nobufs); - else - fscache_stat(&fscache_n_retrievals_ok); - - fscache_put_retrieval(op); - _leave(" = %d", ret); - return ret; - -nobufs_unlock_dec: - atomic_dec(&object->n_reads); - wake_cookie = __fscache_unuse_cookie(cookie); -nobufs_unlock: - spin_unlock(&cookie->lock); - fscache_put_retrieval(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); -nobufs: - fscache_stat(&fscache_n_retrievals_nobufs); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} -EXPORT_SYMBOL(__fscache_read_or_alloc_pages); - -/* - * allocate a block in the cache in which to store a page - * - we return: - * -ENOMEM - out of memory, nothing done - * -ERESTARTSYS - interrupted - * -ENOBUFS - no backing object available in which to cache the block - * 0 - block allocated - */ -int __fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - struct fscache_retrieval *op; - struct fscache_object *object; - bool wake_cookie = false; - int ret; - - _enter("%p,%p,,,", cookie, page); - - fscache_stat(&fscache_n_allocs); - - if (hlist_empty(&cookie->backing_objects)) - goto nobufs; - - ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); - ASSERTCMP(page, !=, NULL); - - if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { - _leave(" = -ENOBUFS [invalidating]"); - return -ENOBUFS; - } - - if (fscache_wait_for_deferred_lookup(cookie) < 0) - return -ERESTARTSYS; - - op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL); - if (!op) - return -ENOMEM; - atomic_set(&op->n_pages, 1); - trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_alloc_one); - - spin_lock(&cookie->lock); - - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) - goto nobufs_unlock; - object = hlist_entry(cookie->backing_objects.first, - struct fscache_object, cookie_link); - - __fscache_use_cookie(cookie); - if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock_dec; - spin_unlock(&cookie->lock); - - fscache_stat(&fscache_n_alloc_ops); - - ret = fscache_wait_for_operation_activation( - object, &op->op, - __fscache_stat(&fscache_n_alloc_op_waits), - __fscache_stat(&fscache_n_allocs_object_dead)); - if (ret < 0) - goto error; - - /* ask the cache to honour the operation */ - fscache_stat(&fscache_n_cop_allocate_page); - ret = object->cache->ops->allocate_page(op, page, gfp); - fscache_stat_d(&fscache_n_cop_allocate_page); - -error: - if (ret == -ERESTARTSYS) - fscache_stat(&fscache_n_allocs_intr); - else if (ret < 0) - fscache_stat(&fscache_n_allocs_nobufs); - else - fscache_stat(&fscache_n_allocs_ok); - - fscache_put_retrieval(op); - _leave(" = %d", ret); - return ret; - -nobufs_unlock_dec: - wake_cookie = __fscache_unuse_cookie(cookie); -nobufs_unlock: - spin_unlock(&cookie->lock); - fscache_put_retrieval(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); -nobufs: - fscache_stat(&fscache_n_allocs_nobufs); - _leave(" = -ENOBUFS"); - return -ENOBUFS; -} -EXPORT_SYMBOL(__fscache_alloc_page); - -/* - * Unmark pages allocated in the readahead code path (via: - * fscache_readpages_or_alloc) after delegating to the base filesystem - */ -void __fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages) -{ - struct page *page; - - list_for_each_entry(page, pages, lru) { - if (PageFsCache(page)) - __fscache_uncache_page(cookie, page); - } -} -EXPORT_SYMBOL(__fscache_readpages_cancel); - -/* - * release a write op reference - */ -static void fscache_release_write_op(struct fscache_operation 
*_op) -{ - _enter("{OP%x}", _op->debug_id); -} - -/* - * perform the background storage of a page into the cache - */ -static void fscache_write_op(struct fscache_operation *_op) -{ - struct fscache_storage *op = - container_of(_op, struct fscache_storage, op); - struct fscache_object *object = op->op.object; - struct fscache_cookie *cookie; - struct page *page; - unsigned n; - void *results[1]; - int ret; - - _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); - -again: - spin_lock(&object->lock); - cookie = object->cookie; - - if (!fscache_object_is_active(object)) { - /* If we get here, then the on-disk cache object likely no - * longer exists, so we should just cancel this write - * operation. - */ - spin_unlock(&object->lock); - fscache_op_complete(&op->op, true); - _leave(" [inactive]"); - return; - } - - if (!cookie) { - /* If we get here, then the cookie belonging to the object was - * detached, probably by the cookie being withdrawn due to - * memory pressure, which means that the pages we might write - * to the cache from no longer exist - therefore, we can just - * cancel this write operation. - */ - spin_unlock(&object->lock); - fscache_op_complete(&op->op, true); - _leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}", - _op->flags, _op->state, object->state->short_name, - object->flags); - return; - } - - spin_lock(&cookie->stores_lock); - - fscache_stat(&fscache_n_store_calls); - - /* find a page to store */ - results[0] = NULL; - page = NULL; - n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, - FSCACHE_COOKIE_PENDING_TAG); - trace_fscache_gang_lookup(cookie, &op->op, results, n, op->store_limit); - if (n != 1) - goto superseded; - page = results[0]; - _debug("gang %d [%lx]", n, page->index); - - radix_tree_tag_set(&cookie->stores, page->index, - FSCACHE_COOKIE_STORING_TAG); - radix_tree_tag_clear(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG); - trace_fscache_page(cookie, page, fscache_page_radix_pend2store); - - spin_unlock(&cookie->stores_lock); - spin_unlock(&object->lock); - - if (page->index >= op->store_limit) - goto discard_page; - - fscache_stat(&fscache_n_store_pages); - fscache_stat(&fscache_n_cop_write_page); - ret = object->cache->ops->write_page(op, page); - fscache_stat_d(&fscache_n_cop_write_page); - trace_fscache_wrote_page(cookie, page, &op->op, ret); - fscache_end_page_write(object, page); - if (ret < 0) { - fscache_abort_object(object); - fscache_op_complete(&op->op, true); - } else { - fscache_enqueue_operation(&op->op); - } - - _leave(""); - return; - -discard_page: - fscache_stat(&fscache_n_store_pages_over_limit); - trace_fscache_wrote_page(cookie, page, &op->op, -ENOBUFS); - fscache_end_page_write(object, page); - goto again; - -superseded: - /* this writer is going away and there aren't any more things to - * write */ - _debug("cease"); - spin_unlock(&cookie->stores_lock); - clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); - spin_unlock(&object->lock); - fscache_op_complete(&op->op, false); - _leave(""); -} - -/* - * Clear the pages pending writing for invalidation - */ -void fscache_invalidate_writes(struct fscache_cookie *cookie) -{ - struct page *page; - void *results[16]; - int n, i; - - _enter(""); - - for (;;) { - spin_lock(&cookie->stores_lock); - n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, - ARRAY_SIZE(results), - FSCACHE_COOKIE_PENDING_TAG); - if (n == 0) { - spin_unlock(&cookie->stores_lock); - break; - } - - for (i = n - 1; i >= 0; i--) { - page = results[i]; - 
radix_tree_delete(&cookie->stores, page->index); - trace_fscache_page(cookie, page, fscache_page_radix_delete); - trace_fscache_page(cookie, page, fscache_page_inval); - } - - spin_unlock(&cookie->stores_lock); - - for (i = n - 1; i >= 0; i--) - put_page(results[i]); - } - - wake_up_bit(&cookie->flags, 0); - trace_fscache_wake_cookie(cookie); - - _leave(""); -} - -/* - * request a page be stored in the cache - * - returns: - * -ENOMEM - out of memory, nothing done - * -ENOBUFS - no backing object available in which to cache the page - * 0 - dispatched a write - it'll call end_io_func() when finished - * - * if the cookie still has a backing object at this point, that object can be - * in one of a few states with respect to storage processing: - * - * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is - * set) - * - * (a) no writes yet - * - * (b) writes deferred till post-creation (mark page for writing and - * return immediately) - * - * (2) negative lookup, object created, initial fill being made from netfs - * - * (a) fill point not yet reached this page (mark page for writing and - * return) - * - * (b) fill point passed this page (queue op to store this page) - * - * (3) object extant (queue op to store this page) - * - * any other state is invalid - */ -int __fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp) -{ - struct fscache_storage *op; - struct fscache_object *object; - bool wake_cookie = false; - int ret; - - _enter("%p,%x,", cookie, (u32) page->flags); - - ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); - ASSERT(PageFsCache(page)); - - fscache_stat(&fscache_n_stores); - - if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { - _leave(" = -ENOBUFS [invalidating]"); - return -ENOBUFS; - } - - op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); - if (!op) - goto nomem; - - fscache_operation_init(cookie, &op->op, fscache_write_op, NULL, - fscache_release_write_op); - op->op.flags = FSCACHE_OP_ASYNC | - (1 << FSCACHE_OP_WAITING) | - (1 << FSCACHE_OP_UNUSE_COOKIE); - - ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM); - if (ret < 0) - goto nomem_free; - - trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_write_one); - - ret = -ENOBUFS; - spin_lock(&cookie->lock); - - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) - goto nobufs; - object = hlist_entry(cookie->backing_objects.first, - struct fscache_object, cookie_link); - if (test_bit(FSCACHE_IOERROR, &object->cache->flags)) - goto nobufs; - - trace_fscache_page(cookie, page, fscache_page_write); - - /* add the page to the pending-storage radix tree on the backing - * object */ - spin_lock(&object->lock); - - if (object->store_limit_l != object_size) - fscache_set_store_limit(object, object_size); - - spin_lock(&cookie->stores_lock); - - _debug("store limit %llx", (unsigned long long) object->store_limit); - - ret = radix_tree_insert(&cookie->stores, page->index, page); - if (ret < 0) { - if (ret == -EEXIST) - goto already_queued; - _debug("insert failed %d", ret); - goto nobufs_unlock_obj; - } - - trace_fscache_page(cookie, page, fscache_page_radix_insert); - radix_tree_tag_set(&cookie->stores, page->index, - FSCACHE_COOKIE_PENDING_TAG); - trace_fscache_page(cookie, page, fscache_page_radix_set_pend); - get_page(page); - - /* we only want one writer at a time, but we do need to queue new - * writers after exclusive ops */ - if 
(test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) - goto already_pending; - - spin_unlock(&cookie->stores_lock); - spin_unlock(&object->lock); - - op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); - op->store_limit = object->store_limit; - - __fscache_use_cookie(cookie); - if (fscache_submit_op(object, &op->op) < 0) - goto submit_failed; - - spin_unlock(&cookie->lock); - radix_tree_preload_end(); - fscache_stat(&fscache_n_store_ops); - fscache_stat(&fscache_n_stores_ok); - - /* the work queue now carries its own ref on the object */ - fscache_put_operation(&op->op); - _leave(" = 0"); - return 0; - -already_queued: - fscache_stat(&fscache_n_stores_again); -already_pending: - spin_unlock(&cookie->stores_lock); - spin_unlock(&object->lock); - spin_unlock(&cookie->lock); - radix_tree_preload_end(); - fscache_put_operation(&op->op); - fscache_stat(&fscache_n_stores_ok); - _leave(" = 0"); - return 0; - -submit_failed: - spin_lock(&cookie->stores_lock); - radix_tree_delete(&cookie->stores, page->index); - trace_fscache_page(cookie, page, fscache_page_radix_delete); - spin_unlock(&cookie->stores_lock); - wake_cookie = __fscache_unuse_cookie(cookie); - put_page(page); - ret = -ENOBUFS; - goto nobufs; - -nobufs_unlock_obj: - spin_unlock(&cookie->stores_lock); - spin_unlock(&object->lock); -nobufs: - spin_unlock(&cookie->lock); - radix_tree_preload_end(); - fscache_put_operation(&op->op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); - fscache_stat(&fscache_n_stores_nobufs); - _leave(" = -ENOBUFS"); - return -ENOBUFS; - -nomem_free: - fscache_put_operation(&op->op); -nomem: - fscache_stat(&fscache_n_stores_oom); - _leave(" = -ENOMEM"); - return -ENOMEM; -} -EXPORT_SYMBOL(__fscache_write_page); - -/* - * remove a page from the cache - */ -void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) -{ - struct fscache_object *object; - - _enter(",%p", page); - - ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); - ASSERTCMP(page, !=, NULL); - - fscache_stat(&fscache_n_uncaches); - - /* cache withdrawal may beat us to it */ - if (!PageFsCache(page)) - goto done; - - trace_fscache_page(cookie, page, fscache_page_uncache); - - /* get the object */ - spin_lock(&cookie->lock); - - if (hlist_empty(&cookie->backing_objects)) { - ClearPageFsCache(page); - goto done_unlock; - } - - object = hlist_entry(cookie->backing_objects.first, - struct fscache_object, cookie_link); - - /* there might now be stuff on disk we could read */ - clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); - - /* only invoke the cache backend if we managed to mark the page - * uncached here; this deals with synchronisation vs withdrawal */ - if (TestClearPageFsCache(page) && - object->cache->ops->uncache_page) { - /* the cache backend releases the cookie lock */ - fscache_stat(&fscache_n_cop_uncache_page); - object->cache->ops->uncache_page(object, page); - fscache_stat_d(&fscache_n_cop_uncache_page); - goto done; - } - -done_unlock: - spin_unlock(&cookie->lock); -done: - _leave(""); -} -EXPORT_SYMBOL(__fscache_uncache_page); - -/** - * fscache_mark_page_cached - Mark a page as being cached - * @op: The retrieval op pages are being marked for - * @page: The page to be marked - * - * Mark a netfs page as being cached. After this is called, the netfs - * must call fscache_uncache_page() to remove the mark. 
- */ -void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) -{ - struct fscache_cookie *cookie = op->op.object->cookie; - -#ifdef CONFIG_FSCACHE_STATS - atomic_inc(&fscache_n_marks); -#endif - - trace_fscache_page(cookie, page, fscache_page_cached); - - _debug("- mark %p{%lx}", page, page->index); - if (TestSetPageFsCache(page)) { - static bool once_only; - if (!once_only) { - once_only = true; - pr_warn("Cookie type %s marked page %lx multiple times\n", - cookie->def->name, page->index); - } - } - - if (cookie->def->mark_page_cached) - cookie->def->mark_page_cached(cookie->netfs_data, - op->mapping, page); -} -EXPORT_SYMBOL(fscache_mark_page_cached); - -/** - * fscache_mark_pages_cached - Mark pages as being cached - * @op: The retrieval op pages are being marked for - * @pagevec: The pages to be marked - * - * Mark a bunch of netfs pages as being cached. After this is called, - * the netfs must call fscache_uncache_page() to remove the mark. - */ -void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec) -{ - unsigned long loop; - - for (loop = 0; loop < pagevec->nr; loop++) - fscache_mark_page_cached(op, pagevec->pages[loop]); - - pagevec_reinit(pagevec); -} -EXPORT_SYMBOL(fscache_mark_pages_cached); - -/* - * Uncache all the pages in an inode that are marked PG_fscache, assuming them - * to be associated with the given cookie. - */ -void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode) -{ - struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; - pgoff_t next; - int i; - - _enter("%p,%p", cookie, inode); - - if (!mapping || mapping->nrpages == 0) { - _leave(" [no pages]"); - return; - } - - pagevec_init(&pvec); - next = 0; - do { - if (!pagevec_lookup(&pvec, mapping, &next)) - break; - for (i = 0; i < pagevec_count(&pvec); i++) { - struct page *page = pvec.pages[i]; - if (PageFsCache(page)) { - __fscache_wait_on_page_write(cookie, page); - __fscache_uncache_page(cookie, page); - } - } - pagevec_release(&pvec); - cond_resched(); - } while (next); - - _leave(""); -} -EXPORT_SYMBOL(__fscache_uncache_all_inode_pages); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 3ffa34c99977a..4cefce5bf4cd5 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -127,7 +127,6 @@ atomic_t fscache_n_cop_allocate_page; atomic_t fscache_n_cop_allocate_pages; atomic_t fscache_n_cop_write_page; atomic_t fscache_n_cop_uncache_page; -atomic_t fscache_n_cop_dissociate_pages; atomic_t fscache_n_cache_no_space_reject; atomic_t fscache_n_cache_stale_objects; @@ -271,14 +270,13 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_put_object), atomic_read(&fscache_n_cop_attr_changed), atomic_read(&fscache_n_cop_sync_cache)); - seq_printf(m, "CacheOp: rap=%d ras=%d alp=%d als=%d wrp=%d ucp=%d dsp=%d\n", + seq_printf(m, "CacheOp: rap=%d ras=%d alp=%d als=%d wrp=%d ucp=%d\n", atomic_read(&fscache_n_cop_read_or_alloc_page), atomic_read(&fscache_n_cop_read_or_alloc_pages), atomic_read(&fscache_n_cop_allocate_page), atomic_read(&fscache_n_cop_allocate_pages), atomic_read(&fscache_n_cop_write_page), - atomic_read(&fscache_n_cop_uncache_page), - atomic_read(&fscache_n_cop_dissociate_pages)); + atomic_read(&fscache_n_cop_uncache_page)); seq_printf(m, "CacheEv: nsp=%d stl=%d rtr=%d cul=%d\n", atomic_read(&fscache_n_cache_no_space_reject), atomic_read(&fscache_n_cache_stale_objects), diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 
573b1da9342c1..4bd5ce736193d 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -98,30 +98,6 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, return FSCACHE_CHECKAUX_OKAY; } -/* - * Get an extra reference on a read context. - * - This function can be absent if the completion function doesn't require a - * context. - * - The read context is passed back to NFS in the event that a data read on the - * cache fails with EIO - in which case the server must be contacted to - * retrieve the data, which requires the read context for security. - */ -static void nfs_fh_get_context(void *cookie_netfs_data, void *context) -{ - get_nfs_open_context(context); -} - -/* - * Release an extra reference on a read context. - * - This function can be absent if the completion function doesn't require a - * context. - */ -static void nfs_fh_put_context(void *cookie_netfs_data, void *context) -{ - if (context) - put_nfs_open_context(context); -} - /* * Define the inode object for FS-Cache. This is used to describe an inode * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for @@ -135,6 +111,4 @@ const struct fscache_cookie_def nfs_fscache_inode_object_def = { .name = "NFS.fh", .type = FSCACHE_COOKIE_TYPE_DATAFILE, .check_aux = nfs_fscache_inode_check_aux, - .get_context = nfs_fh_get_context, - .put_context = nfs_fh_put_context, }; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index efa9b6f9fab17..5e610f9a524c9 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -137,87 +137,6 @@ extern void fscache_operation_init(struct fscache_cookie *, fscache_operation_cancel_t, fscache_operation_release_t); -/* - * data read operation - */ -struct fscache_retrieval { - struct fscache_operation op; - struct fscache_cookie *cookie; /* The netfs cookie */ - struct address_space *mapping; /* netfs pages */ - fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ - void *context; /* netfs read context (pinned) */ - struct list_head to_do; /* list of things to be done by the backend */ - atomic_t n_pages; /* number of pages to be retrieved */ -}; - -typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp); - -typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp); - -/** - * fscache_get_retrieval - Get an extra reference on a retrieval operation - * @op: The retrieval operation to get a reference on - * - * Get an extra reference on a retrieval operation. - */ -static inline -struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) -{ - atomic_inc(&op->op.usage); - return op; -} - -/** - * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing - * @op: The retrieval operation affected - * - * Enqueue a retrieval operation for processing by the FS-Cache thread pool. 
- */ -static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) -{ - fscache_enqueue_operation(&op->op); -} - -/** - * fscache_retrieval_complete - Record (partial) completion of a retrieval - * @op: The retrieval operation affected - * @n_pages: The number of pages to account for - */ -static inline void fscache_retrieval_complete(struct fscache_retrieval *op, - int n_pages) -{ - if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0) - fscache_op_complete(&op->op, false); -} - -/** - * fscache_put_retrieval - Drop a reference to a retrieval operation - * @op: The retrieval operation affected - * - * Drop a reference to a retrieval operation. - */ -static inline void fscache_put_retrieval(struct fscache_retrieval *op) -{ - fscache_put_operation(&op->op); -} - -/* - * cached page storage work item - * - used to do three things: - * - batch writes to the cache - * - do cache writes asynchronously - * - defer writes until cache object lookup completion - */ -struct fscache_storage { - struct fscache_operation op; - pgoff_t store_limit; /* don't write more than this */ -}; - /* * cache operations */ @@ -275,35 +194,6 @@ struct fscache_cache_ops { /* reserve space for an object's data and associated metadata */ int (*reserve_space)(struct fscache_object *object, loff_t i_size); - /* request a backing block for a page be read or allocated in the - * cache */ - fscache_page_retrieval_func_t read_or_alloc_page; - - /* request backing blocks for a list of pages be read or allocated in - * the cache */ - fscache_pages_retrieval_func_t read_or_alloc_pages; - - /* request a backing block for a page be allocated in the cache so that - * it can be written directly */ - fscache_page_retrieval_func_t allocate_page; - - /* request backing blocks for pages be allocated in the cache so that - * they can be written directly */ - fscache_pages_retrieval_func_t allocate_pages; - - /* write a page to its backing block in the cache */ - int (*write_page)(struct fscache_storage *op, struct page *page); - - /* detach backing block from a page (optional) - * - must release the cookie lock before returning - * - may sleep - */ - void (*uncache_page)(struct fscache_object *object, - struct page *page); - - /* dissociate a cache from all the pages it was backing */ - void (*dissociate_pages)(struct fscache_cache *cache); - /* Begin an operation for the netfs lib */ int (*begin_operation)(struct netfs_cache_resources *cres, struct fscache_operation *op); @@ -466,21 +356,6 @@ void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) object->store_limit++; } -/** - * fscache_end_io - End a retrieval operation on a page - * @op: The FS-Cache operation covering the retrieval - * @page: The page that was to be fetched - * @error: The error code (0 if successful) - * - * Note the end of an operation to retrieve a page, as covered by a particular - * operation record. 
- */ -static inline void fscache_end_io(struct fscache_retrieval *op, - struct page *page, int error) -{ - op->end_io_func(page, op->context, error); -} - static inline void __fscache_use_cookie(struct fscache_cookie *cookie) { atomic_inc(&cookie->n_active); @@ -538,12 +413,6 @@ extern void fscache_withdraw_cache(struct fscache_cache *cache); extern void fscache_io_error(struct fscache_cache *cache); -extern void fscache_mark_page_cached(struct fscache_retrieval *op, - struct page *page); - -extern void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec); - extern bool fscache_object_sleep_till_congested(signed long *timeoutp); extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 715fb830e982f..01558d1557999 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -37,10 +37,6 @@ struct fscache_cookie; struct fscache_netfs; struct netfs_read_request; -typedef void (*fscache_rw_complete_t)(struct page *page, - void *context, - int error); - /* result of index entry consultation */ enum fscache_checkaux { FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ @@ -79,27 +75,6 @@ struct fscache_cookie_def { const void *data, uint16_t datalen, loff_t object_size); - - /* get an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*get_context)(void *cookie_netfs_data, void *context); - - /* release an extra reference on a read context - * - this function can be absent if the completion function doesn't - * require a context - */ - void (*put_context)(void *cookie_netfs_data, void *context); - - /* indicate page that now have cache metadata retained - * - this function should mark the specified page as now being cached - * - the page will have been marked with PG_fscache before this is - * called, so this is optional - */ - void (*mark_page_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct page *page); }; /* @@ -126,16 +101,12 @@ struct fscache_cookie { atomic_t n_active; /* number of active users of netfs ptrs */ unsigned int debug_id; spinlock_t lock; - spinlock_t stores_lock; /* lock on page store tree */ struct hlist_head backing_objects; /* object(s) backing this file/index */ const struct fscache_cookie_def *def; /* definition */ struct fscache_cookie *parent; /* parent of this entry */ struct hlist_bl_node hash_link; /* Link in hash table */ struct list_head proc_link; /* Link in proc list */ void *netfs_data; /* back pointer to netfs */ - struct radix_tree_root stores; /* pages to be stored on this cookie */ -#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ -#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */ unsigned long flags; #define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ @@ -193,7 +164,6 @@ extern void __fscache_update_cookie(struct fscache_cookie *, const void *); extern int __fscache_attr_changed(struct fscache_cookie *); extern void __fscache_invalidate(struct fscache_cookie *); extern void __fscache_wait_on_invalidate(struct fscache_cookie *); - #ifdef FSCACHE_USE_NEW_IO_API extern int __fscache_begin_operation(struct netfs_cache_resources *, struct fscache_cookie *, bool); @@ -202,33 +172,6 @@ extern int __fscache_begin_operation(struct netfs_cache_resources *, struct fsca extern int __fscache_fallback_read_page(struct fscache_cookie *, struct page *); extern 
int __fscache_fallback_write_page(struct fscache_cookie *, struct page *); #endif -#ifdef FSCACHE_USE_OLD_IO_API -extern int __fscache_read_or_alloc_page(struct fscache_cookie *, - struct page *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, - struct address_space *, - struct list_head *, - unsigned *, - fscache_rw_complete_t, - void *, - gfp_t); -extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); -extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t); -extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); -extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); -extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); -extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *, - gfp_t); -extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *, - struct inode *); -extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages); - -#endif /* FSCACHE_USE_OLD_IO_API */ - extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, bool (*)(void *), void *); @@ -495,24 +438,6 @@ void fscache_wait_on_invalidate(struct fscache_cookie *cookie) __fscache_wait_on_invalidate(cookie); } -/** - * fscache_reserve_space - Reserve data space for a cached object - * @cookie: The cookie representing the cache object - * @i_size: The amount of space to be reserved - * - * Reserve an amount of space in the cache for the cache object attached to a - * cookie so that a write to that object within the space can always be - * honoured. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) -{ - return -ENOBUFS; -} - #ifdef FSCACHE_USE_NEW_IO_API /** @@ -619,289 +544,6 @@ int fscache_write(struct netfs_cache_resources *cres, #endif /* FSCACHE_USE_NEW_IO_API */ -#ifdef FSCACHE_USE_OLD_IO_API - -/** - * fscache_read_or_alloc_page - Read a page from the cache or allocate a block - * in which to store it - * @cookie: The cookie representing the cache object - * @page: The netfs page to fill if possible - * @end_io_func: The callback to invoke when and if the page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a page from the cache, or if that's not possible make a potential - * one-block reservation in the cache into which the page may be stored once - * fetched from the server. - * - * If the page is not backed by the cache object, or if there's some reason - * it can't be, -ENOBUFS will be returned and nothing more will be done for - * that page. - * - * Else, if that page is backed by the cache, a read will be initiated directly - * to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if the page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
- */ -static inline -int fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_page(cookie, page, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate - * blocks in which to store them - * @cookie: The cookie representing the cache object - * @mapping: The netfs inode mapping to which the pages will be attached - * @pages: A list of potential netfs pages to be filled - * @nr_pages: Number of pages to be read and/or allocated - * @end_io_func: The callback to invoke when and if each page is filled - * @context: An arbitrary piece of data to pass on to end_io_func() - * @gfp: The conditions under which memory allocation should be made - * - * Read a set of pages from the cache, or if that's not possible, attempt to - * make a potential one-block reservation for each page in the cache into which - * that page may be stored once fetched from the server. - * - * If some pages are not backed by the cache object, or if there's some - * reason they can't be, -ENOBUFS will be returned and nothing more will be - * done for those pages. - * - * Else, if some of the pages are backed by the cache, a read will be initiated - * directly to the netfs's page and 0 will be returned by this function. The - * end_io_func() callback will be invoked when the operation terminates on a - * completion or failure. Note that the callback may be invoked before the - * return. - * - * Else, if a page is unbacked, -ENODATA is returned and a block may have - * been allocated in the cache. - * - * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in - * regard to different pages, the return values are prioritised in that order. - * Any pages submitted for reading are removed from the pages list. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_read_or_alloc_pages(cookie, mapping, pages, - nr_pages, end_io_func, - context, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_alloc_page - Allocate a block in which to store a page - * @cookie: The cookie representing the cache object - * @page: The netfs page to allocate a page for - * @gfp: The conditions under which memory allocation should be made - * - * Request allocation of a block in the cache in which to store a netfs page - * without retrieving any contents from the cache. - * - * If the page is not backed by a file then -ENOBUFS will be returned and - * nothing more will be done, and no reservation will be made. - * - * Else, a block will be allocated if one wasn't already, and 0 will be - * returned. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
- */ -static inline -int fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_alloc_page(cookie, page, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_readpages_cancel - Cancel read/alloc on pages - * @cookie: The cookie representing the inode's cache object. - * @pages: The netfs pages that we canceled write on in readpages() - * - * Uncache/unreserve the pages reserved earlier in readpages() via - * fscache_readpages_or_alloc() and similar. In most successful caches in - * readpages() this doesn't do anything. In cases when the underlying netfs's - * readahead failed we need to clean up the pagelist (unmark and uncache). - * - * This function may sleep as it may have to clean up disk state. - */ -static inline -void fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages) -{ - if (fscache_cookie_valid(cookie)) - __fscache_readpages_cancel(cookie, pages); -} - -/** - * fscache_write_page - Request storage of a page in the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page to store - * @object_size: Updated size of object - * @gfp: The conditions under which memory allocation should be made - * - * Request the contents of the netfs page be written into the cache. This - * request may be ignored if no cache block is currently allocated, in which - * case it will return -ENOBUFS. - * - * If a cache block was already allocated, a write will be initiated and 0 will - * be returned. The PG_fscache_write page bit is set immediately and will then - * be cleared at the completion of the write to indicate the success or failure - * of the operation. Note that the completion may happen before the return. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -int fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie)) - return __fscache_write_page(cookie, page, object_size, gfp); - else - return -ENOBUFS; -} - -/** - * fscache_uncache_page - Indicate that caching is no longer required on a page - * @cookie: The cookie representing the cache object - * @page: The netfs page that was being cached. - * - * Tell the cache that we no longer want a page to be cached and that it should - * remove any knowledge of the netfs page it may have. - * - * Note that this cannot cancel any outstanding I/O operations between this - * page and the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_uncache_page(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - __fscache_uncache_page(cookie, page); } - -/** - * fscache_check_page_write - Ask if a page is being written to the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache if a page is being written to the cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. 
- */ -static inline -bool fscache_check_page_write(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - return __fscache_check_page_write(cookie, page); - return false; -} - -/** - * fscache_wait_on_page_write - Wait for a page to complete writing to the cache - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * - * Ask the cache to wake us up when a page is no longer being written to the - * cache. - * - * See Documentation/filesystems/caching/netfs-api.rst for a complete - * description. - */ -static inline -void fscache_wait_on_page_write(struct fscache_cookie *cookie, - struct page *page) -{ - if (fscache_cookie_valid(cookie)) - __fscache_wait_on_page_write(cookie, page); } - -/** - * fscache_maybe_release_page - Consider releasing a page, cancelling a store - * @cookie: The cookie representing the cache object - * @page: The netfs page that is being cached. - * @gfp: The gfp flags passed to releasepage() - * - * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's - * releasepage() call. A storage request on the page may be cancelled if it is - * not currently being processed. - * - * The function returns true if the page no longer has a storage request on it, - * and false if a storage request is left in place. If true is returned, the - * page will have been passed to fscache_uncache_page(). If false is returned - * the page cannot be freed yet. - */ -static inline -bool fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp) -{ - if (fscache_cookie_valid(cookie) && PageFsCache(page)) - return __fscache_maybe_release_page(cookie, page, gfp); - return true; -} - -/** - * fscache_uncache_all_inode_pages - Uncache all an inode's pages - * @cookie: The cookie representing the inode's cache object. - * @inode: The inode to uncache pages from. - * - * Uncache all the pages in an inode that are marked PG_fscache, assuming them - * to be associated with the given cookie. - * - * This function may sleep. It will wait for pages that are being written out - * and will wait whilst the PG_fscache mark is removed by the cache. - */ -static inline -void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode) -{ - if (fscache_cookie_valid(cookie)) - __fscache_uncache_all_inode_pages(cookie, inode); -} - -#endif /* FSCACHE_USE_OLD_IO_API */ - /** * fscache_disable_cookie - Disable a cookie * @cookie: The cookie representing the cache object From 0a741e05f43ff0b112b627a378b6fd0fe743bc8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Sep 2021 14:12:23 +0100 Subject: [PATCH 0183/1202] fscache: Remove stats that are no longer used Remove stats counters that are no longer used and remove their display from /proc/fs/fscache/stats. 
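As a rough sketch of the intended effect on the /proc/fs/fscache/stats output (the counter values below are invented purely for illustration; only the line shapes follow from the seq_printf() changes in this patch): the Pages, Allocs, VmScan and CacheOp lines go away entirely, and the Stores line shrinks to the three counters that are still maintained, e.g.:

	[before] Stores : ops=2 owt=0 run=2 pgs=2 rxd=2 olm=0 abt=0
	[after]  Stores : ops=2 owt=0 abt=0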
Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163162776915.438332.16744135153328311006.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189114616.2509237.9890995143218948138.stgit@warthog.procyon.org.uk/ # rfc v2 --- fs/fscache/internal.h | 28 ------------------- fs/fscache/stats.c | 63 +------------------------------------------ 2 files changed, 1 insertion(+), 90 deletions(-) diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 5281fa1894cd9..6eb3f51d7275c 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -151,15 +151,6 @@ extern atomic_t fscache_n_attr_changed_nobufs; extern atomic_t fscache_n_attr_changed_nomem; extern atomic_t fscache_n_attr_changed_calls; -extern atomic_t fscache_n_allocs; -extern atomic_t fscache_n_allocs_ok; -extern atomic_t fscache_n_allocs_wait; -extern atomic_t fscache_n_allocs_nobufs; -extern atomic_t fscache_n_allocs_intr; -extern atomic_t fscache_n_allocs_object_dead; -extern atomic_t fscache_n_alloc_ops; -extern atomic_t fscache_n_alloc_op_waits; - extern atomic_t fscache_n_retrievals; extern atomic_t fscache_n_retrievals_ok; extern atomic_t fscache_n_retrievals_wait; @@ -178,22 +169,9 @@ extern atomic_t fscache_n_stores_nobufs; extern atomic_t fscache_n_stores_intr; extern atomic_t fscache_n_stores_oom; extern atomic_t fscache_n_store_ops; -extern atomic_t fscache_n_store_calls; -extern atomic_t fscache_n_store_pages; -extern atomic_t fscache_n_store_radix_deletes; -extern atomic_t fscache_n_store_pages_over_limit; extern atomic_t fscache_n_stores_object_dead; extern atomic_t fscache_n_store_op_waits; -extern atomic_t fscache_n_store_vmscan_not_storing; -extern atomic_t fscache_n_store_vmscan_gone; -extern atomic_t fscache_n_store_vmscan_busy; -extern atomic_t fscache_n_store_vmscan_cancelled; -extern atomic_t fscache_n_store_vmscan_wait; - -extern atomic_t fscache_n_marks; -extern atomic_t fscache_n_uncaches; - extern atomic_t fscache_n_acquires; extern atomic_t fscache_n_acquires_null; extern atomic_t fscache_n_acquires_no_cache; @@ -242,12 +220,6 @@ extern atomic_t fscache_n_cop_drop_object; extern atomic_t fscache_n_cop_put_object; extern atomic_t fscache_n_cop_sync_cache; extern atomic_t fscache_n_cop_attr_changed; -extern atomic_t fscache_n_cop_read_or_alloc_page; -extern atomic_t fscache_n_cop_read_or_alloc_pages; -extern atomic_t fscache_n_cop_allocate_page; -extern atomic_t fscache_n_cop_allocate_pages; -extern atomic_t fscache_n_cop_write_page; -extern atomic_t fscache_n_cop_uncache_page; extern atomic_t fscache_n_cache_no_space_reject; extern atomic_t fscache_n_cache_stale_objects; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4cefce5bf4cd5..2449aa4591409 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -30,15 +30,6 @@ atomic_t fscache_n_attr_changed_nobufs; atomic_t fscache_n_attr_changed_nomem; atomic_t fscache_n_attr_changed_calls; -atomic_t fscache_n_allocs; -atomic_t fscache_n_allocs_ok; -atomic_t fscache_n_allocs_wait; -atomic_t fscache_n_allocs_nobufs; -atomic_t fscache_n_allocs_intr; -atomic_t fscache_n_allocs_object_dead; -atomic_t fscache_n_alloc_ops; -atomic_t fscache_n_alloc_op_waits; - atomic_t fscache_n_retrievals; atomic_t fscache_n_retrievals_ok; atomic_t fscache_n_retrievals_wait; @@ -57,22 +48,9 @@ atomic_t fscache_n_stores_nobufs; atomic_t fscache_n_stores_intr; atomic_t fscache_n_stores_oom; atomic_t fscache_n_store_ops; -atomic_t fscache_n_store_calls; -atomic_t fscache_n_store_pages; -atomic_t 
fscache_n_store_radix_deletes; -atomic_t fscache_n_store_pages_over_limit; atomic_t fscache_n_stores_object_dead; atomic_t fscache_n_store_op_waits; -atomic_t fscache_n_store_vmscan_not_storing; -atomic_t fscache_n_store_vmscan_gone; -atomic_t fscache_n_store_vmscan_busy; -atomic_t fscache_n_store_vmscan_cancelled; -atomic_t fscache_n_store_vmscan_wait; - -atomic_t fscache_n_marks; -atomic_t fscache_n_uncaches; - atomic_t fscache_n_acquires; atomic_t fscache_n_acquires_null; atomic_t fscache_n_acquires_no_cache; @@ -121,12 +99,6 @@ atomic_t fscache_n_cop_drop_object; atomic_t fscache_n_cop_put_object; atomic_t fscache_n_cop_sync_cache; atomic_t fscache_n_cop_attr_changed; -atomic_t fscache_n_cop_read_or_alloc_page; -atomic_t fscache_n_cop_read_or_alloc_pages; -atomic_t fscache_n_cop_allocate_page; -atomic_t fscache_n_cop_allocate_pages; -atomic_t fscache_n_cop_write_page; -atomic_t fscache_n_cop_uncache_page; atomic_t fscache_n_cache_no_space_reject; atomic_t fscache_n_cache_stale_objects; @@ -156,10 +128,6 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_checkaux_update), atomic_read(&fscache_n_checkaux_obsolete)); - seq_printf(m, "Pages : mrk=%u unc=%u\n", - atomic_read(&fscache_n_marks), - atomic_read(&fscache_n_uncaches)); - seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u" " oom=%u\n", atomic_read(&fscache_n_acquires), @@ -198,17 +166,6 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_attr_changed_nomem), atomic_read(&fscache_n_attr_changed_calls)); - seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u int=%u\n", - atomic_read(&fscache_n_allocs), - atomic_read(&fscache_n_allocs_ok), - atomic_read(&fscache_n_allocs_wait), - atomic_read(&fscache_n_allocs_nobufs), - atomic_read(&fscache_n_allocs_intr)); - seq_printf(m, "Allocs : ops=%u owt=%u abt=%u\n", - atomic_read(&fscache_n_alloc_ops), - atomic_read(&fscache_n_alloc_op_waits), - atomic_read(&fscache_n_allocs_object_dead)); - seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" " int=%u oom=%u\n", atomic_read(&fscache_n_retrievals), @@ -230,22 +187,11 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_stores_nobufs), atomic_read(&fscache_n_stores_intr), atomic_read(&fscache_n_stores_oom)); - seq_printf(m, "Stores : ops=%u owt=%u run=%u pgs=%u rxd=%u olm=%u abt=%u\n", + seq_printf(m, "Stores : ops=%u owt=%u abt=%u\n", atomic_read(&fscache_n_store_ops), atomic_read(&fscache_n_store_op_waits), - atomic_read(&fscache_n_store_calls), - atomic_read(&fscache_n_store_pages), - atomic_read(&fscache_n_store_radix_deletes), - atomic_read(&fscache_n_store_pages_over_limit), atomic_read(&fscache_n_stores_object_dead)); - seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n", - atomic_read(&fscache_n_store_vmscan_not_storing), - atomic_read(&fscache_n_store_vmscan_gone), - atomic_read(&fscache_n_store_vmscan_busy), - atomic_read(&fscache_n_store_vmscan_cancelled), - atomic_read(&fscache_n_store_vmscan_wait)); - seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), atomic_read(&fscache_n_op_run), @@ -270,13 +216,6 @@ int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_cop_put_object), atomic_read(&fscache_n_cop_attr_changed), atomic_read(&fscache_n_cop_sync_cache)); - seq_printf(m, "CacheOp: rap=%d ras=%d alp=%d als=%d wrp=%d ucp=%d\n", - atomic_read(&fscache_n_cop_read_or_alloc_page), - atomic_read(&fscache_n_cop_read_or_alloc_pages), - atomic_read(&fscache_n_cop_allocate_page), - 
atomic_read(&fscache_n_cop_allocate_pages), - atomic_read(&fscache_n_cop_write_page), - atomic_read(&fscache_n_cop_uncache_page)); seq_printf(m, "CacheEv: nsp=%d stl=%d rtr=%d cul=%d\n", atomic_read(&fscache_n_cache_no_space_reject), atomic_read(&fscache_n_cache_stale_objects), From a193a3a202ed96e84d18d3bae840496700258919 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Sep 2021 13:00:57 +0100 Subject: [PATCH 0184/1202] fscache: Update the documentation to reflect I/O API changes Update the fscache documentation to remove the old I/O API bits and to note the new fallback API. Changes ======= ver #2: - Changed "deprecated" to "fallback" in the new function names[1]. Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/CAHk-=wiVK+1CyEjW8u71zVPK8msea=qPpznX35gnX+s8sXnJTg@mail.gmail.com/ [1] Link: https://lore.kernel.org/r/163162778200.438332.1918683687532006409.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/163189115518.2509237.6454712882112339524.stgit@warthog.procyon.org.uk/ # rfc v2 --- .../filesystems/caching/backend-api.rst | 138 +------ .../filesystems/caching/netfs-api.rst | 385 ++---------------- 2 files changed, 47 insertions(+), 476 deletions(-) diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst index 19fbf6b9aa360..08fdd92d502aa 100644 --- a/Documentation/filesystems/caching/backend-api.rst +++ b/Documentation/filesystems/caching/backend-api.rst @@ -355,14 +355,6 @@ performed on the denizens of the cache. These are held in a structure of type: device. - * Dissociate a cache [mandatory]:: - - void (*dissociate_pages)(struct fscache_cache *cache) - - This is called to ask a cache to perform any page dissociations as part of - cache withdrawal. - - * Notification that the attributes on a netfs file changed [mandatory]:: int (*attr_changed)(struct fscache_object *object); @@ -402,123 +394,14 @@ performed on the denizens of the cache. These are held in a structure of type: size if larger than that already. - * Request page be read from cache [mandatory]:: - - int (*read_or_alloc_page)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) - - This is called to attempt to read a netfs page from the cache, or to - reserve a backing block if not. FS-Cache will have done as much checking - as it can before calling, but most of the work belongs to the backend. - - If there's no page in the cache, then -ENODATA should be returned if the - backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it - didn't. - - If there is suitable data in the cache, then a read operation should be - queued and 0 returned. When the read finishes, fscache_end_io() should be - called. - - The fscache_mark_pages_cached() should be called for the page if any cache - metadata is retained. This will indicate to the netfs that the page needs - explicit uncaching. This operation takes a pagevec, thus allowing several - pages to be marked at once. - - The retrieval record pointed to by op should be retained for each page - queued and released when I/O on the page has been formally ended. - fscache_get/put_retrieval() are available for this purpose. - - The retrieval record may be used to get CPU time via the FS-Cache thread - pool. If this is desired, the op->op.processor should be set to point to - the appropriate processing routine, and fscache_enqueue_retrieval() should - be called at an appropriate point to request CPU time. 
For instance, the - retrieval routine could be enqueued upon the completion of a disk read. - The to_do field in the retrieval record is provided to aid in this. - - If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS - returned if possible or fscache_end_io() called with a suitable error - code. - - fscache_put_retrieval() should be called after a page or pages are dealt - with. This will complete the operation when all pages are dealt with. - - - * Request pages be read from cache [mandatory]:: - - int (*read_or_alloc_pages)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) - - This is like the read_or_alloc_page() method, except it is handed a list - of pages instead of one page. Any pages on which a read operation is - started must be added to the page cache for the specified mapping and also - to the LRU. Such pages must also be removed from the pages list and - ``*nr_pages`` decremented per page. - - If there was an error such as -ENOMEM, then that should be returned; else - if one or more pages couldn't be read or allocated, then -ENOBUFS should - be returned; else if one or more pages couldn't be read, then -ENODATA - should be returned. If all the pages are dispatched then 0 should be - returned. - - - * Request page be allocated in the cache [mandatory]:: + * Begin an operation [mandatory]:: - int (*allocate_page)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) + int (*begin_operation)(struct netfs_cache_resources *cres, + struct fscache_operation *op); - This is like the read_or_alloc_page() method, except that it shouldn't - read from the cache, even if there's data there that could be retrieved. - It should, however, set up any internal metadata required such that - the write_page() method can write to the cache. - - If there's no backing block available, then -ENOBUFS should be returned - (or -ENOMEM if there were other problems). If a block is successfully - allocated, then the netfs page should be marked and 0 returned. - - - * Request pages be allocated in the cache [mandatory]:: - - int (*allocate_pages)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) - - This is an multiple page version of the allocate_page() method. pages and - nr_pages should be treated as for the read_or_alloc_pages() method. - - - * Request page be written to cache [mandatory]:: - - int (*write_page)(struct fscache_storage *op, - struct page *page); - - This is called to write from a page on which there was a previously - successful read_or_alloc_page() call or similar. FS-Cache filters out - pages that don't have mappings. - - This method is called asynchronously from the FS-Cache thread pool. It is - not required to actually store anything, provided -ENODATA is then - returned to the next read of this page. - - If an error occurred, then a negative error code should be returned, - otherwise zero should be returned. FS-Cache will take appropriate action - in response to an error, such as withdrawing this object. - - If this method returns success then FS-Cache will inform the netfs - appropriately. - - - * Discard retained per-page metadata [mandatory]:: - - void (*uncache_page)(struct fscache_object *object, struct page *page) - - This is called when a netfs page is being evicted from the pagecache. The - cache backend should tear down any internal representation or tracking it - maintains for this page. 
+ This is called to start an operation on behalf of the network filesystem + or the netfs helper library. The cache resources attached to *cres + should be filled in by the cache so that the operation can be performed. FS-Cache Utilities @@ -578,15 +461,6 @@ FS-Cache provides some utilities that a cache backend may make use of: rejected by fscache_read_alloc_page() and co with -ENOBUFS. - * Mark pages as being cached:: - - void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec); - - This marks a set of pages as being cached. After this has been called, - the netfs must call fscache_uncache_page() to unmark the pages. - - * Perform coherency check on an object:: enum fscache_checkaux fscache_check_aux(struct fscache_object *object, diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst index d9f14b8610bad..a469cb9dbdcdc 100644 --- a/Documentation/filesystems/caching/netfs-api.rst +++ b/Documentation/filesystems/caching/netfs-api.rst @@ -32,15 +32,13 @@ This API is declared in . (7) Data file registration (8) Miscellaneous object registration (9) Setting the data file size - (10) Page alloc/read/write - (11) Page uncaching - (12) Index and data file consistency - (13) Cookie enablement - (14) Miscellaneous cookie operations - (15) Cookie unregistration - (16) Index invalidation - (17) Data file invalidation - (18) FS-Cache specific page flags. + (10) Page read/write + (11) Index and data file consistency + (12) Cookie enablement + (13) Miscellaneous cookie operations + (14) Cookie unregistration + (15) Index invalidation + (16) Data file invalidation Network Filesystem Definition @@ -132,14 +130,6 @@ To define an object, a structure of the following type should be filled out:: const void *data, uint16_t datalen, loff_t object_size); - - void (*get_context)(void *cookie_netfs_data, void *context); - - void (*put_context)(void *cookie_netfs_data, void *context); - - void (*mark_pages_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct pagevec *cached_pvec); }; This has the following fields: @@ -200,42 +190,6 @@ This has the following fields: This function can also be used to extract data from the auxiliary data in the cache and copy it into the netfs's structures. - (5) A pair of functions to manage contexts for the completion callback - [optional]. - - The cache read/write functions are passed a context which is then passed - to the I/O completion callback function. To ensure this context remains - valid until after the I/O completion is called, two functions may be - provided: one to get an extra reference on the context, and one to drop a - reference to it. - - If the context is not used or is a type of object that won't go out of - scope, then these functions are not required. These functions are not - required for indices as indices may not contain data. These functions may - be called in interrupt context and so may not sleep. - - (6) A function to mark a page as retaining cache metadata [optional]. - - This is called by the cache to indicate that it is retaining in-memory - information for this page and that the netfs should uncache the page when - it has finished. This does not indicate whether there's data on the disk - or not. Note that several pages at once may be presented for marking. - - The PG_fscache bit is set on the pages before this function would be - called, so the function need not be provided if this is sufficient. 
- - This function is not required for indices as they're not permitted data. - - (7) A function to unmark all the pages retaining cache metadata [mandatory]. - - This is called by FS-Cache to indicate that a backing store is being - unbound from a cookie and that all the marks on the pages should be - cleared to prevent confusion. Note that the cache will have torn down all - its tracking information so that the pages don't need to be explicitly - uncached. - - This function is not required for indices as they're not permitted data. - Network Filesystem (Un)registration =================================== @@ -412,277 +366,56 @@ some point in the future, and as such, it may happen after the function returns to the caller. The attribute adjustment excludes read and write operations. -Page alloc/read/write +Page Read/Write ===================== -And the sixth step is to store and retrieve pages in the cache. There are -three functions that are used to do this. - -Note: - - (1) A page should not be re-read or re-allocated without uncaching it first. - - (2) A read or allocated page must be uncached when the netfs page is released - from the pagecache. - - (3) A page should only be written to the cache if previous read or allocated. - -This permits the cache to maintain its page tracking in proper order. - - -PAGE READ ---------- - -Firstly, the netfs should ask FS-Cache to examine the caches and read the -contents cached for a particular page of a particular file if present, or else -allocate space to store the contents if not:: - - typedef - void (*fscache_rw_complete_t)(struct page *page, - void *context, - int error); - - int fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp); - -The cookie argument must specify a cookie for an object that isn't an index, -the page specified will have the data loaded into it (and is also used to -specify the page number), and the gfp argument is used to control how any -memory allocations made are satisfied. - -If the cookie indicates the inode is not cached: - - (1) The function will return -ENOBUFS. - -Else if there's a copy of the page resident in the cache: - - (1) The mark_pages_cached() cookie operation will be called on that page. - - (2) The function will submit a request to read the data from the cache's - backing device directly into the page specified. - - (3) The function will return 0. - - (4) When the read is complete, end_io_func() will be invoked with: - - * The netfs data supplied when the cookie was created. - - * The page descriptor. - - * The context argument passed to the above function. This will be - maintained with the get_context/put_context functions mentioned above. - - * An argument that's 0 on success or negative for an error code. - - If an error occurs, it should be assumed that the page contains no usable - data. fscache_readpages_cancel() may need to be called. - - end_io_func() will be called in process context if the read is results in - an error, but it might be called in interrupt context if the read is - successful. - -Otherwise, if there's not a copy available in cache, but the cache may be able -to store the page: - - (1) The mark_pages_cached() cookie operation will be called on that page. - - (2) A block may be reserved in the cache and attached to the object at the - appropriate place. - - (3) The function will return -ENODATA. 
- -This function may also return -ENOMEM or -EINTR, in which case it won't have -read any data from the cache. - - -Page Allocate -------------- - -Alternatively, if there's not expected to be any data in the cache for a page -because the file has been extended, a block can simply be allocated instead:: - - int fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp); - -This is similar to the fscache_read_or_alloc_page() function, except that it -never reads from the cache. It will return 0 if a block has been allocated, -rather than -ENODATA as the other would. One or the other must be performed -before writing to the cache. - -The mark_pages_cached() cookie operation will be called on the page if -successful. - - -Page Write ----------- +And the sixth step is to store and retrieve pages in the cache. The functions +provided may do direct I/O calls on the backing filesystem and it is up to the +network filesystem to prevent clashes. Typically, a page would be locked for +the duration of a read and a page would be marked with PageFsCache whilst it is +being written out. -Secondly, if the netfs changes the contents of the page (either due to an -initial download or if a user performs a write), then the page should be -written back to the cache:: +By preference, reading would be performed through the netfs library's helper +functions, but there is a fallback API, though this should be considered +deprecated as it may lead to data corruption, depending on the characteristics +of the backing filesystem. If the fallback API is to be used, the filesystem +must do:: - int fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp); - -The cookie argument must specify a data file cookie, the page specified should -contain the data to be written (and is also used to specify the page number), -object_size is the revised size of the object and the gfp argument is used to -control how any memory allocations made are satisfied. - -The page must have first been read or allocated successfully and must not have -been uncached before writing is performed. - -If the cookie indicates the inode is not cached then: - - (1) The function will return -ENOBUFS. - -Else if space can be allocated in the cache to hold this page: - - (1) PG_fscache_write will be set on the page. + #define FSCACHE_USE_FALLBACK_IO_API + #include - (2) The function will submit a request to write the data to cache's backing - device directly from the page specified. - (3) The function will return 0. - - (4) When the write is complete PG_fscache_write is cleared on the page and - anyone waiting for that bit will be woken up. - -Else if there's no space available in the cache, -ENOBUFS will be returned. It -is also possible for the PG_fscache_write bit to be cleared when no write took -place if unforeseen circumstances arose (such as a disk error). - -Writing takes place asynchronously. - - -Multiple Page Read +Fallback Page Read ------------------ -A facility is provided to read several pages at once, as requested by the -readpages() address space operation:: - - int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - int *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp); - -This works in a similar way to fscache_read_or_alloc_page(), except: - - (1) Any page it can retrieve data for is removed from pages and nr_pages and - dispatched for reading to the disk. 
Reads of adjacent pages on disk may - be merged for greater efficiency. - - (2) The mark_pages_cached() cookie operation will be called on several pages - at once if they're being read or allocated. - - (3) If there was an general error, then that error will be returned. - - Else if some pages couldn't be allocated or read, then -ENOBUFS will be - returned. - - Else if some pages couldn't be read but were allocated, then -ENODATA will - be returned. - - Otherwise, if all pages had reads dispatched, then 0 will be returned, the - list will be empty and ``*nr_pages`` will be 0. - - (4) end_io_func will be called once for each page being read as the reads - complete. It will be called in process context if error != 0, but it may - be called in interrupt context if there is no error. - -Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude -some of the pages being read and some being allocated. Those pages will have -been marked appropriately and will need uncaching. - - -Cancellation of Unread Pages ----------------------------- - -If one or more pages are passed to fscache_read_or_alloc_pages() but not then -read from the cache and also not read from the underlying filesystem then -those pages will need to have any marks and reservations removed. This can be -done by calling:: - - void fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages); +A page may be synchronously read from the backing filesystem:: -prior to returning to the caller. The cookie argument should be as passed to -fscache_read_or_alloc_pages(). Every page in the pages list will be examined -and any that have PG_fscache set will be uncached. + int fscache_fallback_read_page(struct fscache_cookie *cookie, + struct page *page); +The cookie argument must specify a cookie for an object that isn't an index and +the page specified will have the data loaded into it (and is also used to +specify the page number). The function will return 0 if the page was +read, -ENODATA if there was no data and -ENOBUFS if there was no cache +attached. It may also return errors such as -ENOMEM or -EINTR. It might also +return some other error from the backing filesystem, but this should be treated +as -ENOBUFS. -Page Uncaching -============== - -To uncache a page, this function should be called:: - - void fscache_uncache_page(struct fscache_cookie *cookie, - struct page *page); - -This function permits the cache to release any in-memory representation it -might be holding for this netfs page. This function must be called once for -each page on which the read or write page functions above have been called to -make sure the cache's in-memory tracking information gets torn down. - -Note that pages can't be explicitly deleted from the a data file. The whole -data file must be retired (see the relinquish cookie function below). - -Furthermore, note that this does not cancel the asynchronous read or write -operation started by the read/alloc and write functions, so the page -invalidation functions must use:: - bool fscache_check_page_write(struct fscache_cookie *cookie, - struct page *page); +Fallback Page Write ------------------- -to see if a page is being written to the cache, and:: +A page may be synchronously written to the backing filesystem:: - void fscache_wait_on_page_write(struct fscache_cookie *cookie, + int fscache_fallback_write_page(struct fscache_cookie *cookie, struct page *page); -to wait for it to finish if it is.
- - -When releasepage() is being implemented, a special FS-Cache function exists to -manage the heuristics of coping with vmscan trying to eject pages, which may -conflict with the cache trying to write pages to the cache (which may itself -need to allocate memory):: - - bool fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp); - -This takes the netfs cookie, and the page and gfp arguments as supplied to -releasepage(). It will return false if the page cannot be released yet for -some reason and if it returns true, the page has been uncached and can now be -released. - -To make a page available for release, this function may wait for an outstanding -storage request to complete, or it may attempt to cancel the storage request - -in which case the page will not be stored in the cache this time. - - -Bulk Image Page Uncache ------------------------ - -A convenience routine is provided to perform an uncache on all the pages -attached to an inode. This assumes that the pages on the inode correspond on a -1:1 basis with the pages in the cache:: - - void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode); - -This takes the netfs cookie that the pages were cached with and the inode that -the pages are attached to. This function will wait for pages to finish being -written to the cache and for the cache to finish with the page generally. No -error is returned. +The cookie argument must specify a cookie for an object that isn't an index and +the page specified will have the data written from it (and is also used to +specify the page number). The function will return 0 if the page was written +and -ENOBUFS if there was no cache attached or no space available in the cache. +It may also return errors such as -ENOMEM or -EINTR. It might also return some +other error from the backing filesystem, but this should be treated as -ENOBUFS. Index and Data File consistency @@ -858,39 +591,3 @@ to have reached a point at which it can start submitting ordinary operations once again:: void fscache_wait_on_invalidate(struct fscache_cookie *cookie); - - -FS-cache Specific Page Flag -=========================== - -FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is -given the alternative name PG_fscache. - -PG_fscache is used to indicate that the page is known by the cache, and that -the cache must be informed if the page is going to go away. It's an indication -to the netfs that the cache has an interest in this page, where an interest may -be a pointer to it, resources allocated or reserved for it, or I/O in progress -upon it. - -The netfs can use this information in methods such as releasepage() to -determine whether it needs to uncache a page or update it. - -Furthermore, if this bit is set, releasepage() and invalidatepage() operations -will be called on a page to get rid of it, even if PG_private is not set. This -allows caching to attempted on a page before read_cache_pages() to be called -after fscache_read_or_alloc_pages() as the former will try and release pages it -was given under certain circumstances. - -This bit does not overlap with such as PG_private. This means that FS-Cache -can be used with a filesystem that uses the block buffering code.
- -There are a number of operations defined on this flag:: - - int PageFsCache(struct page *page); - void SetPageFsCache(struct page *page) - void ClearPageFsCache(struct page *page) - int TestSetPageFsCache(struct page *page) - int TestClearPageFsCache(struct page *page) - -These functions are bit test, bit set, bit clear, bit test and set and bit -test and clear operations on PG_fscache. From 5dccbc9de8f0071eb731b4de81d0638ea6c06a53 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 30 Sep 2021 01:28:05 +0300 Subject: [PATCH 0185/1202] drm/tegra: dc: rgb: Allow changing PLLD rate on Tegra30+ Asus Transformer TF700T is a Tegra30 tablet device which uses RGB->DSI bridge that requires a precise clock rate in order to operate properly. Tegra30 has a dedicated PLL for each display controller, hence the PLL rate can be changed freely. Allow PLL rate changes on Tegra30+ for RGB output. Configure the clock rate before display controller is enabled since DC itself may be running off this PLL and it's not okay to change the rate of the active PLL that doesn't support dynamic frequency switching since hardware will hang. Tested-by: Maxim Schwalm #TF700T Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/dc.c | 27 ++++++++++++-------- drivers/gpu/drm/tegra/dc.h | 1 + drivers/gpu/drm/tegra/rgb.c | 49 +++++++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index e62b091b93f22..a457ee954a495 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -1834,10 +1834,9 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, return 0; } -static void tegra_dc_commit_state(struct tegra_dc *dc, - struct tegra_dc_state *state) +static void tegra_dc_set_clock_rate(struct tegra_dc *dc, + struct tegra_dc_state *state) { - u32 value; int err; err = clk_set_parent(dc->clk, state->clk); @@ -1868,11 +1867,6 @@ static void tegra_dc_commit_state(struct tegra_dc *dc, DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk), state->div); DRM_DEBUG_KMS("pclk: %lu\n", state->pclk); - - if (!dc->soc->has_nvdisplay) { - value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1; - tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); - } } static void tegra_dc_stop(struct tegra_dc *dc) @@ -2074,6 +2068,9 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, u32 value; int err; + /* apply PLL changes */ + tegra_dc_set_clock_rate(dc, crtc_state); + err = host1x_client_resume(&dc->client); if (err < 0) { dev_err(dc->dev, "failed to resume: %d\n", err); @@ -2148,8 +2145,11 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, else tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR); - /* apply PLL and pixel clock changes */ - tegra_dc_commit_state(dc, crtc_state); + /* apply pixel clock changes */ + if (!dc->soc->has_nvdisplay) { + value = SHIFT_CLK_DIVIDER(crtc_state->div) | PIXEL_CLK_DIVIDER_PCD1; + tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL); + } /* program display mode */ tegra_dc_set_timings(dc, mode); @@ -2763,6 +2763,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .has_win_b_vfilter_mem_client = true, .has_win_c_without_vert_filter = true, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct tegra_dc_soc_info tegra30_dc_soc_info = { @@ -2785,6 +2786,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .has_win_b_vfilter_mem_client = true, 
.has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra114_dc_soc_info = { @@ -2807,6 +2809,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra124_dc_soc_info = { @@ -2829,6 +2832,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { @@ -2851,6 +2855,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .has_win_b_vfilter_mem_client = false, .has_win_c_without_vert_filter = false, .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, }; static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { @@ -2901,6 +2906,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .wgrps = tegra186_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { @@ -2951,6 +2957,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = { .wgrps = tegra194_dc_wgrps, .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, }; static const struct of_device_id tegra_dc_of_match[] = { diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 40378308d527a..c9c4c45c05183 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -76,6 +76,7 @@ struct tegra_dc_soc_info { bool has_win_b_vfilter_mem_client; bool has_win_c_without_vert_filter; bool plane_tiled_memory_bandwidth_x2; + bool has_pll_d2_out0; }; struct tegra_dc { diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 933e14e4609f0..ff8fce36d2aa1 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -17,6 +17,8 @@ struct tegra_rgb { struct tegra_output output; struct tegra_dc *dc; + struct clk *pll_d_out0; + struct clk *pll_d2_out0; struct clk *clk_parent; struct clk *clk; }; @@ -119,6 +121,18 @@ static void tegra_rgb_encoder_enable(struct drm_encoder *encoder) tegra_dc_commit(rgb->dc); } +static bool tegra_rgb_pll_rate_change_allowed(struct tegra_rgb *rgb) +{ + if (!rgb->pll_d2_out0) + return false; + + if (!clk_is_match(rgb->clk_parent, rgb->pll_d_out0) && + !clk_is_match(rgb->clk_parent, rgb->pll_d2_out0)) + return false; + + return true; +} + static int tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -147,8 +161,17 @@ tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder, * and hope that the desired frequency can be matched (or at least * matched sufficiently close that the panel will still work). */ - div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2; - pclk = 0; + if (tegra_rgb_pll_rate_change_allowed(rgb)) { + /* + * Set display controller clock to x2 of PCLK in order to + * produce higher resolution pulse positions. 
+ */ + div = 2; + pclk *= 2; + } else { + div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2; + pclk = 0; + } err = tegra_dc_state_setup_clock(dc, crtc_state, rgb->clk_parent, pclk, div); @@ -206,6 +229,22 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) return err; } + rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0"); + if (IS_ERR(rgb->pll_d_out0)) { + err = PTR_ERR(rgb->pll_d_out0); + dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err); + return err; + } + + if (dc->soc->has_pll_d2_out0) { + rgb->pll_d2_out0 = clk_get_sys(NULL, "pll_d2_out0"); + if (IS_ERR(rgb->pll_d2_out0)) { + err = PTR_ERR(rgb->pll_d2_out0); + dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err); + return err; + } + } + dc->rgb = &rgb->output; return 0; @@ -213,9 +252,15 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) int tegra_dc_rgb_remove(struct tegra_dc *dc) { + struct tegra_rgb *rgb; + if (!dc->rgb) return 0; + rgb = to_rgb(dc->rgb); + clk_put(rgb->pll_d2_out0); + clk_put(rgb->pll_d_out0); + tegra_output_remove(dc->rgb); dc->rgb = NULL; From f9d8de699ac4a059e30cf0e84e7027f1058b9163 Mon Sep 17 00:00:00 2001 From: Chanho Park Date: Fri, 8 Oct 2021 18:14:41 +0900 Subject: [PATCH 0186/1202] pinctrl: samsung: support ExynosAutov9 SoC pinctrl Add pinctrl data for ExynosAuto v9 SoC. - GPA0, GPA1: 10, External wake up interrupt - GPQ0: 2, XbootLDO, Speedy PMIC I/F - GPB0, GPB1, GPB2, GPB3: 29, I2S 7 CH - GPF0, GPF1, GPF2, GPF3,GPF4, GPF5, GPF6, GPF8: 52, FSYS - GPG0, GPG1, GPG2, GPG3: 25, GPIO x 24, SMPL_INT - GPP0, GPP1, GPP2, GPP3, GPP4, GPP5: 48, USI 12 CH Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20211008091443.44625-2-chanho61.park@samsung.com Signed-off-by: Krzysztof Kozlowski --- .../bindings/pinctrl/samsung-pinctrl.txt | 1 + .../pinctrl/samsung/pinctrl-exynos-arm64.c | 108 ++++++++++++++++++ drivers/pinctrl/samsung/pinctrl-samsung.c | 2 + drivers/pinctrl/samsung/pinctrl-samsung.h | 1 + 4 files changed, 112 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt index e7a1b18803757..b8b475967ff92 100644 --- a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt @@ -23,6 +23,7 @@ Required Properties: - "samsung,exynos5433-pinctrl": for Exynos5433 compatible pin-controller. - "samsung,exynos7-pinctrl": for Exynos7 compatible pin-controller. - "samsung,exynos850-pinctrl": for Exynos850 compatible pin-controller. + - "samsung,exynosautov9-pinctrl": for ExynosAutov9 compatible pin-controller. - reg: Base address of the pin controller hardware module and length of the address space it occupies. 
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index fe5f6046fbd52..6b77fd24571e1 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -538,3 +538,111 @@ const struct samsung_pinctrl_of_match_data exynos850_of_data __initconst = { .ctrl = exynos850_pin_ctrl, .num_ctrl = ARRAY_SIZE(exynos850_pin_ctrl), }; + +/* pin banks of exynosautov9 pin-controller 0 (ALIVE) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks0[] __initconst = { + EXYNOS850_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), + EXYNOS850_PIN_BANK_EINTW(2, 0x020, "gpa1", 0x04), + EXYNOS850_PIN_BANK_EINTN(2, 0x040, "gpq0"), +}; + +/* pin banks of exynosautov9 pin-controller 1 (AUD) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks1[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(5, 0x000, "gpb0", 0x00), + EXYNOS850_PIN_BANK_EINTG(8, 0x020, "gpb1", 0x04), + EXYNOS850_PIN_BANK_EINTG(8, 0x040, "gpb2", 0x08), + EXYNOS850_PIN_BANK_EINTG(8, 0x060, "gpb3", 0x0C), +}; + +/* pin banks of exynosautov9 pin-controller 2 (FSYS0) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks2[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(6, 0x000, "gpf0", 0x00), + EXYNOS850_PIN_BANK_EINTG(6, 0x020, "gpf1", 0x04), +}; + +/* pin banks of exynosautov9 pin-controller 3 (FSYS1) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks3[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(6, 0x000, "gpf8", 0x00), +}; + +/* pin banks of exynosautov9 pin-controller 4 (FSYS2) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks4[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(4, 0x000, "gpf2", 0x00), + EXYNOS850_PIN_BANK_EINTG(8, 0x020, "gpf3", 0x04), + EXYNOS850_PIN_BANK_EINTG(7, 0x040, "gpf4", 0x08), + EXYNOS850_PIN_BANK_EINTG(8, 0x060, "gpf5", 0x0C), + EXYNOS850_PIN_BANK_EINTG(7, 0x080, "gpf6", 0x10), +}; + +/* pin banks of exynosautov9 pin-controller 5 (PERIC0) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks5[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(8, 0x000, "gpp0", 0x00), + EXYNOS850_PIN_BANK_EINTG(8, 0x020, "gpp1", 0x04), + EXYNOS850_PIN_BANK_EINTG(8, 0x040, "gpp2", 0x08), + EXYNOS850_PIN_BANK_EINTG(5, 0x060, "gpg0", 0x0C), +}; + +/* pin banks of exynosautov9 pin-controller 6 (PERIC1) */ +static const struct samsung_pin_bank_data exynosautov9_pin_banks6[] __initconst = { + EXYNOS850_PIN_BANK_EINTG(8, 0x000, "gpp3", 0x00), + EXYNOS850_PIN_BANK_EINTG(8, 0x020, "gpp4", 0x04), + EXYNOS850_PIN_BANK_EINTG(8, 0x040, "gpp5", 0x08), + EXYNOS850_PIN_BANK_EINTG(8, 0x060, "gpg1", 0x0C), + EXYNOS850_PIN_BANK_EINTG(8, 0x080, "gpg2", 0x10), + EXYNOS850_PIN_BANK_EINTG(4, 0x0A0, "gpg3", 0x14), +}; + +static const struct samsung_pin_ctrl exynosautov9_pin_ctrl[] __initconst = { + { + /* pin-controller instance 0 ALIVE data */ + .pin_banks = exynosautov9_pin_banks0, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks0), + .eint_wkup_init = exynos_eint_wkup_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, { + /* pin-controller instance 1 AUD data */ + .pin_banks = exynosautov9_pin_banks1, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks1), + }, { + /* pin-controller instance 2 FSYS0 data */ + .pin_banks = exynosautov9_pin_banks2, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks2), + .eint_gpio_init = exynos_eint_gpio_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, { + /* pin-controller instance 3 FSYS1 data */ 
+ .pin_banks = exynosautov9_pin_banks3, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks3), + .eint_gpio_init = exynos_eint_gpio_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, { + /* pin-controller instance 4 FSYS2 data */ + .pin_banks = exynosautov9_pin_banks4, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks4), + .eint_gpio_init = exynos_eint_gpio_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, { + /* pin-controller instance 5 PERIC0 data */ + .pin_banks = exynosautov9_pin_banks5, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks5), + .eint_gpio_init = exynos_eint_gpio_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, { + /* pin-controller instance 6 PERIC1 data */ + .pin_banks = exynosautov9_pin_banks6, + .nr_banks = ARRAY_SIZE(exynosautov9_pin_banks6), + .eint_gpio_init = exynos_eint_gpio_init, + .suspend = exynos_pinctrl_suspend, + .resume = exynos_pinctrl_resume, + }, +}; + +const struct samsung_pinctrl_of_match_data exynosautov9_of_data __initconst = { + .ctrl = exynosautov9_pin_ctrl, + .num_ctrl = ARRAY_SIZE(exynosautov9_pin_ctrl), +}; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 2a0fc63516f12..23f355ae9ca01 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1266,6 +1266,8 @@ static const struct of_device_id samsung_pinctrl_dt_match[] = { .data = &exynos7_of_data }, { .compatible = "samsung,exynos850-pinctrl", .data = &exynos850_of_data }, + { .compatible = "samsung,exynosautov9-pinctrl", + .data = &exynosautov9_of_data }, #endif #ifdef CONFIG_PINCTRL_S3C64XX { .compatible = "samsung,s3c64xx-pinctrl", diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h index 4c2149e9c544e..547968a31aed6 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.h +++ b/drivers/pinctrl/samsung/pinctrl-samsung.h @@ -340,6 +340,7 @@ extern const struct samsung_pinctrl_of_match_data exynos5420_of_data; extern const struct samsung_pinctrl_of_match_data exynos5433_of_data; extern const struct samsung_pinctrl_of_match_data exynos7_of_data; extern const struct samsung_pinctrl_of_match_data exynos850_of_data; +extern const struct samsung_pinctrl_of_match_data exynosautov9_of_data; extern const struct samsung_pinctrl_of_match_data s3c64xx_of_data; extern const struct samsung_pinctrl_of_match_data s3c2412_of_data; extern const struct samsung_pinctrl_of_match_data s3c2416_of_data; From ab6ff1fda1e8646b27b9d87b718250f8c828ad55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 Sep 2021 11:51:46 +0200 Subject: [PATCH 0187/1202] uml: x86: add FORCE to user_constants.h The build system has started warning when filechk is called without FORCE: arch/x86/um/Makefile:44: FORCE prerequisite is missing Add FORCE to make sure the file is checked/rebuilt when necessary (and to quiet up the warning.) 
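For background, user_constants.h comes from kbuild's generic offsets machinery: a C file is compiled to assembly, marker lines embedded in the .s output are turned into #define lines by the filechk_offsets rule, and the FORCE prerequisite makes the recipe re-run each build while filechk itself only rewrites the header when its content actually changes. A rough sketch of the C side of that trick, simplified from the DEFINE() macro in include/linux/kbuild.h (the exact asm marker syntax varies between kernel versions)::

    /* Compiled with -S only; never assembled into an object file. */
    #define DEFINE(sym, val) \
            asm volatile("\n->" #sym " %0 " #val : : "i" (val))

    struct example {                /* hypothetical host-side structure */
            int a;
            long b;
    };

    void foo(void)
    {
            /* leaves an "->EXAMPLE_B <offset>" marker in the .s output */
            DEFINE(EXAMPLE_B, __builtin_offsetof(struct example, b));
    }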
Signed-off-by: Johannes Berg Reviewed-by: David Gow Signed-off-by: Richard Weinberger --- arch/x86/um/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 5ccb18290d717..ba5789c358094 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -40,7 +40,7 @@ $(obj)/user-offsets.s: c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) \ -Iarch/x86/include/generated targets += user-offsets.s -include/generated/user_constants.h: $(obj)/user-offsets.s +include/generated/user_constants.h: $(obj)/user-offsets.s FORCE $(call filechk,offsets,__USER_CONSTANT_H__) UNPROFILE_OBJS := stub_segv.o From 80484b7f8db101119928c73e7ce09ae6be54e45c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 10 Sep 2021 08:22:49 +0200 Subject: [PATCH 0188/1202] PCI/VPD: Use pci_read_vpd_any() in pci_vpd_size() Use new function pci_read_vpd_any() to simplify the code. Link: https://lore.kernel.org/r/049fa71c-c7af-9c69-51c0-05c1bc2bf660@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Acked-by: Jakub Kicinski --- drivers/pci/vpd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c index 0a804715d98e7..5108bbd20c957 100644 --- a/drivers/pci/vpd.c +++ b/drivers/pci/vpd.c @@ -57,10 +57,7 @@ static size_t pci_vpd_size(struct pci_dev *dev) size_t off = 0, size; unsigned char tag, header[1+2]; /* 1 byte tag, 2 bytes length */ - /* Otherwise the following reads would fail. */ - dev->vpd.len = PCI_VPD_MAX_SIZE; - - while (pci_read_vpd(dev, off, 1, header) == 1) { + while (pci_read_vpd_any(dev, off, 1, header) == 1) { size = 0; if (off == 0 && (header[0] == 0x00 || header[0] == 0xff)) @@ -68,7 +65,7 @@ static size_t pci_vpd_size(struct pci_dev *dev) if (header[0] & PCI_VPD_LRDT) { /* Large Resource Data Type Tag */ - if (pci_read_vpd(dev, off + 1, 2, &header[1]) != 2) { + if (pci_read_vpd_any(dev, off + 1, 2, &header[1]) != 2) { pci_warn(dev, "failed VPD read at offset %zu\n", off + 1); return off ?: PCI_VPD_SZ_INVALID; From eb89bfbd0e8f5a9a2573afa8097dbdeb9ca7da53 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 10 Sep 2021 08:24:07 +0200 Subject: [PATCH 0189/1202] cxgb3: Remove t3_seeprom_read and use VPD API Using the VPD API allows to simplify the code and completely get rid of t3_seeprom_read(). Note that we don't have to use pci_read_vpd_any() here because a VPD quirk sets dev->vpd.len to the full EEPROM size. Tested with a T320 card. 
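The PCI core's VPD accessor hides the PCI_VPD_ADDR/PCI_VPD_DATA polling handshake that t3_seeprom_read() open-coded, and it returns a byte count on success or a negative errno. A minimal sketch of the calling convention, using a hypothetical helper name::

    #include <linux/pci.h>

    /* example_read_vpd() is illustrative, not a driver function */
    static int example_read_vpd(struct pci_dev *pdev, loff_t off,
                                void *buf, size_t len)
    {
            /* returns the number of bytes read or a negative errno */
            ssize_t cnt = pci_read_vpd(pdev, off, len, buf);

            if (cnt < 0)
                    return cnt;

            return cnt == (ssize_t)len ? 0 : -EIO;  /* short read -> error */
    }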
Link: https://lore.kernel.org/r/68ef15bb-b6bf-40ad-160c-aaa72c4a70f8@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Acked-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/common.h | 1 - .../net/ethernet/chelsio/cxgb3/cxgb3_main.c | 27 ++++------ drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 54 +++---------------- 3 files changed, 18 insertions(+), 64 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index b706f2fbe4f48..56312f9ed5d86 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -676,7 +676,6 @@ void t3_link_changed(struct adapter *adapter, int port_id); void t3_link_fault(struct adapter *adapter, int port_id); int t3_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); const struct adapter_info *t3_get_adapter_info(unsigned int board_id); -int t3_seeprom_read(struct adapter *adapter, u32 addr, __le32 *data); int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data); int t3_seeprom_wp(struct adapter *adapter, int enable); int t3_get_tp_version(struct adapter *adapter, u32 *vers); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 38e47703f9abd..d733396821332 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2036,20 +2036,16 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; - int i, err = 0; - - u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; + int cnt; e->magic = EEPROM_MAGIC; - for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4) - err = t3_seeprom_read(adapter, i, (__le32 *) & buf[i]); + cnt = pci_read_vpd(adapter->pdev, e->offset, e->len, data); + if (cnt < 0) + return cnt; - if (!err) - memcpy(data, buf + e->offset, e->len); - kfree(buf); - return err; + e->len = cnt; + + return 0; } static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, @@ -2072,12 +2068,9 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, buf = kmalloc(aligned_len, GFP_KERNEL); if (!buf) return -ENOMEM; - err = t3_seeprom_read(adapter, aligned_offset, (__le32 *) buf); - if (!err && aligned_len > 4) - err = t3_seeprom_read(adapter, - aligned_offset + aligned_len - 4, - (__le32 *) & buf[aligned_len - 4]); - if (err) + err = pci_read_vpd(adapter->pdev, aligned_offset, aligned_len, + buf); + if (err < 0) goto out; memcpy(buf + (eeprom->offset & 3), data, eeprom->len); } else diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 7ff31d1026fb2..4ecf40b02d7b8 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -599,42 +599,6 @@ struct t3_vpd { #define EEPROM_STAT_ADDR 0x4000 #define VPD_BASE 0xc00 -/** - * t3_seeprom_read - read a VPD EEPROM location - * @adapter: adapter to read - * @addr: EEPROM address - * @data: where to store the read data - * - * Read a 32-bit word from a location in VPD EEPROM using the card's PCI - * VPD ROM capability. A zero is written to the flag bit when the - * address is written to the control register. The hardware device will - * set the flag to 1 when 4 bytes have been read into the data register. 
- */ -int t3_seeprom_read(struct adapter *adapter, u32 addr, __le32 *data) -{ - u16 val; - int attempts = EEPROM_MAX_POLL; - u32 v; - unsigned int base = adapter->params.pci.vpd_cap_addr; - - if ((addr >= EEPROMSIZE && addr != EEPROM_STAT_ADDR) || (addr & 3)) - return -EINVAL; - - pci_write_config_word(adapter->pdev, base + PCI_VPD_ADDR, addr); - do { - udelay(10); - pci_read_config_word(adapter->pdev, base + PCI_VPD_ADDR, &val); - } while (!(val & PCI_VPD_ADDR_F) && --attempts); - - if (!(val & PCI_VPD_ADDR_F)) { - CH_ERR(adapter, "reading EEPROM address 0x%x failed\n", addr); - return -EIO; - } - pci_read_config_dword(adapter->pdev, base + PCI_VPD_DATA, &v); - *data = cpu_to_le32(v); - return 0; -} - /** * t3_seeprom_write - write a VPD EEPROM location * @adapter: adapter to write @@ -708,24 +672,22 @@ static int vpdstrtou16(char *s, u8 len, unsigned int base, u16 *val) */ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) { - int i, addr, ret; struct t3_vpd vpd; + u8 base_val = 0; + int addr, ret; /* * Card information is normally at VPD_BASE but some early cards had * it at 0. */ - ret = t3_seeprom_read(adapter, VPD_BASE, (__le32 *)&vpd); - if (ret) + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); + if (ret < 0) return ret; - addr = vpd.id_tag == 0x82 ? VPD_BASE : 0; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? VPD_BASE : 0; - for (i = 0; i < sizeof(vpd); i += 4) { - ret = t3_seeprom_read(adapter, addr + i, - (__le32 *)((u8 *)&vpd + i)); - if (ret) - return ret; - } + ret = pci_read_vpd(adapter->pdev, addr, sizeof(vpd), &vpd); + if (ret < 0) + return ret; ret = vpdstrtouint(vpd.cclk_data, vpd.cclk_len, 10, &p->cclk); if (ret) From 7724d929fdde07bbd0a0d276c0e7c35d146bbeaa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 10 Sep 2021 08:25:49 +0200 Subject: [PATCH 0190/1202] cxgb3: Use VPD API in t3_seeprom_wp() Use standard VPD API to replace t3_seeprom_write(), this prepares for removing this function. Chelsio T3 maps the EEPROM write protect flag to an arbitrary place in VPD address space, therefore we have to use pci_write_vpd_any(). Link: https://lore.kernel.org/r/f768fdbe-3a16-d539-57d2-c7c908294336@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Acked-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 4ecf40b02d7b8..ec4b49ebe62ae 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -642,7 +642,14 @@ int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data) */ int t3_seeprom_wp(struct adapter *adapter, int enable) { - return t3_seeprom_write(adapter, EEPROM_STAT_ADDR, enable ? 0xc : 0); + u32 data = enable ? 0xc : 0; + int ret; + + /* EEPROM_STAT_ADDR is outside VPD area, use pci_write_vpd_any() */ + ret = pci_write_vpd_any(adapter->pdev, EEPROM_STAT_ADDR, sizeof(u32), + &data); + + return ret < 0 ? ret : 0; } static int vpdstrtouint(char *s, u8 len, unsigned int base, unsigned int *val) From 12a1cb02fee9e239cf20d3e8feba48f86870d1d2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 10 Sep 2021 08:27:08 +0200 Subject: [PATCH 0191/1202] cxgb3: Remove seeprom_write and use VPD API Using the VPD API allows to simplify the code and completely get rid of t3_seeprom_write(). 
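pci_write_vpd() has the same contract on the write side, which is what lets the word-by-word loop in set_eeprom() collapse into a single call over the aligned buffer, as the hunk below shows. A minimal sketch, again with a hypothetical helper name::

    #include <linux/pci.h>

    static int example_write_vpd(struct pci_dev *pdev, loff_t off,
                                 const void *buf, size_t len)
    {
            /*
             * One call covers the whole aligned range; the PCI core performs
             * the PCI_VPD_ADDR/PCI_VPD_DATA handshake and polling internally.
             */
            ssize_t cnt = pci_write_vpd(pdev, off, len, buf);

            return cnt < 0 ? cnt : 0;
    }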
Link: https://lore.kernel.org/r/a0291004-dda3-ea08-4d6c-a2f8826c8527@gmail.com Signed-off-by: Heiner Kallweit Signed-off-by: Bjorn Helgaas Acked-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/common.h | 1 - .../net/ethernet/chelsio/cxgb3/cxgb3_main.c | 11 ++---- drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 35 ------------------- 3 files changed, 3 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index 56312f9ed5d86..d115ec93296d5 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -676,7 +676,6 @@ void t3_link_changed(struct adapter *adapter, int port_id); void t3_link_fault(struct adapter *adapter, int port_id); int t3_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); const struct adapter_info *t3_get_adapter_info(unsigned int board_id); -int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data); int t3_seeprom_wp(struct adapter *adapter, int enable); int t3_get_tp_version(struct adapter *adapter, u32 *vers); int t3_check_tpsram_version(struct adapter *adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index d733396821332..e185f5f248490 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2054,7 +2054,6 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; u32 aligned_offset, aligned_len; - __le32 *p; u8 *buf; int err; @@ -2080,17 +2079,13 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, if (err) goto out; - for (p = (__le32 *) buf; !err && aligned_len; aligned_len -= 4, p++) { - err = t3_seeprom_write(adapter, aligned_offset, *p); - aligned_offset += 4; - } - - if (!err) + err = pci_write_vpd(adapter->pdev, aligned_offset, aligned_len, buf); + if (err >= 0) err = t3_seeprom_wp(adapter, 1); out: if (buf != data) kfree(buf); - return err; + return err < 0 ? err : 0; } static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index ec4b49ebe62ae..cb85c2f215259 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -595,44 +595,9 @@ struct t3_vpd { u32 pad; /* for multiple-of-4 sizing and alignment */ }; -#define EEPROM_MAX_POLL 40 #define EEPROM_STAT_ADDR 0x4000 #define VPD_BASE 0xc00 -/** - * t3_seeprom_write - write a VPD EEPROM location - * @adapter: adapter to write - * @addr: EEPROM address - * @data: value to write - * - * Write a 32-bit word to a location in VPD EEPROM using the card's PCI - * VPD ROM capability. 
- */
-int t3_seeprom_write(struct adapter *adapter, u32 addr, __le32 data)
-{
-	u16 val;
-	int attempts = EEPROM_MAX_POLL;
-	unsigned int base = adapter->params.pci.vpd_cap_addr;
-
-	if ((addr >= EEPROMSIZE && addr != EEPROM_STAT_ADDR) || (addr & 3))
-		return -EINVAL;
-
-	pci_write_config_dword(adapter->pdev, base + PCI_VPD_DATA,
-			       le32_to_cpu(data));
-	pci_write_config_word(adapter->pdev,base + PCI_VPD_ADDR,
-			      addr | PCI_VPD_ADDR_F);
-	do {
-		msleep(1);
-		pci_read_config_word(adapter->pdev, base + PCI_VPD_ADDR, &val);
-	} while ((val & PCI_VPD_ADDR_F) && --attempts);
-
-	if (val & PCI_VPD_ADDR_F) {
-		CH_ERR(adapter, "write to EEPROM address 0x%x failed\n", addr);
-		return -EIO;
-	}
-	return 0;
-}
-
 /**
  * t3_seeprom_wp - enable/disable EEPROM write protection
  * @adapter: the adapter

From ecea7adad80d9d230df766345e5f8061792da00d Mon Sep 17 00:00:00 2001
From: Nick Terrell
Date: Fri, 11 Sep 2020 16:37:08 -0700
Subject: [PATCH 0192/1202] lib: zstd: Upgrade to latest upstream zstd version
 1.4.10

Upgrade to the latest upstream zstd version 1.4.10.

This patch is 100% generated from upstream zstd commit 20821a46f412 [0].

This patch is very large because it is transitioning from the custom
kernel zstd to using upstream directly. The new zstd follows upstream's
file structure, which is different. Future update patches will be much
smaller because they will only contain the changes from one upstream
zstd release.

As an aid for review I've created a commit [1] that shows the diff
between upstream zstd as-is (which doesn't compile), and the zstd code
imported in this patch. The version of zstd in this patch is generated
from upstream with changes applied by automation to replace upstream's
libc dependencies, remove unnecessary portability macros, replace `/**`
comments with `/*` comments, and use the kernel's xxhash instead of
bundling it.

The benefits of this patch are as follows:

1. Using upstream directly, with an automated script to generate kernel
   code. This allows us to update the kernel every upstream release, so
   the kernel gets the latest bug fixes and performance improvements,
   and doesn't get 3 years out of date again. The automation and the
   translated code are tested every upstream commit to ensure it
   continues to work.
2. Upgrades from a custom zstd based on 1.3.1 to 1.4.10, getting 3
   years of performance improvements and bug fixes. On x86_64 I've
   measured 15% faster BtrFS and SquashFS decompression+read speeds,
   35% faster kernel decompression, and 30% faster ZRAM
   decompression+read speeds.
3. Zstd-1.4.10 supports negative compression levels, which allow zstd
   to match or subsume lzo's performance.
4. Maintains the same kernel-specific wrapper API, so no callers have
   to be modified with zstd version updates.

One concern that was brought up was stack usage. Upstream zstd had
already removed most of its heavy stack usage functions, but I just
removed the last functions that allocate arrays on the stack. I've
measured the high water mark for both compression and decompression
before and after this patch. Decompression is approximately neutral,
using about 1.2KB of stack space. Compression levels up to 3 regressed
from 1.4KB -> 1.6KB, and higher compression levels regressed from
1.5KB -> 2KB. We've added unit tests upstream to prevent further
regression. I believe that this is a reasonable increase, and if it
does end up causing problems, this commit can be cleanly reverted,
because it only touches zstd.
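As a reader aid, here is a hedged sketch (not part of the generated
import) of the one-shot API surface this patch brings in, written
against the upstream ZSTD_* declarations added in the zstd_lib.h hunk
below and assuming they are reachable via linux/zstd.h; the function
and buffer names are hypothetical, and in-tree callers normally go
through the kernel-specific zstd_*() wrappers instead:

  #include <linux/errno.h>
  #include <linux/zstd.h>

  /* Sketch: one-shot compression at a negative (extra-fast) level,
   * one of the new capabilities of zstd-1.4.10 noted above. */
  static ssize_t zstd_oneshot_sketch(void *dst, size_t dst_cap,
                                     const void *src, size_t src_size)
  {
          /* ZSTD_compressBound(src_size) is the worst-case dst_cap. */
          size_t n = ZSTD_compress(dst, dst_cap, src, src_size, -1);

          if (ZSTD_isError(n))
                  return -EINVAL; /* ZSTD_getErrorCode(n) gives details */
          return n;               /* compressed size in bytes */
  }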
I chose the bulk update instead of replaying upstream commits because
there have been ~3500 upstream commits since the 1.3.1 release, zstd
wasn't ready to be used in the kernel as-is before a month ago, and not
all upstream zstd commits build. The bulk update preserves bisectability
because bugs can be bisected to the zstd version update. At that point
the update can be reverted, and we can work with upstream to find and
fix the bug.

Note that upstream zstd release 1.4.10 doesn't exist yet. I have cut a
staging branch at 20821a46f412 [0] and will apply any changes requested
to the staging branch. Once we're ready to merge this update I will cut
a zstd release at the commit we merge, so we have a known zstd release
in the kernel.

The implementation of the kernel API is contained in
zstd_compress_module.c and zstd_decompress_module.c.

[0] https://github.com/facebook/zstd/commit/20821a46f4122f9abd7c7b245d28162dde8129c9
[1] https://github.com/terrelln/linux/commit/e0fa481d0e3df26918da0a13749740a1f6777574

Signed-off-by: Nick Terrell
---
 include/linux/zstd.h | 13 +-
 include/linux/zstd_errors.h | 77 +
 include/linux/zstd_lib.h | 3367 +++++++----
 lib/zstd/Makefile | 46 +-
 lib/zstd/bitstream.h | 380 --
 lib/zstd/common/bitstream.h | 437 ++
 lib/zstd/common/compiler.h | 170 +
 lib/zstd/common/cpu.h | 194 +
 lib/zstd/common/debug.c | 24 +
 lib/zstd/common/debug.h | 101 +
 lib/zstd/common/entropy_common.c | 357 ++
 lib/zstd/common/error_private.c | 56 +
 lib/zstd/common/error_private.h | 66 +
 lib/zstd/common/fse.h | 710 +++
 lib/zstd/common/fse_decompress.c | 390 ++
 lib/zstd/common/huf.h | 356 ++
 lib/zstd/common/mem.h | 259 +
 lib/zstd/common/zstd_common.c | 83 +
 lib/zstd/common/zstd_deps.h | 125 +
 lib/zstd/common/zstd_internal.h | 450 ++
 lib/zstd/compress.c | 3534 ------------
 lib/zstd/compress/fse_compress.c | 625 ++
 lib/zstd/compress/hist.c | 165 +
 lib/zstd/compress/hist.h | 75 +
 lib/zstd/compress/huf_compress.c | 905 +++
 lib/zstd/compress/zstd_compress.c | 5109 +++++++++++++++++
 lib/zstd/compress/zstd_compress_internal.h | 1188 ++++
 lib/zstd/compress/zstd_compress_literals.c | 158 +
 lib/zstd/compress/zstd_compress_literals.h | 29 +
 lib/zstd/compress/zstd_compress_sequences.c | 439 ++
 lib/zstd/compress/zstd_compress_sequences.h | 54 +
 lib/zstd/compress/zstd_compress_superblock.c | 850 +++
 lib/zstd/compress/zstd_compress_superblock.h | 32 +
 lib/zstd/compress/zstd_cwksp.h | 482 ++
 lib/zstd/compress/zstd_double_fast.c | 519 ++
 lib/zstd/compress/zstd_double_fast.h | 32 +
 lib/zstd/compress/zstd_fast.c | 496 ++
 lib/zstd/compress/zstd_fast.h | 31 +
 lib/zstd/compress/zstd_lazy.c | 1414 +++++
 lib/zstd/compress/zstd_lazy.h | 81 +
 lib/zstd/compress/zstd_ldm.c | 686 +++
 lib/zstd/compress/zstd_ldm.h | 110 +
 lib/zstd/compress/zstd_ldm_geartab.h | 103 +
 lib/zstd/compress/zstd_opt.c | 1346 +++++
 lib/zstd/compress/zstd_opt.h | 50 +
 lib/zstd/decompress.c | 2571 ---------
 lib/zstd/decompress/huf_decompress.c | 1206 ++++
 lib/zstd/decompress/zstd_ddict.c | 241 +
 lib/zstd/decompress/zstd_ddict.h | 44 +
 lib/zstd/decompress/zstd_decompress.c | 2085 +++++++
 lib/zstd/decompress/zstd_decompress_block.c | 1540 +++++
 lib/zstd/decompress/zstd_decompress_block.h | 62 +
 .../decompress/zstd_decompress_internal.h | 202 +
 lib/zstd/decompress_sources.h | 15 +-
 lib/zstd/entropy_common.c | 243 -
 lib/zstd/error_private.h | 53 -
 lib/zstd/fse.h | 575 --
 lib/zstd/fse_compress.c | 795 ---
 lib/zstd/fse_decompress.c | 325 --
 lib/zstd/huf.h | 212 -
 lib/zstd/huf_compress.c | 773 ---
 lib/zstd/huf_decompress.c | 960 ----
 lib/zstd/mem.h | 151 -
lib/zstd/zstd_common.c | 75 - lib/zstd/zstd_compress_module.c | 160 + lib/zstd/zstd_decompress_module.c | 105 + lib/zstd/zstd_internal.h | 273 - lib/zstd/zstd_opt.h | 1014 ---- 68 files changed, 26861 insertions(+), 12993 deletions(-) create mode 100644 include/linux/zstd_errors.h delete mode 100644 lib/zstd/bitstream.h create mode 100644 lib/zstd/common/bitstream.h create mode 100644 lib/zstd/common/compiler.h create mode 100644 lib/zstd/common/cpu.h create mode 100644 lib/zstd/common/debug.c create mode 100644 lib/zstd/common/debug.h create mode 100644 lib/zstd/common/entropy_common.c create mode 100644 lib/zstd/common/error_private.c create mode 100644 lib/zstd/common/error_private.h create mode 100644 lib/zstd/common/fse.h create mode 100644 lib/zstd/common/fse_decompress.c create mode 100644 lib/zstd/common/huf.h create mode 100644 lib/zstd/common/mem.h create mode 100644 lib/zstd/common/zstd_common.c create mode 100644 lib/zstd/common/zstd_deps.h create mode 100644 lib/zstd/common/zstd_internal.h delete mode 100644 lib/zstd/compress.c create mode 100644 lib/zstd/compress/fse_compress.c create mode 100644 lib/zstd/compress/hist.c create mode 100644 lib/zstd/compress/hist.h create mode 100644 lib/zstd/compress/huf_compress.c create mode 100644 lib/zstd/compress/zstd_compress.c create mode 100644 lib/zstd/compress/zstd_compress_internal.h create mode 100644 lib/zstd/compress/zstd_compress_literals.c create mode 100644 lib/zstd/compress/zstd_compress_literals.h create mode 100644 lib/zstd/compress/zstd_compress_sequences.c create mode 100644 lib/zstd/compress/zstd_compress_sequences.h create mode 100644 lib/zstd/compress/zstd_compress_superblock.c create mode 100644 lib/zstd/compress/zstd_compress_superblock.h create mode 100644 lib/zstd/compress/zstd_cwksp.h create mode 100644 lib/zstd/compress/zstd_double_fast.c create mode 100644 lib/zstd/compress/zstd_double_fast.h create mode 100644 lib/zstd/compress/zstd_fast.c create mode 100644 lib/zstd/compress/zstd_fast.h create mode 100644 lib/zstd/compress/zstd_lazy.c create mode 100644 lib/zstd/compress/zstd_lazy.h create mode 100644 lib/zstd/compress/zstd_ldm.c create mode 100644 lib/zstd/compress/zstd_ldm.h create mode 100644 lib/zstd/compress/zstd_ldm_geartab.h create mode 100644 lib/zstd/compress/zstd_opt.c create mode 100644 lib/zstd/compress/zstd_opt.h delete mode 100644 lib/zstd/decompress.c create mode 100644 lib/zstd/decompress/huf_decompress.c create mode 100644 lib/zstd/decompress/zstd_ddict.c create mode 100644 lib/zstd/decompress/zstd_ddict.h create mode 100644 lib/zstd/decompress/zstd_decompress.c create mode 100644 lib/zstd/decompress/zstd_decompress_block.c create mode 100644 lib/zstd/decompress/zstd_decompress_block.h create mode 100644 lib/zstd/decompress/zstd_decompress_internal.h delete mode 100644 lib/zstd/entropy_common.c delete mode 100644 lib/zstd/error_private.h delete mode 100644 lib/zstd/fse.h delete mode 100644 lib/zstd/fse_compress.c delete mode 100644 lib/zstd/fse_decompress.c delete mode 100644 lib/zstd/huf.h delete mode 100644 lib/zstd/huf_compress.c delete mode 100644 lib/zstd/huf_decompress.c delete mode 100644 lib/zstd/mem.h delete mode 100644 lib/zstd/zstd_common.c create mode 100644 lib/zstd/zstd_compress_module.c create mode 100644 lib/zstd/zstd_decompress_module.c delete mode 100644 lib/zstd/zstd_internal.h delete mode 100644 lib/zstd/zstd_opt.h diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 9fbc7729b0a0d..113408eef6ece 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,4 
+1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. @@ -22,6 +22,7 @@ /* ====== Dependency ====== */ #include +#include #include /* ====== Helper Functions ====== */ @@ -417,12 +418,18 @@ size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** * struct zstd_frame_params - zstd frame parameters stored in the frame header - * @frameContentSize: The frame content size, or 0 if not present. + * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not + * present. * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @blockSizeMax: The maximum block size. + * @frameType: The frame type (zstd or skippable) + * @headerSize: The size of the frame header. * @dictID: The dictionary id, or 0 if not present. * @checksumFlag: Whether a checksum was used. + * + * See zstd_lib.h. */ -typedef ZSTD_frameParams zstd_frame_header; +typedef ZSTD_frameHeader zstd_frame_header; /** * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h new file mode 100644 index 0000000000000..58b6dd45a969f --- /dev/null +++ b/include/linux/zstd_errors.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. 
+ **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index 13151c34f725f..b8c7dbf98390f 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -2,1156 +2,2431 @@ * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -#ifndef ZSTD_H -#define ZSTD_H +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 /* ====== Dependency ======*/ +#include /* INT_MAX */ #include /* size_t */ -/*-***************************************************************************** - * Introduction - * - * zstd, short for Zstandard, is a fast lossless compression algorithm, - * targeting real-time compression scenarios at zlib-level and better - * compression ratios. The zstd compression library provides in-memory - * compression and decompression functions. The library supports compression - * levels from 1 up to ZSTD_maxCLevel() which is 22. 
Levels >= 20, labeled - * ultra, should be used with caution, as they require more memory. - * Compression can be done in: - * - a single step, reusing a context (described as Explicit memory management) - * - unbounded multiple steps (described as Streaming compression) - * The compression ratio achievable on small data can be highly improved using - * compression with a dictionary in: - * - a single step (described as Simple dictionary API) - * - a single step, reusing a dictionary (described as Fast dictionary API) - ******************************************************************************/ +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#define ZSTDLIB_VISIBILITY +#define ZSTDLIB_API ZSTDLIB_VISIBILITY + + +/* ***************************************************************************** + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 10 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + /*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/* ************************************* +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. 
+ * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -/** - * enum ZSTD_ErrorCode - zstd error codes + +/* ************************************* +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * - * Functions that return size_t can be checked for errors using ZSTD_isError() - * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. 
*/ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. 
+ * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. 
+ * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. 
+ * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode -} ZSTD_ErrorCode; - -/** - * ZSTD_maxCLevel() - maximum compression level available - * - * Return: Maximum compression level available. - */ -int ZSTD_maxCLevel(void); -/** - * ZSTD_compressBound() - maximum compressed size in worst case scenario - * @srcSize: The size of the data to compress. - * - * Return: The maximum compressed size in the worst case scenario. - */ -size_t ZSTD_compressBound(size_t srcSize); -/** - * ZSTD_isError() - tells if a size_t function result is an error code - * @code: The function result to check for error. - * - * Return: Non-zero iff the code is an error. - */ -static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) -{ - return code > (size_t)-ZSTD_error_maxCode; -} -/** - * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode - * @functionResult: The result of a function for which ZSTD_isError() is true. - * - * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 - * if the functionResult isn't an error. - */ -static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( - size_t functionResult) -{ - if (!ZSTD_isError(functionResult)) - return (ZSTD_ErrorCode)0; - return (ZSTD_ErrorCode)(0 - functionResult); -} - -/** - * enum ZSTD_strategy - zstd compression search strategy - * - * From faster to stronger. + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. 
no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/* ************************************* +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + typedef enum { - ZSTD_fast, - ZSTD_dfast, - ZSTD_greedy, - ZSTD_lazy, - ZSTD_lazy2, - ZSTD_btlazy2, - ZSTD_btopt, - ZSTD_btopt2 -} ZSTD_strategy; -/** - * struct ZSTD_compressionParameters - zstd compression parameters - * @windowLog: Log of the largest match distance. Larger means more - * compression, and more memory needed during decompression. - * @chainLog: Fully searched segment. Larger means more compression, slower, - * and more memory (useless for fast). - * @hashLog: Dispatch table. Larger means more compression, - * slower, and more memory. - * @searchLog: Number of searches. Larger means more compression and slower. - * @searchLength: Match length searched. Larger means faster decompression, - * sometimes less compression. - * @targetLength: Acceptable match size for optimal parser (only). Larger means - * more compression, and slower. - * @strategy: The zstd compression strategy. + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? 
names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive */ -typedef struct { - unsigned int windowLog; - unsigned int chainLog; - unsigned int hashLog; - unsigned int searchLog; - unsigned int searchLength; - unsigned int targetLength; - ZSTD_strategy strategy; -} ZSTD_compressionParameters; +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/* ************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /*< start of input buffer */ + size_t size; /*< size of input buffer */ + size_t pos; /*< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /*< start of output buffer */ + size_t size; /*< size of output buffer */ + size_t pos; /*< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + -/** - * struct ZSTD_frameParameters - zstd frame parameters - * @contentSizeFlag: Controls whether content size will be present in the frame - * header (when known). - * @checksumFlag: Controls whether a 32-bit checksum is generated at the end - * of the frame for error detection. - * @noDictIDFlag: Controls whether dictID will be saved into the frame header - * when using dictionary compression. + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. 
+* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /*< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - endOp must be a valid directive
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads,
+ *    flushes to output whatever is available, and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *    The function nonetheless guarantees forward progress : it will return only after it reads or writes at least one byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *    only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *    Before starting a new compression job, or changing compression parameters,
+ *    it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It is not rare that more time ends up being spent crossing the interface than on compression itself.
+ * In such cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the number of round trips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /*< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * This following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
 *
- * The default value is all fields set to 0.
+ * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum number of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
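To make the control flow above concrete, here is a minimal userspace sketch of one-shot streaming compression built against this header. Names such as stream_compress are illustrative, and error handling is reduced to the essentials:

    #include <zstd.h>

    /* Compress src into dst by driving ZSTD_compressStream2() with ZSTD_e_end
     * until it reports 0, meaning the frame is fully flushed. */
    static size_t stream_compress(void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize, int level)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t remaining = (size_t)-1;          /* treated as an error value */
        if (cctx == NULL) return remaining;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
        do {
            remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
            if (ZSTD_isError(remaining)) break;
            if (remaining > 0 && out.pos == out.size) {
                remaining = (size_t)-1;         /* dst too small: stop instead of spinning */
                break;
            }
        } while (remaining != 0);
        ZSTD_freeCCtx(cctx);
        return ZSTD_isError(remaining) ? remaining : out.pos;
    }

Feeding everything with ZSTD_e_end and looping until 0 is the simplest correct pattern; a chunked producer would instead issue ZSTD_e_continue per chunk and finish with ZSTD_e_end.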
+/*-***************************************************************************
+* Streaming decompression - HowTo
+*
+* A ZSTD_DStream object is required to track streaming operations.
+* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+* ZSTD_DStream objects can be re-used multiple times.
+*
+* Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+* Alternatively, use advanced API to set specific properties.
+*
+* Use ZSTD_decompressStream() repeatedly to consume your input.
+* The function will update both `pos` fields.
+* If `input.pos < input.size`, some input has not been consumed.
+* It's up to the caller to present again remaining data.
+* The function tries to flush all data decoded immediately, respecting output buffer size.
+* If `output.pos < output.size`, decoder has flushed everything it could.
+* But if `output.pos == output.size`, there might be some data left within internal buffers.
+* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*           or an error code, which can be tested using ZSTD_isError(),
+*           or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*           the return value is a suggested next input size (just a hint for better latency)
+*           that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /*< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
+
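The matching decompression loop, again as a hedged sketch. It also shows a decompression parameter being applied before the session starts; ZSTD_d_windowLogMax is the stable parameter behind the ZSTD_WINDOWLOG_LIMIT_DEFAULT note further down, and 28 is an arbitrary example value:

    #include <zstd.h>

    /* Decompress one frame: call ZSTD_decompressStream() until it returns 0.
     * A non-zero, non-error return is only a hint for the next input size. */
    static size_t stream_decompress(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
    {
        ZSTD_DCtx* const dctx = ZSTD_createDCtx();
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t ret = (size_t)-1;
        if (dctx == NULL) return ret;
        /* accept frames requiring up to a 2^28 window (example value) */
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 28);
        do {
            ret = ZSTD_decompressStream(dctx, &out, &in);
            if (ZSTD_isError(ret)) break;
        } while (ret != 0 && in.pos < in.size);
        ZSTD_freeDCtx(dctx);
        return ZSTD_isError(ret) ? ret : out.pos;
    }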
+
+/* ************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                           const void* src, size_t srcSize,
+                                           const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize,
+                                             const void* dict,size_t dictSize);
+
+
+/* *********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize,
+                                            const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              const ZSTD_DDict* ddict);
+
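Where the same dictionary serves many messages, the bulk API above amortizes the digestion cost. A sketch with illustrative names and minimal error handling:

    #include <zstd.h>

    /* Digest the dictionary once, then compress n independent messages with it. */
    static int compress_batch(const void* dictBuf, size_t dictSize, int level,
                              const void* const src[], const size_t srcSize[],
                              void* const dst[], const size_t dstCap[], size_t n)
    {
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
        int ok = (cctx != NULL) && (cdict != NULL);
        size_t i;
        for (i = 0; ok && i < n; i++) {
            size_t const r = ZSTD_compress_usingCDict(cctx, dst[i], dstCap[i],
                                                      src[i], srcSize[i], cdict);
            ok = !ZSTD_isError(r);
        }
        ZSTD_freeCDict(cdict);   /* both free functions accept NULL */
        ZSTD_freeCCtx(cctx);
        return ok;
    }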
+
+/* ******************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/* *****************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome to performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                       const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ *  will store the DDict references in a table, and the DDict used for decompression
+ *  will be determined at decompression time, as per the dict ID in the frame.
+ *  The memory for the table is allocated on the first call to refDDict, and can be
+ *  freed with ZSTD_freeDCtx().
+ *
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu or memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                       const void* prefix, size_t prefixSize);
+
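Because a prefix must be re-referenced for every frame on both sides, a delta round trip looks roughly like the sketch below. It assumes the window covers the whole prefix plus input (Note 2 above); names are illustrative:

    #include <zstd.h>

    /* Compress newVersion against prevVersion used as a single-shot prefix. */
    static size_t delta_compress(ZSTD_CCtx* cctx,
                                 const void* prevVersion, size_t prevSize,
                                 const void* newVersion, size_t newSize,
                                 void* dst, size_t dstCapacity)
    {
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
        ZSTD_CCtx_refPrefix(cctx, prevVersion, prevSize);  /* consumed by this frame only */
        return ZSTD_compress2(cctx, dst, dstCapacity, newVersion, newSize);
    }

    /* The decompressor must reference the same prefix before its frame. */
    static size_t delta_decompress(ZSTD_DCtx* dctx,
                                   const void* prevVersion, size_t prevSize,
                                   const void* frame, size_t frameSize,
                                   void* dst, size_t dstCapacity)
    {
        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
        ZSTD_DCtx_refPrefix(dctx, prevVersion, prevSize);
        return ZSTD_decompressDCtx(dctx, dst, dstCapacity, frame, frameSize);
    }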
+/* === Memory management === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* **************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(ZSTD_d_windowLogMax, ...).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
+                               * If offset == 0 and matchLength == 0, this sequence represents the last
+                               * literals in the block of litLength size.
+                               */
+
+    unsigned int litLength;   /* Literal length of the sequence. */
+    unsigned int matchLength; /* Match length of the sequence. */
+
+                              /* Note: Users of this API may provide a sequence with matchLength == 0 and
+                               * offset == 0. In this case, it will be interpreted as the last sequence of
+                               * the block, with litLength == length of the last literals.
+                               */
+
+    unsigned int rep;         /* Represents which repeat offset is represented by the field 'offset'.
+                               * Ranges from [0, 3].
+                               *
+                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
+                               * recency order. For more detail, see doc/zstd_compression_format.md
+                               *
+                               * If rep == 0, then 'offset' does not contain a repeat offset.
+                               * If rep > 0:
+                               *  If litLength != 0:
+                               *      rep == 1 --> offset == repeat_offset_1
+                               *      rep == 2 --> offset == repeat_offset_2
+                               *      rep == 3 --> offset == repeat_offset_3
+                               *  If litLength == 0:
+                               *      rep == 1 --> offset == repeat_offset_2
+                               *      rep == 2 --> offset == repeat_offset_3
+                               *      rep == 3 --> offset == repeat_offset_1 - 1
+                               *
+                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+                               * use this 'rep' field at all (as of now).
+                               */
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /*< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /*< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /*< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /*< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /*< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /*< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /*< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /*< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
 } ZSTD_frameParameters;
 
-/**
- * struct ZSTD_parameters - zstd parameters
- * @cParams: The compression parameters.
- * @fParams: The frame parameters.
- */ typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; } ZSTD_parameters; -/** - * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. - * - * Return: The selected ZSTD_compressionParameters. +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /*< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /*< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. 
+ * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /*< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /*< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /*< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/* ************************************* +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. 
+ * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
 
-/**
- * ZSTD_getParams() - returns ZSTD_parameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
 *
- * The same as ZSTD_getCParams() except also selects the default frame
- * parameters (all zero).
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
 *
- * Return: The selected ZSTD_parameters.
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
 */
-ZSTD_parameters ZSTD_getParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
 
-/*-*************************
- * Explicit memory management
- **************************/
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
 
-/**
- * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
- * @cParams: The compression parameters to be used for compression.
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
 *
- * If multiple compression parameters might be used, the caller must call
- * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
- * size.
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
 *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initCCtx().
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
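Two short, assumption-laden sketches of the helpers above: sizing an allocation from ZSTD_decompressBound(), and producing delimiter-free sequences for the sequence-compression path. The sequence-buffer capacity below is a deliberately generous guess, not a documented bound:

    #include <stdlib.h>
    #include <zstd.h>

    /* Allocate from the upper bound when frames may omit their content size. */
    static void* alloc_for_frames(const void* src, size_t srcSize, size_t* cap)
    {
        unsigned long long const bound = ZSTD_decompressBound(src, srcSize);
        if (bound == ZSTD_CONTENTSIZE_ERROR) return NULL;
        *cap = (size_t)bound;
        return malloc(*cap);
    }

    /* Generate sequences, then strip block delimiters so the result suits
     * ZSTD_compressSequences() with ZSTD_sf_noBlockDelimiters. */
    static size_t sequences_for(ZSTD_CCtx* zc, ZSTD_Sequence** out,
                                const void* src, size_t srcSize)
    {
        size_t const cap = srcSize + (srcSize >> 17) + 8;   /* generous guess */
        ZSTD_Sequence* const seqs = malloc(cap * sizeof(*seqs));
        size_t n;
        if (seqs == NULL) return 0;
        n = ZSTD_generateSequences(zc, seqs, cap, src, srcSize);
        if (ZSTD_isError(n)) { free(seqs); return 0; }
        *out = seqs;    /* caller frees */
        return ZSTD_mergeBlockDelimiters(seqs, n);
    }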
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ *    the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
+ *
+ * In addition to the two adjustable experimental params, there are other important cctx params.
+ *  - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *  - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *  - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *    is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                          const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                          const void* src, size_t srcSize);
 
-/**
- * struct ZSTD_CCtx - the zstd compression context
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
 *
- * When compressing many times it is recommended to allocate a context just once
- * and reuse it for each successive compression operation.
- */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-/**
- * ZSTD_initCCtx() - initialize a zstd compression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. * - * Return: A compression context emplaced into workspace. - */ -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_compressCCtx() - compress src into dst - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @params: The parameters to use for compression. See ZSTD_getParams(). + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). + * @return : number of bytes written or a ZSTD error. */ -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, ZSTD_parameters params); - -/** - * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx - * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDCtx(). +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/* ************************************* +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. 
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deduced from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is initialized with ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, whose additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
 */
-size_t ZSTD_DCtxWorkspaceBound(void);
+ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
+
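Combining ZSTD_estimateCCtxSize() with ZSTD_initStaticCCtx() gives a malloc-free context. A rough sketch, where WKSP_SIZE is an arbitrary illustrative figure that must cover the estimate for the level in use:

    #include <zstd.h>

    /* One compression context carved out of a static, suitably aligned arena.
     * There is no "free": the workspace is owned by the caller (Note 2 above). */
    #define WKSP_SIZE (1u << 20)
    static unsigned long long wksp[WKSP_SIZE / sizeof(unsigned long long)];

    static size_t compress_static(void* dst, size_t dstCap,
                                  const void* src, size_t srcSize, int level)
    {
        ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, sizeof(wksp));
        if (cctx == NULL || ZSTD_estimateCCtxSize(level) > sizeof(wksp))
            return (size_t)-1;    /* workspace too small or misaligned */
        return ZSTD_compressCCtx(cctx, dst, dstCap, src, srcSize, level);
    }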
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static
+__attribute__((__unused__))
+ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /*< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
+
+/* ! Thread pool :
+ *  These prototypes make it possible to share a thread pool among multiple compression contexts.
+ *  This can limit resources for applications with multiple threads where each one uses
+ *  a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ *  ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ *  Note that such a pool must remain valid for as long as it is being used.
+ *  ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ *  to use an internal thread pool).
+ *  ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+ */
+typedef struct POOL_ctx_s ZSTD_threadPool;
+ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
+ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 
-/**
- * struct ZSTD_DCtx - the zstd decompression context
- *
- * When decompressing many times it is recommended to allocate a context just
- * once and reuse it for each successive decompression operation.
+
+/*
+ * This API is temporary and is expected to change or disappear in the future!
*/ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -/** - * ZSTD_initDCtx() - initialize a zstd decompression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A decompression context emplaced into workspace. - */ -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_decompressDCtx() - decompress zstd compressed src into dst - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - -/*-************************ - * Simple dictionary API - **************************/ - -/** - * ZSTD_compress_usingDict() - compress src into dst using a dictionary - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @dict: The dictionary to use for compression. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a predefined dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params); - -/** - * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @dict: The dictionary to use for decompression. The same dictionary - * must've been used to compress the data. - * @dictSize: The size of the dictionary. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize); - -/*-************************** - * Fast dictionary API - ***************************/ - -/** - * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict - * @cParams: The compression parameters to be used for compression. 
- * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCDict(). - */ -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CDict - a digested dictionary to be used for compression +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/* ************************************* +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. 
+ * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                          const void* src, size_t srcSize,
+                                          const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ * Note : this function is now REDUNDANT.
+ * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ * This prototype will be marked as deprecated and generate compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                  void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const ZSTD_CDict* cdict,
+                                  ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ * how to load the dictionary (by copy ? by reference ?)
+ * and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ * Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* === experimental parameters === */
+/* these parameters can be used with ZSTD_CCtx_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_e enum definition for details. */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/* Controls whether the new and experimental "dedicated dictionary search
+ * structure" can be used. This feature is still rough around the edges, be
+ * prepared for surprising behavior!
+ *
+ * How to use it:
+ *
+ * When using a CDict, whether to use this feature or not is controlled at
+ * CDict creation, and it must be set in a CCtxParams set passed into that
+ * construction (via ZSTD_createCDict_advanced2()). A compression will then
+ * use the feature or not based on how the CDict was constructed; the value of
+ * this param, set in the CCtx, will have no effect.
+ *
+ * However, when a dictionary buffer is passed into a CCtx, such as via
+ * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
+ * whether the CDict that is created internally can use the feature or not.
+ *
+ * What it does:
+ *
+ * Normally, the internal data structures of the CDict are analogous to what
+ * would be stored in a CCtx after compressing the contents of a dictionary.
+ * To an approximation, a compression using a dictionary can then use those
+ * data structures to simply continue what is effectively a streaming
+ * compression where the simulated compression of the dictionary left off.
+ * Which is to say, the search structures in the CDict are normally the same
+ * format as in the CCtx.
+ *
+ * It is possible to do better, since the CDict is not like a CCtx: the search
+ * structures are written once during CDict creation, and then are only read
+ * after that, while the search structures in the CCtx are both read and
+ * written as the compression goes along. This means we can choose a search
+ * structure for the dictionary that is read-optimized.
+ *
+ * This feature enables the use of that different structure.
+ *
+ * Note that some of the members of the ZSTD_compressionParameters struct have
+ * different semantics and constraints in the dedicated search structure. It is
+ * highly recommended that you simply set a compression level in the CCtxParams
+ * you pass into the CDict creation call, and avoid messing with the cParams
+ * directly.
+ *
+ * Effects:
+ *
+ * This will only have any effect when the selected ZSTD_strategy
+ * implementation supports this feature. Currently, that's limited to
+ * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
+ *
+ * Note that this means that the CDict tables can no longer be copied into the
+ * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
+ * usable. The dictionary can only be attached or reloaded.
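
A rough sketch of the CDict-creation path described above; dictBuffer and dictSize are assumed to hold a valid dictionary, ZSTD_defaultCMem is the default allocator defined elsewhere in this header, and error checks are elided:

    ZSTD_CCtx_params* const ccParams = ZSTD_createCCtxParams();
    ZSTD_CCtxParams_setParameter(ccParams, ZSTD_c_compressionLevel, 3);
    ZSTD_CCtxParams_setParameter(ccParams, ZSTD_c_enableDedicatedDictSearch, 1);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize,
                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                  ccParams, ZSTD_defaultCMem);
    ZSTD_freeCCtxParams(ccParams);  /* params are read during CDict creation */
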
+ *
+ * In general, you should expect compression to be faster--sometimes very much
+ * so--and CDict creation to be slightly slower. Eventually, we will probably
+ * make this mode the default.
+ */
+#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
+
+/* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the compressor, and
+ * compression will fail if it ever changes. This means the only flush
+ * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+ * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
-/**
- * ZSTD_initCDict() - initialize a digested dictionary for compression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
- * @dictSize:      The size of the dictionary.
- * @params:        The parameters to use for compression. See ZSTD_getParams().
- * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_CDictWorkspaceBound(params.cParams).
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
 *
- * When compressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_CDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_CDict.
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
 *
- * Return:         The digested dictionary emplaced into workspace.
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
 */
-ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
-	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
-/**
- * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
- *               cParams are the compression parameters used to initialize the
- *               cdict.
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @cdict:       The digested dictionary to use for compression.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
 *
- * Compression using a digested dictionary. The same dictionary must be used
- * during decompression.
+ * For use with sequence compression API: ZSTD_compressSequences().
 *
- * Return:      The compressed size or an error, which can be checked using
- *              ZSTD_isError().
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
 */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
-
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
-/**
- * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
 *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDDict().
 */
-size_t ZSTD_DDictWorkspaceBound(void);
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
-/**
- * struct ZSTD_DDict - a digested dictionary to be used for decompression
+/*! ZSTD_CCtx_getParameter() :
+ * Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ * and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
 */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-
-/**
- * ZSTD_initDDict() - initialize a digested dictionary for decompression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
- * @dictSize:      The size of the dictionary.
- * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
- * @workspaceSize: The workspace size. Must be at least - * ZSTD_DDictWorkspaceBound(). - * - * When decompressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_DDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_DDict. - * - * Return: The digested dictionary emplaced into workspace. - */ -ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, - void *workspace, size_t workspaceSize); - -/** - * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @ddict: The digested dictionary to use for decompression. The same - * dictionary must've been used to compress the data. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, - size_t dstCapacity, const void *src, size_t srcSize, - const ZSTD_DDict *ddict); - - -/*-************************** - * Streaming - ***************************/ - -/** - * struct ZSTD_inBuffer - input buffer for streaming - * @src: Start of the input buffer. - * @size: Size of the input buffer. - * @pos: Position where reading stopped. Will be updated. - * Necessarily 0 <= pos <= size. +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. */ -typedef struct ZSTD_inBuffer_s { - const void *src; - size_t size; - size_t pos; -} ZSTD_inBuffer; +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ -/** - * struct ZSTD_outBuffer - output buffer for streaming - * @dst: Start of the output buffer. - * @size: Size of the output buffer. - * @pos: Position where writing stopped. Will be updated. - * Necessarily 0 <= pos <= size. +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. */ -typedef struct ZSTD_outBuffer_s { - void *dst; - size_t size; - size_t pos; -} ZSTD_outBuffer; - - +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); -/*-***************************************************************************** - * Streaming compression - HowTo - * - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. 
- * ZSTD_CStream objects can be reused multiple times on consecutive compression - * operations. It is recommended to re-use ZSTD_CStream in situations where many - * streaming operations will be achieved consecutively. Use one separate - * ZSTD_CStream per thread for parallel execution. - * - * Use ZSTD_compressStream() repetitively to consume input stream. - * The function will automatically update both `pos` fields. - * Note that it may not consume the entire input, in which case `pos < size`, - * and it's up to the caller to present again remaining data. - * It returns a hint for the preferred number of bytes to use as an input for - * the next function call. - * - * At any moment, it's possible to flush whatever data remains within internal - * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might - * still be some content left within the internal buffer if `output->size` is - * too small. It returns the number of bytes left in the internal buffer and - * must be called until it returns 0. - * - * ZSTD_endStream() instructs to finish a frame. It will perform a flush and - * write frame epilogue. The epilogue is required for decoders to consider a - * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush - * the full content if `output->size` is too small. In which case, call again - * ZSTD_endStream() to complete the flush. It returns the number of bytes left - * in the internal buffer and must be called until it returns 0. - ******************************************************************************/ +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); -/** - * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream - * @cParams: The compression parameters to be used for compression. - * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. */ -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/** - * struct ZSTD_CStream - the zstd streaming compression context +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. 
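
Following the quick howto above, a minimal usage sketch (the compression calls themselves and error handling are omitted):

    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, 5);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params); /* applies to all subsequent frames */
    /* ... compress with ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end) ... */
    ZSTD_freeCCtxParams(params); /* accepts NULL; frees only the params object */
    ZSTD_freeCCtx(cctx);
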
+ * This can be done even after compression is started.
+ * If nbWorkers==0, this will have no impact until a new compression is started.
+ * If nbWorkers>=1, new parameters will be picked up at next job,
+ * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ * Same as ZSTD_compressStream2(),
+ * but using only integral types as arguments.
+ * This variant might be helpful for binders from dynamic languages
+ * which have trouble handling structures containing memory pointers.
 */
-typedef struct ZSTD_CStream_s ZSTD_CStream;
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                            const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/* *************************************
+* Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ * Tells if the content of `buffer` starts with a valid Frame Identifier.
+ * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ * Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ * Create a digested dictionary, ready to start decompression operation without startup delay.
+ * Dictionary content is referenced, and therefore stays in dictBuffer.
+ * It is important that dictBuffer outlives DDict,
+ * it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ * Same as ZSTD_DCtx_loadDictionary(),
+ * but references `dict` content instead of copying it into `dctx`.
+ * This saves memory if `dict` remains around.
+ * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ * Same as ZSTD_DCtx_loadDictionary(),
+ * but gives direct control over
+ * how to load the dictionary (by copy ? by reference ?)
+ * and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ * Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ * Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ * This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
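
For example, a decoder that wants to bound its own memory use might do the following (the 16 MB limit is purely illustrative):

    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    /* refuse any frame whose window is larger than 16 MB (1 << 24) */
    size_t const err = ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)1 << 24);
    if (ZSTD_isError(err)) { /* handle error */ }
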
+ * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); -/*===== ZSTD_CStream management functions =====*/ -/** - * ZSTD_initCStream() - initialize a zstd streaming compression context - * @params: The zstd compression parameters. - * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must - * pass the source size (zero means empty source). Otherwise, - * the caller may optionally pass the source size, or zero if - * unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine - * how large the workspace must be. - * - * Return: The zstd streaming compression context. - */ -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); - -/** - * ZSTD_initCStream_usingCDict() - initialize a streaming compression context - * @cdict: The digested dictionary to use for compression. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() - * with the cParams used to initialize the cdict to determine - * how large the workspace must be. - * - * Return: The zstd streaming compression context. - */ -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); -/*===== Streaming compression functions =====*/ -/** - * ZSTD_resetCStream() - reset the context using parameters from creation - * @zcs: The zstd streaming compression context to reset. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. - * - * Resets the context using the parameters from creation. Skips dictionary - * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame - * content size is always written into the frame header. - * - * Return: Zero or an error, which can be checked using ZSTD_isError(). +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats */ -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); -/** - * ZSTD_compressStream() - streaming compress some of input into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * @input: Source buffer. `input->pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input->pos < input->size`, and it's up to the caller to present - * remaining data again. - * - * The `input` and `output` buffers may be any size. Guaranteed to make some - * forward progress if `input` and `output` are not empty. 
- *
- * Return: A hint for the number of bytes to use as the input for the next
- *         function call or an error, which can be checked using
- *         ZSTD_isError().
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
-/**
- * ZSTD_flushStream() - flush internal buffers into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- *
- * ZSTD_flushStream() must be called until it returns 0, meaning all the data
- * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
- * calling it too often will degrade the compression ratio.
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
 *
- * Return: The number of bytes still present within internal buffers or an
- *         error, which can be checked using ZSTD_isError().
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
 */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-/**
- * ZSTD_endStream() - flush internal buffers into output and end the frame
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
 *
- * ZSTD_endStream() must be called until it returns 0, meaning all the data has
- * been flushed and the frame epilogue has been written.
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_DCtx_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
 *
- * Return: The number of bytes still present within internal buffers or an
- *         error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-
-/**
- * ZSTD_CStreamInSize() - recommended size for the input buffer
+ * Usage is simply calling ZSTD_DCtx_refDDict() on multiple dict buffers.
 *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_CStreamInSize(void);
-/**
- * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ * Param has values of type ZSTD_refMultipleDDicts_e
 *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete compressed block.
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict() will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
 *
- * Return: The recommended size for the output buffer.
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
 */
-size_t ZSTD_CStreamOutSize(void);
-
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
-/*-*****************************************************************************
- * Streaming decompression - HowTo
- *
- * A ZSTD_DStream object is required to track streaming operations.
- * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
- * ZSTD_DStream objects can be re-used multiple times.
- *
- * Use ZSTD_decompressStream() repetitively to consume your input.
- * The function will update both `pos` fields.
- * If `input->pos < input->size`, some input has not been consumed.
- * It's up to the caller to present again remaining data.
- * If `output->pos < output->size`, decoder has flushed everything it could.
- * Returns 0 iff a frame is completely decoded and fully flushed.
- * Otherwise it returns a suggested next input size that will never load more
- * than the current frame.
- ******************************************************************************/
+/*! ZSTD_DCtx_setFormat() :
+ * Instruct the decoder context about what kind of data to decode next.
+ * This instruction is mandatory to decode data without a fully-formed header,
+ * such as ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
-/**
- * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+/*! ZSTD_decompressStream_simpleArgs() :
+ * Same as ZSTD_decompressStream(),
+ * but using only integral types as arguments.
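
A minimal sketch of this integral-types variant (dctx, dst, src and their sizes are assumed valid; one call shown, looping as for ZSTD_decompressStream()):

    size_t dstPos = 0, srcPos = 0;
    size_t const hint = ZSTD_decompressStream_simpleArgs(dctx,
                            dst, dstCapacity, &dstPos,
                            src, srcSize, &srcPos);
    /* dstPos and srcPos play the role of ZSTD_outBuffer.pos and ZSTD_inBuffer.pos */
    if (ZSTD_isError(hint)) { /* handle error */ }
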
+ * This can be helpful for binders from dynamic languages
+ * which have trouble handling structures containing memory pointers.
 */
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                            const void* src, size_t srcSize, size_t* srcPos);
+
+
+/* ******************************************************************
+* Advanced streaming functions
+* Warning : most of these functions are now redundant with the Advanced API.
+* Once Advanced API reaches "stable" status,
+* redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*===== Advanced Streaming compression functions =====*/
+
+/*! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                           const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/*! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                          const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/*! ZSTD_initCStream_usingCDict_advanced() :
+ * This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                     const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * start a new frame, using same parameters from previous frame.
+ * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
+ * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
-/**
- * struct ZSTD_DStream - the zstd streaming decompression context
+
+typedef struct {
+    unsigned long long ingested;   /* nb input bytes read and buffered */
+    unsigned long long consumed;   /* nb input bytes actually compressed */
+    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : latest started job nb */
+    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input),
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
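
One plausible way to poll the counters defined above from the thread driving a multithreaded compression (cctx assumed valid; a sketch, not a complete program):

    ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
    /* input read and buffered but not yet compressed */
    unsigned long long const pending = fp.ingested - fp.consumed;
    /* fp.produced counts compressed bytes generated so far; flushed bytes
     * must be tracked by the caller, per the struct comments above */
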
*/ -typedef struct ZSTD_DStream_s ZSTD_DStream; -/*===== ZSTD_DStream management functions =====*/ -/** - * ZSTD_initDStream() - initialize a zstd streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. - * - * Return: The zstd streaming decompression context. - */ -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, - size_t workspaceSize); -/** - * ZSTD_initDStream_usingDDict() - initialize streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @ddict: The digested dictionary to use for decompression. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. - * - * Return: The zstd streaming decompression context. - */ -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, - const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); -/*===== Streaming decompression functions =====*/ -/** - * ZSTD_resetDStream() - reset the context using parameters from creation - * @zds: The zstd streaming decompression context to reset. + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: * - * Resets the context using the parameters from creation. Skips dictionary - * loading, since it can be reused. + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ -size_t ZSTD_resetDStream(ZSTD_DStream *zds); -/** - * ZSTD_decompressStream() - streaming decompress some of input into output - * @zds: The zstd streaming decompression context. - * @output: Destination buffer. `output.pos` is updated to indicate how much - * decompressed data was written. - * @input: Source buffer. `input.pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input.pos < input.size`, and it's up to the caller to present - * remaining data again. 
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/*!
+ * This function is deprecated, and is equivalent to:
 *
- * The `input` and `output` buffers may be any size. Guaranteed to make some
- * forward progress if `input` and `output` are not empty.
- * ZSTD_decompressStream() will not consume the last byte of the frame until
- * the entire frame is flushed.
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
 *
- * Return: Returns 0 iff a frame is completely decoded and fully flushed.
- *         Otherwise returns a hint for the number of bytes to use as the input
- *         for the next function call or an error, which can be checked using
- *         ZSTD_isError(). The size hint will never load more than the frame.
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
 */
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
-/**
- * ZSTD_DStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_DStreamInSize(void);
-/**
- * ZSTD_DStreamOutSize() - recommended size for the output buffer
+/*!
+ * This function is deprecated, and is equivalent to:
 *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete decompressed block.
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
 *
- * Return: The recommended size for the output buffer.
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
 */
-size_t ZSTD_DStreamOutSize(void);
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
-/* --- Constants ---*/
-#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
+/* *******************************************************************
+* Buffer-less and synchronous inner streaming functions
+*
+* This is an advanced API, giving full control over buffer management, for users who need direct control over memory.
+* But it's also a complex one, with several restrictions, documented below.
+* Prefer normal streaming API for an easier experience.
+********************************************************************* */
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
+/*
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recovery after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consist of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
-#define ZSTD_WINDOWLOG_MAX_32 27
-#define ZSTD_WINDOWLOG_MAX_64 27
-#define ZSTD_WINDOWLOG_MAX \
-	((unsigned int)(sizeof(size_t) == 4 \
-		? ZSTD_WINDOWLOG_MAX_32 \
-		: ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN 6
-#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
-#define ZSTD_HASHLOG3_MAX 17
-#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN 1
-/* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_SEARCHLENGTH_MAX 7
-/* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_SEARCHLENGTH_MIN 3
-#define ZSTD_TARGETLENGTH_MIN 4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* for static allocation */
-#define ZSTD_FRAMEHEADERSIZE_MAX 18
-#define ZSTD_FRAMEHEADERSIZE_MIN 6
-#define ZSTD_frameHeaderSize_prefix 5
-#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
-#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
-/* magic number + skippable frame length */
-#define ZSTD_skippableHeaderSize 8
-
-
-/*-*************************************
- * Compressed size functions
- **************************************/
-
-/**
- * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
- * @src:     Source buffer. It should point to the start of a zstd encoded frame
- *           or a skippable frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           size of the frame.
- *
- * Return:   The compressed size of the frame pointed to by `src` or an error,
- *           which can be checked with ZSTD_isError().
- *           Suitable to pass to ZSTD_decompress() or similar functions.
- */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Decompressed size functions
- **************************************/
-/**
- * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
- * @src:     It should point to the start of a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The frame content size stored in the frame header if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
- *           frame header.
`ZSTD_CONTENTSIZE_ERROR` on invalid input. - */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); - -/** - * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames - * @src: It should point to the start of a series of zstd encoded and/or - * skippable frames. - * @srcSize: The exact size of the series of frames. - * - * If any zstd encoded frame in the series doesn't have the frame content size - * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always - * set when using ZSTD_compress(). The decompressed size can be very large. - * If the source is untrusted, the decompressed size could be wrong or - * intentionally modified. Always ensure the result fits within the - * application's authorized limits. ZSTD_findDecompressedSize() handles multiple - * frames, and so it must traverse the input to read each frame header. This is - * efficient as most of the data is skipped, however it does mean that all frame - * data must be present and valid. - * - * Return: Decompressed size of all the data contained in the frames if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. - * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. - */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Advanced compression functions - **************************************/ -/** - * ZSTD_checkCParams() - ensure parameter values remain within authorized range - * @cParams: The zstd compression parameters. - * - * Return: Zero or an error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); - -/** - * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize - * @srcSize: Optionally the estimated source size, or zero if unknown. - * @dictSize: Optionally the estimated dictionary size, or zero if unknown. - * - * Return: The optimized parameters. - */ -ZSTD_compressionParameters ZSTD_adjustCParams( - ZSTD_compressionParameters cParams, unsigned long long srcSize, - size_t dictSize); - -/*--- Advanced decompression functions ---*/ - -/** - * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame - * @buffer: The source buffer to check. - * @size: The size of the source buffer, must be at least 4 bytes. - * - * Return: True iff the buffer starts with a zstd or skippable frame identifier. - */ -unsigned int ZSTD_isFrame(const void *buffer, size_t size); - -/** - * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary - * @dict: The dictionary buffer. - * @dictSize: The size of the dictionary buffer. - * - * Return: The dictionary id stored within the dictionary or 0 if the - * dictionary is not a zstd dictionary. If it returns 0 the - * dictionary can still be loaded as a content-only dictionary. 
- */ -unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ -/** - * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict - * @ddict: The ddict to find the id of. - * - * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not - * a zstd dictionary. If it returns 0 `ddict` will be loaded as a - * content-only dictionary. - */ -unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/** - * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame - * @src: Source buffer. It must be a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The dictionary id required to decompress the frame stored within - * `src` or 0 if the dictionary id could not be decoded. It can return - * 0 if the frame does not require a dictionary, the dictionary id - * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` - * is too small. - */ -unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); -/** - * struct ZSTD_frameParams - zstd frame parameters stored in the frame header - * @frameContentSize: The frame content size, or 0 if not present. - * @windowSize: The window size, or 0 if the frame is a skippable frame. - * @dictID: The dictionary id, or 0 if not present. - * @checksumFlag: Whether a checksum was used. - */ +/* + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. 
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, and check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support a `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity :
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that the previous contiguous segment is large enough to properly handle the maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory-efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if the required value is too large for the current system (in 32-bit mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to the previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  whose maximum size is provided in the `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At that point, decoding can resume from the beginning of the buffer.
+  Note that already-decoded data stored in the buffer should be flushed before being overwritten.
+
+  Alternatives are possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  i.e. use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are set up, start decompression with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then call ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  The context can then be reset to start a new decompression.
+
+  Note : it's possible to know whether the next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
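+
+  A minimal sketch of this loop (illustrative only, not part of the upstream documentation;
+  it assumes `src` holds one complete frame, `dst` has room for every decoded block,
+  and error handling is reduced to a single check) :
+
+      size_t srcSize;
+      ZSTD_decompressBegin(dctx);
+      while ((srcSize = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
+          size_t const ret = ZSTD_decompressContinue(dctx, dst, dstCapacity, src, srcSize);
+          if (ZSTD_isError(ret)) return ret;      /* decoding error */
+          dst = (char*)dst + ret;                 /* `ret` bytes regenerated (0 for metadata items) */
+          dstCapacity -= ret;
+          src = (const char*)src + srcSize;       /* exactly `srcSize` bytes consumed */
+      }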
+ + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; typedef struct { - unsigned long long frameContentSize; - unsigned int windowSize; - unsigned int dictID; - unsigned int checksumFlag; -} ZSTD_frameParams; - -/** - * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame - * @fparamsPtr: On success the frame parameters are written here. - * @src: The source buffer. It must point to a zstd or skippable frame. - * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is - * always large enough to succeed. - * - * Return: 0 on success. If more data is required it returns how many bytes - * must be provided to make forward progress. Otherwise it returns - * an error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, - size_t srcSize); - -/*-***************************************************************************** - * Buffer-less and synchronous inner streaming functions - * - * This is an advanced API, giving full control over buffer management, for - * users which need direct control over memory. - * But it's also a complex one, with many restrictions (documented below). - * Prefer using normal streaming API for an easier experience - ******************************************************************************/ + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ +/*! 
ZSTD_getFrameHeader_advanced() :
+ * same as ZSTD_getFrameHeader(),
+ * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* misc */
+ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/*       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users will have to save the metadata needed to regenerate the data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would corrupt the statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/

-/*-*****************************************************************************
- * Buffer-less streaming compression (synchronous mode)
- *
- * A ZSTD_CCtx object is required to track streaming operations.
- * Use ZSTD_initCCtx() to initialize a context.
- * ZSTD_CCtx object can be re-used multiple times within successive compression
- * operations.
- * - * Start by initializing a context. - * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary - * compression, - * or ZSTD_compressBegin_advanced(), for finer parameter control. - * It's also possible to duplicate a reference context which has already been - * initialized, using ZSTD_copyCCtx() - * - * Then, consume your input using ZSTD_compressContinue(). - * There are some important considerations to keep in mind when using this - * advanced function : - * - ZSTD_compressContinue() has no internal buffer. It uses externally provided - * buffer only. - * - Interface is synchronous : input is consumed entirely and produce 1+ - * (or more) compressed blocks. - * - Caller must ensure there is enough space in `dst` to store compressed data - * under worst case scenario. Worst case evaluation is provided by - * ZSTD_compressBound(). - * ZSTD_compressContinue() doesn't guarantee recover after a failed - * compression. - * - ZSTD_compressContinue() presumes prior input ***is still accessible and - * unmodified*** (up to maximum distance size, see WindowLog). - * It remembers all previous contiguous blocks, plus one separated memory - * segment (which can itself consists of multiple contiguous blocks) - * - ZSTD_compressContinue() detects that prior input has been overwritten when - * `src` buffer overlaps. In which case, it will "discard" the relevant memory - * section from its history. - * - * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) - * and optional checksum. It's possible to use srcSize==0, in which case, it - * will write a final empty block to end the frame. Without last block mark, - * frames will be considered unfinished (corrupted) by decoders. - * - * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new - * frame. - ******************************************************************************/ +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ -/*===== Buffer-less streaming compression functions =====*/ -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, int compressionLevel); -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, ZSTD_parameters params, - unsigned long long pledgedSrcSize); -size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - - - -/*-***************************************************************************** - * Buffer-less streaming decompression (synchronous mode) - * - * A ZSTD_DCtx object is required to track streaming operations. 
- * Use ZSTD_initDCtx() to initialize a context. - * A ZSTD_DCtx object can be re-used multiple times. - * - * First typical operation is to retrieve frame parameters, using - * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide - * important information to correctly decode the frame, such as the minimum - * rolling buffer size to allocate to decompress data (`windowSize`), and the - * dictionary ID used. - * Note: content size is optional, it may not be present. 0 means unknown. - * Note that these values could be wrong, either because of data malformation, - * or because an attacker is spoofing deliberate false information. As a - * consequence, check that values remain within valid application range, - * especially `windowSize`, before allocation. Each application can set its own - * limit, depending on local restrictions. For extended interoperability, it is - * recommended to support at least 8 MB. - * Frame parameters are extracted from the beginning of the compressed frame. - * Data fragment must be large enough to ensure successful decoding, typically - * `ZSTD_frameHeaderSize_max` bytes. - * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. - * >0: `srcSize` is too small, provide at least this many bytes. - * errorCode, which can be tested using ZSTD_isError(). - * - * Start decompression, with ZSTD_decompressBegin() or - * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared - * context, using ZSTD_copyDCtx(). - * - * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() - * alternatively. - * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' - * to ZSTD_decompressContinue(). - * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will - * fail. - * - * The result of ZSTD_decompressContinue() is the number of bytes regenerated - * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an - * error; it just means ZSTD_decompressContinue() has decoded some metadata - * item. It can also be an error code, which can be tested with ZSTD_isError(). - * - * ZSTD_decompressContinue() needs previous data blocks during decompression, up - * to `windowSize`. They should preferably be located contiguously, prior to - * current block. Alternatively, a round buffer of sufficient size is also - * possible. Sufficient size is determined by frame parameters. - * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't - * follow each other, make sure that either the compressor breaks contiguity at - * the same place, or that previous contiguous segment is large enough to - * properly handle maximum back-reference. - * - * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - * Context can then be reset to start a new decompression. - * - * Note: it's possible to know if next input to present is a header or a block, - * using ZSTD_nextInputType(). This information is not required to properly - * decode a frame. - * - * == Special case: skippable frames == - * - * Skippable frames allow integration of user-defined data into a flow of - * concatenated frames. Skippable frames will be ignored (skipped) by a - * decompressor. 
The format of skippable frames is as follows: - * a) Skippable frame ID - 4 Bytes, Little endian format, any value from - * 0x184D2A50 to 0x184D2A5F - * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - * c) Frame Content - any content (User Data) of length equal to Frame Size - * For skippable frames ZSTD_decompressContinue() always returns 0. - * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 - * what means that a frame is skippable. - * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might - * actually be a zstd encoded frame with no content. For purposes of - * decompression, it is valid in both cases to skip the frame using - * ZSTD_findFrameCompressedSize() to find its size in bytes. - * It also returns frame size as fparamsPtr->frameContentSize. - ******************************************************************************/ -/*===== Buffer-less streaming decompression functions =====*/ -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, - size_t dictSize); -void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -typedef enum { - ZSTDnit_frameHeader, - ZSTDnit_blockHeader, - ZSTDnit_block, - ZSTDnit_lastBlock, - ZSTDnit_checksum, - ZSTDnit_skippableFrame -} ZSTD_nextInputType_e; -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); - -/*-***************************************************************************** - * Block functions - * - * Block functions produce and decode raw zstd blocks, without frame metadata. - * Frame metadata cost is typically ~18 bytes, which can be non-negligible for - * very small blocks (< 100 bytes). User will have to take in charge required - * information to regenerate data, such as compressed and content sizes. - * - * A few rules to respect: - * - Compressing and decompressing require a context structure - * + Use ZSTD_initCCtx() and ZSTD_initDCtx() - * - It is necessary to init context before starting - * + compression : ZSTD_compressBegin() - * + decompression : ZSTD_decompressBegin() - * + variants _usingDict() are also allowed - * + copyCCtx() and copyDCtx() work too - * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - * + If you need to compress more, cut data into multiple blocks - * + Consider using the regular ZSTD_compress() instead, as frame metadata - * costs become negligible when source size is large. - * - When a block is considered not compressible enough, ZSTD_compressBlock() - * result will be zero. In which case, nothing is produced into `dst`. - * + User must test for such outcome and deal directly with uncompressed data - * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! - * + In case of multiple successive blocks, decoder must be informed of - * uncompressed block existence to follow proper history. Use - * ZSTD_insertBlock() in such a case. 
- ******************************************************************************/ +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ -/* Define for static allocation */ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) -/*===== Raw zstd block functions =====*/ -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, - size_t blockSize); - -#endif /* ZSTD_H */ diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index f5d778e7e5c72..65218ec5b8f2a 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -1,10 +1,46 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +# ################################################################ +# Copyright (c) Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o ccflags-y += -O3 -zstd_compress-y := fse_compress.o huf_compress.o compress.o \ - entropy_common.o fse_decompress.o zstd_common.o -zstd_decompress-y := huf_decompress.o decompress.o \ - entropy_common.o fse_decompress.o zstd_common.o +zstd_compress-y := \ + zstd_compress_module.o \ + common/debug.o \ + common/entropy_common.o \ + common/error_private.o \ + common/fse_decompress.o \ + common/zstd_common.o \ + compress/fse_compress.o \ + compress/hist.o \ + compress/huf_compress.o \ + compress/zstd_compress.o \ + compress/zstd_compress_literals.o \ + compress/zstd_compress_sequences.o \ + compress/zstd_compress_superblock.o \ + compress/zstd_double_fast.o \ + compress/zstd_fast.o \ + compress/zstd_lazy.o \ + compress/zstd_ldm.o \ + compress/zstd_opt.o \ + +zstd_decompress-y := \ + zstd_decompress_module.o \ + common/debug.o \ + common/entropy_common.o \ + common/error_private.o \ + common/fse_decompress.o \ + common/zstd_common.o \ + decompress/huf_decompress.o \ + decompress/zstd_ddict.o \ + decompress/zstd_decompress.o \ + decompress/zstd_decompress_block.o \ diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h deleted file mode 100644 index 5d6343c1a909e..0000000000000 --- a/lib/zstd/bitstream.h +++ /dev/null @@ -1,380 +0,0 @@ -/* - * bitstream - * Part of FSE library - * header file (to include) - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ - -/*-**************************************** -* Dependencies -******************************************/ -#include "error_private.h" /* error codes and messages */ -#include "mem.h" /* unaligned access routines */ - -/*========================================= -* Target specific -=========================================*/ -#define STREAM_ACCUMULATOR_MIN_32 25 -#define STREAM_ACCUMULATOR_MIN_64 57 -#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. -* A critical property of these streams is that they encode and decode in **reverse** direction. -* So the first bit sequence you add will be the last to be read, like a LIFO stack. -*/ -typedef struct { - size_t bitContainer; - int bitPos; - char *startPtr; - char *ptr; - char *endPtr; -} BIT_CStream_t; - -ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity); -ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC); -ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. 
-* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct { - size_t bitContainer; - unsigned bitsConsumed; - const char *ptr; - const char *start; -} BIT_DStream_t; - -typedef enum { - BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 -} BIT_DStream_status; /* result of BIT_reloadDStream() */ -/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize); -ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits); -ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD); -ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD); - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -*/ - -/*-**************************************** -* unsafe API -******************************************/ -ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - -ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC); -/* unsafe version; does not check buffer overflow */ - -ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - -/*-************************************************************** -* Internal functions -****************************************************************/ -ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); } - -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = {0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, - 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, - 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */ - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! 
BIT_initCStream() : - * `dstCapacity` must be > sizeof(void*) - * @return : 0 if success, - otherwise an error code (can be tested using ERR_isError() ) */ -ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity) -{ - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char *)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); - if (dstCapacity <= sizeof(bitC->ptr)) - return ERROR(dstSize_tooSmall); - return 0; -} - -/*! BIT_addBits() : - can add up to 26 bits into `bitC`. - Does not check for register overflow ! */ -ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_addBitsFast() : - * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ -ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_flushBitsFast() : - * unsafe version; does not check buffer overflow */ -ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_flushBits() : - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ -ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) - bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_closeCStream() : - * @return : size of CStream, in bytes, - or 0 if it could not fit into dstBuffer */ -ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - - if (bitC->ptr >= bitC->endPtr) - return 0; /* doesn't fit within authorized budget : cancel */ - - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} - -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : -* Initialize a BIT_DStream_t. -* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcSize` must be the *exact* size of the bitStream, in bytes. -* @return : size of stream (== srcSize) or an errorCode if a problem is detected -*/ -ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { - memset(bitD, 0, sizeof(*bitD)); - return ERROR(srcSize_wrong); - } - - if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->start = (const char *)srcBuffer; - bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); - { - BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; - bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) - return ERROR(GENERIC); /* endMark not present */ - } - } else { - bitD->start = (const char *)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE *)(bitD->start); - switch (srcSize) { - case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); - fallthrough; - case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); - fallthrough; - case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); - fallthrough; - case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; - fallthrough; - case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; - fallthrough; - case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; - fallthrough; - default:; - } - { - BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) - return ERROR(GENERIC); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8; - } - - return srcSize; -} - -ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } - -ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; } - -ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; } - -/*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted - */ -ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits) -{ - U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask); -} - -/*! BIT_lookBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits) -{ - U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask); -} - -ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. - */ -ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_readBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_reloadDStream() : -* Refill `bitD` from buffer previously set in BIT_initDStream() . -* This function is safe, it guarantees it will not read beyond src buffer. -* @return : status of `BIT_DStream_t` internal register. 
- if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ -ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); - return BIT_DStream_unfinished; - } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8) - return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - { - U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes * 8; - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - -/*! BIT_endOfDStream() : -* @return Tells if DStream has exactly reached its end (all bits consumed). -*/ -ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8)); -} - -#endif /* BITSTREAM_H_MODULE */ diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h new file mode 100644 index 0000000000000..28248abe8612a --- /dev/null +++ b/lib/zstd/common/bitstream.h @@ -0,0 +1,437 @@ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ +#include "error_private.h" /* error codes and messages */ + + +/*========================================= +* Target specific +=========================================*/ + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. 
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct {
+    size_t bitContainer;
+    unsigned bitPos;
+    char* startPtr;
+    char* ptr;
+    char* endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+* bitStream will never write outside of this buffer.
+* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+* bits are first added to a local register.
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Writing data into memory is an explicit operation, performed by the flushBits function.
+* Hence, keep track of how many bits are potentially stored in the local register to avoid register overflow.
+* After a flushBits, a maximum of 7 bits might still be stored in the local register.
+*
+* Avoid storing elements of more than 24 bits if you want compatibility with 32-bit bitstream readers.
+*
+* Last operation is to close the bitStream.
+* The function returns the final size of CStream in bytes.
+* If data couldn't fit into `dstBuffer`, it will return 0 ( == not storable)
+*/
+
+
+/*-********************************************
+* bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+    size_t bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+    const char* limitPtr;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+* A chunk of the bitStream is then stored into a local register.
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* You can then retrieve bitFields stored into the local register, **in reverse order**.
+* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+* Otherwise, it can be less than that, so proceed accordingly.
+* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
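+*
+* A minimal sketch of a read-back loop (illustrative only, not part of the upstream header;
+* `srcBuffer`/`srcSize` must describe the exact bitstream, `nbBits` per field must mirror
+* the encoding side, and `consume()` is a hypothetical handler for each decoded field) :
+*
+*     BIT_DStream_t bitD;
+*     size_t const initRet = BIT_initDStream(&bitD, srcBuffer, srcSize);
+*     if (ERR_isError(initRet)) return initRet;
+*     do {
+*         size_t const field = BIT_readBits(&bitD, nbBits);  /* most recently written field comes out first */
+*         consume(field);
+*     } while (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished);
+*     assert(BIT_endOfDStream(&bitD));  /* 1 when all bits have been consumed */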
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! 
BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + ZSTD_FALLTHROUGH; + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + ZSTD_FALLTHROUGH; + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + ZSTD_FALLTHROUGH; + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + ZSTD_FALLTHROUGH; + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + ZSTD_FALLTHROUGH; + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + ZSTD_FALLTHROUGH; + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! 
BIT_reloadDStream() :
+ * Refill `bitD` from buffer previously set in BIT_initDStream().
+ * This function is safe : it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->limitPtr) {
+        return BIT_reloadDStreamFast(bitD);
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    /* start < ptr < limitPtr */
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+        return result;
+    }
+}

+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
new file mode 100644
index 0000000000000..a1a051e4bce66
--- /dev/null
+++ b/lib/zstd/common/compiler.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */

+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# define INLINE_KEYWORD inline
+#else
+# define INLINE_KEYWORD
+#endif
+
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+
+
+/*
+  On MSVC qsort requires that functions passed into it use the __cdecl calling convention (CC).
+  This explicitly marks such functions as __cdecl so that the code will still compile
+  if a CC other than __cdecl has been made the default.
+*/
+#define WIN_CDECL
+
+/*
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/*
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compiler's
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#define UNUSED_ATTR __attribute__((unused)) + +/* force no inlining */ +#define FORCE_NOINLINE static __attribute__((__noinline__)) + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +#elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +#else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) + +/* disable warnings */ + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ + + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* C-language Attributes are added in C23. */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute) +# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) +#else +# define ZSTD_HAS_C_ATTRIBUTE(x) 0 +#endif + +/* Only use C++ attributes in C++. 
Some compilers report support for C++ + * attributes when compiling with C. + */ +#define ZSTD_HAS_CPP_ATTRIBUTE(x) 0 + +/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute. + * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough + * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough + * - Else: __attribute__((__fallthrough__)) + */ +#define ZSTD_FALLTHROUGH fallthrough + +/* detects whether we are being compiled under msan */ + + +/* detects whether we are being compiled under asan */ + + +#endif /* ZSTD_COMPILER_H */ diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h new file mode 100644 index 0000000000000..0db7b42407eea --- /dev/null +++ b/lib/zstd/common/cpu.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/* + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +#include "mem.h" + + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. 
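+ * (Each X() expansion below defines a tiny feature accessor; e.g.
+ *     C(sse3, 0)
+ * becomes, via X(sse3, f1c, 0),
+ *     MEM_STATIC int ZSTD_cpuid_sse3(ZSTD_cpuid_t const cpuid)
+ *     { return ((cpuid.f1c) & (1U << 0)) != 0; }
+ * so callers can test features as ZSTD_cpuid_bmi2(ZSTD_cpuid()).)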
*/ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c new file mode 100644 index 0000000000000..bb863c9ea6164 --- /dev/null +++ b/lib/zstd/common/debug.c @@ -0,0 +1,24 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +int g_debuglevel = DEBUGLEVEL; diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h new file mode 100644 index 0000000000000..6dd88d1fbd02c --- /dev/null +++ b/lib/zstd/common/debug.h @@ -0,0 +1,101 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. 
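+ *
+ * (Illustrative, hypothetical uses of the three, inside a function body;
+ * the names are invented for this sketch:
+ *     DEBUG_STATIC_ASSERT(sizeof(size_t) >= 4);  rejected at compile time if false
+ *     assert(srcSize <= dstCapacity);            active when DEBUGLEVEL >= 1
+ *     DEBUGLOG(4, "frame %u done", frameNb);     printed when 4 <= g_debuglevel
+ * )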
+ * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +# include "zstd_deps.h" +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +# include "zstd_deps.h" +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + + +#endif /* DEBUG_H_12987983217 */ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c new file mode 100644 index 0000000000000..53b47a2b52ff2 --- /dev/null +++ b/lib/zstd/common/entropy_common.c @@ -0,0 +1,357 @@ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */
+
+/* *************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h" /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+static U32 FSE_ctz(U32 val)
+{
+ assert(val != 0);
+ {
+# if (__GNUC__ >= 3) /* GCC Intrinsic */
+ return __builtin_ctz(val);
+# else /* Software version */
+ U32 count = 0;
+ while ((val & 1) == 0) {
+ val >>= 1;
+ ++count;
+ }
+ return count;
+# endif
+ }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ unsigned const maxSV1 = *maxSVPtr + 1;
+ int previous0 = 0;
+
+ if (hbSize < 8) {
+ /* This function only works when hbSize >= 8 */
+ char buffer[8] = {0};
+ ZSTD_memcpy(buffer, headerBuffer, hbSize);
+ { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+ buffer, sizeof(buffer));
+ if (FSE_isError(countSize)) return countSize;
+ if (countSize > hbSize) return ERROR(corruption_detected);
+ return countSize;
+ } }
+ assert(hbSize >= 8);
+
+ /* init */
+ ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ for (;;) {
+ if (previous0) {
+ /* Count the number of repeats. Each time the
+ * 2-bit repeat code is 0b11 there is another
+ * repeat.
+ * Avoid UB by setting the high bit to 1.
+ */
+ int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+ while (repeats >= 12) {
+ charnum += 3 * 12;
+ if (LIKELY(ip <= iend-7)) {
+ ip += 3;
+ } else {
+ bitCount -= (int)(8 * (iend - 7 - ip));
+ bitCount &= 31;
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip) >> bitCount;
+ repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+ }
+ charnum += 3 * repeats;
+ bitStream >>= 2 * repeats;
+ bitCount += 2 * repeats;
+
+ /* Add the final repeat which isn't 0b11. */
+ assert((bitStream & 3) < 3);
+ charnum += bitStream & 3;
+ bitCount += 2;
+
+ /* This is an error, but break and return an error
+ * at the end, because returning out of a loop makes
+ * it harder for the compiler to optimize.
+ */
+ if (charnum >= maxSV1) break;
+
+ /* We don't need to set the normalized count to 0
+ * because we already memset the whole buffer to 0.
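+ *
+ * (Worked illustration of the repeat coding handled above: after a zero
+ * count, 2-bit codes are read; each 0b11 code adds 3 more zero symbols,
+ * and the first non-0b11 code adds its value 0..2 and ends the run, so
+ * the code sequence 11,11,01 skips 3+3+1 = 7 symbols in total.)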
+ */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 
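+ (Worked illustration of the implied weight, as computed in the body below:
+ explicit weights {2,1,1} contribute 2^1 + 2^0 + 2^0 = 4, so
+ tableLog = highbit32(4)+1 = 3; the remainder 2^3 - 4 = 4 is a clean
+ power of 2, so the implied last symbol gets weight highbit32(4)+1 = 3.)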
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize)
+{
+ U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+ U32* nbSymbolsPtr, U32* tableLogPtr,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize,
+ int bmi2)
+{
+ U32 weightTotal;
+ const BYTE* ip = (const BYTE*) src;
+ size_t iSize;
+ size_t oSize;
+
+ if (!srcSize) return ERROR(srcSize_wrong);
+ iSize = ip[0];
+ /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
+
+ if (iSize >= 128) { /* special header */
+ oSize = iSize - 127;
+ iSize = ((oSize+1)/2);
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ if (oSize >= hwSize) return ERROR(corruption_detected);
+ ip += 1;
+ { U32 n;
+ for (n=0; n<oSize; n+=2) {
+ huffWeight[n] = ip[n/2] >> 4;
+ huffWeight[n+1] = ip[n/2] & 15;
+ } } }
+ else { /* header compressed with FSE (normal case) */
+ if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+ /* max (hwSize-1) values decoded, as last one is implied */
+ oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
+ if (FSE_isError(oSize)) return oSize;
+ }
+
+ /* collect weight stats */
+ ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+ weightTotal = 0;
+ { U32 n; for (n=0; n<oSize; n++) {
+ if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ rankStats[huffWeight[n]]++;
+ weightTotal += (1 << huffWeight[n]) >> 1;
+ } }
+ if (weightTotal == 0) return ERROR(corruption_detected);
+
+ /* get last non-null symbol weight (implied, total must be 2^n) */
+ { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+ if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+ *tableLogPtr = tableLog;
+ /* determine last weight */
+ { U32 const total = 1 << tableLog;
+ U32 const rest = total - weightTotal;
+ U32 const verif = 1 << BIT_highbit32(rest);
+ U32 const lastWeight = BIT_highbit32(rest) + 1;
+ if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
+ huffWeight[oSize] = (BYTE)lastWeight;
+ rankStats[lastWeight]++;
+ } }
+
+ /* check tree construction validity */
+ if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
+
+ /* results */
+ *nbSymbolsPtr = (U32)(oSize+1);
+ return iSize+1;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. 
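+ * The dispatch pattern used here and for HUF_readStats below: the
+ * FORCE_INLINE'd _body() is instantiated twice, once plainly and once
+ * under TARGET_ATTRIBUTE("bmi2"), and a thin wrapper selects at run time:
+ *     if (bmi2) return FSE_readNCount_body_bmi2(...);   when DYNAMIC_BMI2
+ *     return FSE_readNCount_body_default(...);
+ * so only the wrapper, not the hot loop, pays for the CPU check.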
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c new file mode 100644 index 0000000000000..6d1135f8c3733 --- /dev/null +++ b/lib/zstd/common/error_private.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h new file mode 100644 index 0000000000000..d14e686adf951 --- /dev/null +++ b/lib/zstd/common/error_private.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+
+
+/* ****************************************
+* Dependencies
+******************************************/
+#include "zstd_deps.h" /* size_t */
+#include <linux/zstd_errors.h> /* enum list */
+
+
+/* ****************************************
+* Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+
+/*-****************************************
+* Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+* Error codes handling
+******************************************/
+#undef ERROR /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
+
+
+/*-****************************************
+* Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+ return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
new file mode 100644
index 0000000000000..0bb174c2c3677
--- /dev/null
+++ b/lib/zstd/common/fse.h
@@ -0,0 +1,710 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +#include "zstd_deps.h" /* size_t, ptrdiff_t */ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /*< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! 
FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. 
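+ (Hypothetical usage sketch:
+     FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);
+     if (ct == NULL) return ERROR(memory_allocation);
+     ... fill with FSE_buildCTable(ct, norm, maxSymbolValue, tableLog), use ...
+     FSE_freeCTable(ct);
+ )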
+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 
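+
+(A condensed, hypothetical sketch of the steps so far, eliding the counting
+step and buffer sizing; CHECK_F() is the error-forwarding helper from
+error_private.h, and count[], norm[], header and ct are assumed allocated:
+    unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 0) );
+    CHECK_F( FSE_writeNCount(header, headerCapacity, norm, maxSymbolValue, tableLog) );
+    CHECK_F( FSE_buildCTable(ct, norm, maxSymbolValue, tableLog) );
+)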
+ +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). 
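+(Alternatively to the heap constructor, the static-allocation macros in the
+static-linking section further below permit a worst-case stack table, e.g.
+the hypothetical
+    FSE_DTable dt[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+)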
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/*< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/*< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. 
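+ * (Illustrative sizing with the macro defined just below: for
+ * maxSymbolValue==255 and tableLog==12 the bound is
+ * 255 + 2 + (1<<10) = 1281 unsigned values, so a caller could pass
+ *     unsigned wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(255, 12)];
+ * with wkspSize = sizeof(wksp).)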
+ */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/*< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + +typedef enum { + FSE_repeat_none, /*< Cannot use the previous table */ + FSE_repeat_check, /*< Can use the previous table but it must be checked */ + FSE_repeat_valid /*< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/*< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. 
+ +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/*< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). 
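+
+(A minimal, hypothetical decode loop combining the pieces above; the reload
+step it relies on is described next:
+    while (BIT_reloadDStream(&DStream) == BIT_DStream_unfinished && left--)
+        *op++ = FSE_decodeSymbol(&DState, &DStream);
+)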
+Refueling the register from memory is manually performed by the reload method.
+ endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+ BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+ BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+ FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+* FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+* Implementation of inlined functions
+*******************************************/
+typedef struct {
+ int deltaFindState;
+ U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+ const void* ptr = ct;
+ const U16* u16ptr = (const U16*) ptr;
+ const U32 tableLog = MEM_read16(ptr);
+ statePtr->value = (ptrdiff_t)1<<tableLog;
+ statePtr->stateTable = u16ptr+2;
+ statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+ statePtr->stateLog = tableLog;
+}
+
+
+/*! 
FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, 
BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+# define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+* template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+* Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
new file mode 100644
index 0000000000000..2c8bbe3e4c148
--- /dev/null
+++ b/lib/zstd/common/fse_decompress.c
@@ -0,0 +1,390 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+* Includes
+****************************************************************/
+#include "debug.h" /* assert */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
+
+
+/* **************************************************************
+* Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
+
+
+/* **************************************************************
+* Templates
+****************************************************************/
+/*
+ designed to be included
+ for type-specific functions (template emulation in C)
+ Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+# error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+# error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    ZSTD_free(dt);
+}
+
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned
tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + 
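/* Both interleaved states have been fully drained by FSE_GETSYMBOL at this
+ * point, so op points one past the last regenerated byte and op-ostart is
+ * the decompressed size returned to the caller. */
+ 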
return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; + FSE_DTable dtable[1]; /* Dynamically sized */ +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + + DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); + + /* normal FSE decoding mode */ + { + size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. 
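Each thin wrapper instantiates the force-inlined body once for its own target, so DYNAMIC_BMI2 builds get two specialized copies (portable and BMI2) instead of inlining the large body at every call site.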
*/ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h new file mode 100644 index 0000000000000..88c5586646aa5 --- /dev/null +++ b/lib/zstd/common/huf.h @@ -0,0 +1,356 @@ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +#include "zstd_deps.h" /* size_t */ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/* HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
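+ * (callers typically fall back to storing such a block uncompressed)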
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/* HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /*< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /*< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /*< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /*< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/* HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/* HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
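+/* Note : the absolute limit appears tied to the serialized table format,
+ * where Huffman weights are carried on 4 bits (see HUF_readStats() below). */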
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. 
save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /*< Cannot use the previous table */ + HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /*< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/* HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/* HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
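+ * Passing bmi2=0 is always safe and selects the portable code path.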
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/* HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/* HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/* HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/* + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
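+ * For reference, the value below works out to (2 << 10) + (1 << 9) = 2560 bytes.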
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/* HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
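A minimal call
+ * sequence, as a sketch (dst/src/wksp are the caller's buffers, with wksp at
+ * least HUF_WORKSPACE_SIZE bytes; preferRepeat and bmi2 are both 0 here) :
+ *   HUF_repeat rep = HUF_repeat_none;
+ *   HUF_CREATE_STATIC_CTABLE(table, HUF_SYMBOLVALUE_MAX);
+ *   size_t const cSize = HUF_compress1X_repeat(dst, dstCapacity, src, srcSize,
+ *                            HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT,
+ *                            wksp, HUF_WORKSPACE_SIZE, table, &rep, 0, 0);
+ * Later blocks may set rep to HUF_repeat_check to consider reusing the table. 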
*/
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                             const void* src, size_t srcSize,
+                             unsigned maxSymbolValue, unsigned tableLog,
+                             void* workSpace, size_t wkspSize, /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                             HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /*< automatic selection of single or double symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
new file mode 100644
index 0000000000000..dcdd586a9fd91
--- /dev/null
+++ b/lib/zstd/common/mem.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include <asm/unaligned.h> /* get_unaligned, put_unaligned* */
+#include <linux/compiler.h> /* inline */
+#include <linux/swab.h> /* swab32, swab64 */
+#include <linux/types.h> /* size_t, ptrdiff_t */
+#include "debug.h" /* DEBUG_STATIC_ASSERT */
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#define MEM_STATIC static inline
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+typedef uint8_t BYTE;
+typedef uint16_t U16;
+typedef int16_t S16;
+typedef uint32_t U32;
+typedef int32_t S32;
+typedef uint64_t U64;
+typedef int64_t S64;
+
+/*-**************************************************************
+* Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+/*-**************************************************************
+* Memory I/O Implementation
+*****************************************************************/
+MEM_STATIC unsigned MEM_32bits(void)
+{
+    return sizeof(size_t) == 4;
+}
+
+MEM_STATIC unsigned MEM_64bits(void)
+{
+    return sizeof(size_t) == 8;
+}
+
+#if defined(__LITTLE_ENDIAN)
+#define MEM_LITTLE_ENDIAN 1
+#else
+#define MEM_LITTLE_ENDIAN 0
+#endif
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    return MEM_LITTLE_ENDIAN;
+}
+
+MEM_STATIC U16 MEM_read16(const void *memPtr)
+{
+    return get_unaligned((const U16 *)memPtr);
+}
+
+MEM_STATIC U32
MEM_read32(const void *memPtr) +{ + return get_unaligned((const U32 *)memPtr); +} + +MEM_STATIC U64 MEM_read64(const void *memPtr) +{ + return get_unaligned((const U64 *)memPtr); +} + +MEM_STATIC size_t MEM_readST(const void *memPtr) +{ + return get_unaligned((const size_t *)memPtr); +} + +MEM_STATIC void MEM_write16(void *memPtr, U16 value) +{ + put_unaligned(value, (U16 *)memPtr); +} + +MEM_STATIC void MEM_write32(void *memPtr, U32 value) +{ + put_unaligned(value, (U32 *)memPtr); +} + +MEM_STATIC void MEM_write64(void *memPtr, U64 value) +{ + put_unaligned(value, (U64 *)memPtr); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void *memPtr) +{ + return get_unaligned_le16(memPtr); +} + +MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val) +{ + put_unaligned_le16(val, memPtr); +} + +MEM_STATIC U32 MEM_readLE24(const void *memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); +} + +MEM_STATIC U32 MEM_readLE32(const void *memPtr) +{ + return get_unaligned_le32(memPtr); +} + +MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32) +{ + put_unaligned_le32(val32, memPtr); +} + +MEM_STATIC U64 MEM_readLE64(const void *memPtr) +{ + return get_unaligned_le64(memPtr); +} + +MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64) +{ + put_unaligned_le64(val64, memPtr); +} + +MEM_STATIC size_t MEM_readLEST(const void *memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void *memPtr) +{ + return get_unaligned_be32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32) +{ + put_unaligned_be32(val32, memPtr); +} + +MEM_STATIC U64 MEM_readBE64(const void *memPtr) +{ + return get_unaligned_be64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64) +{ + put_unaligned_be64(val64, memPtr); +} + +MEM_STATIC size_t MEM_readBEST(const void *memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +MEM_STATIC U32 MEM_swap32(U32 in) +{ + return swab32(in); +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ + return swab64(in); +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +#endif /* MEM_H_MODULE */ diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c new file mode 100644 index 0000000000000..3d7e35b309b5d --- /dev/null +++ b/lib/zstd/common/zstd_common.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ +#include "error_private.h" +#include "zstd_internal.h" + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h new file mode 100644 index 0000000000000..7a5bf44839c9c --- /dev/null +++ b/lib/zstd/common/zstd_deps.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. 
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <linux/limits.h>
+#include <linux/stddef.h>
+
+#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
+#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
+#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/*
+ * Define malloc as always failing. That means the user must
+ * either use ZSTD_customMem or statically allocate memory.
+ * Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#define ZSTD_malloc(s) ({ (void)(s); NULL; })
+#define ZSTD_free(p) ((void)(p))
+#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#include <linux/math64.h>
+
+static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+    return div_u64(dividend, divisor);
+}
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 1, meaning
+ * it is disabled in production.
+ * Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <linux/kernel.h>
+
+#define assert(x) WARN_ON(!(x)) /* warn when the asserted condition is false */
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 2, meaning
+ * it is disabled in production.
+ * Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <linux/printk.h>
+
+#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/*
+ * Only requested when MSAN is enabled.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+/*
+ * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
+ * is an unsigned long.
+ */
+typedef long intptr_t;
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
new file mode 100644
index 0000000000000..fc6f3a9b40c07
--- /dev/null
+++ b/lib/zstd/common/zstd_internal.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
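+ * Keeping them in one shared header ensures the three users cannot drift apart.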
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "compiler.h"
+#include "mem.h"
+#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h>
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include <linux/xxhash.h> /* XXH_reset, update, digest */
+#define ZSTD_TRACE 0
+
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError /* for inlining */
+#define FSE_isError ERR_isError
+#define HUF_isError ERR_isError
+
+
+/*-*************************************
+* shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...)
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
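In stable mode the caller guarantees that the memory referenced by ZSTD_inBuffer/ZSTD_outBuffer stays valid and unmodified for the whole operation, which lets internal staging copies be skipped.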
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* Offset code of the sequence */ + U16 litLength; + U16 matchLength; +} seqDef; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/* + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/* + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! 
*/ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c deleted file mode 100644 index 57aaa64306a01..0000000000000 --- a/lib/zstd/compress.c +++ /dev/null @@ -1,3534 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/*-************************************* -* Dependencies -***************************************/ -#include "fse.h" -#include "huf.h" -#include "mem.h" -#include "zstd_internal.h" /* includes zstd.h */ -#include -#include -#include /* memset */ - -/*-************************************* -* Constants -***************************************/ -static const U32 g_searchStrength = 8; /* control skip over incompressible data */ -#define HASH_READ_SIZE 8 -typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; - -/*-************************************* -* Helper functions -***************************************/ -size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } - -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t *ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CCtx_s { - const BYTE *nextSrc; /* next block here to continue on curr prefix */ - const BYTE *base; /* All regular indexes relative to this position */ - const BYTE *dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 nextToUpdate3; /* index from which to continue dictionary update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; /* index of end of dictionary */ - U32 forceWindow; /* force back-references to respect limit of 1< 3) ? 
0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - size_t const optSpace = - ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); - size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace + - (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0); - - return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize); -} - -static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_CCtx *cctx; - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) - return NULL; - memset(cctx, 0, sizeof(ZSTD_CCtx)); - cctx->customMem = customMem; - return cctx; -} - -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem); - if (cctx) { - cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize); - } - return cctx; -} - -size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx) -{ - if (cctx == NULL) - return 0; /* support free on NULL */ - ZSTD_free(cctx->workSpace, cctx->customMem); - ZSTD_free(cctx, cctx->customMem); - return 0; /* reserved as a potential error code in the future */ -} - -const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); } - -static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; } - -/** ZSTD_checkParams() : - ensure param values remain within authorized range. - @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ -#define CLAMPCHECK(val, min, max) \ - { \ - if ((val < min) | (val > max)) \ - return ERROR(compressionParameter_unsupported); \ - } - CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) - return ERROR(compressionParameter_unsupported); - return 0; -} - -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) -{ - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; -} - -/** ZSTD_adjustCParams() : - optimize `cPar` for a given input (`srcSize` and `dictSize`). - mostly downsizing to reduce memory consumption and initialization. - Both `srcSize` and `dictSize` are optional (use 0 if unknown), - but if both are 0, no optimization can be done. - Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. 
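 *
 * [Editor's note: a worked example added while editing; not in the original
 *  source. With srcSize = 4096 and dictSize = 0, rSize = 4096 and
 *  srcLog = ZSTD_highbit32(4095) + 1 = 12, so a requested windowLog of 19
 *  is cut down to 12, and hashLog/chainLog are then clamped against the
 *  smaller window:
 *
 *      ZSTD_compressionParameters p;
 *      p.windowLog = 19; p.chainLog = 17; p.hashLog = 17;
 *      p.searchLog = 1; p.searchLength = 5; p.targetLength = 0;
 *      p.strategy = ZSTD_fast;
 *      p = ZSTD_adjustCParams(p, 4096, 0);  // p.windowLog == 12 afterwards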
*/ -ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) -{ - if (srcSize + dictSize == 0) - return cPar; /* no size information available : no adjustment */ - - /* resize params, to use less memory when necessary */ - { - U32 const minSrcSize = (srcSize == 0) ? 500 : 0; - U64 const rSize = srcSize + dictSize + minSrcSize; - if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) { - U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1); - if (cPar.windowLog > srcLog) - cPar.windowLog = srcLog; - } - } - if (cPar.hashLog > cPar.windowLog) - cPar.hashLog = cPar.windowLog; - { - U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); - } - - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - - return cPar; -} - -static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2) -{ - return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) & - (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3)); -} - -/*! ZSTD_continueCCtx() : - reuse CCtx without reset (note : requires no dictionary) */ -static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize) -{ - U32 const end = (U32)(cctx->nextSrc - cctx->base); - cctx->params = params; - cctx->frameContentSize = frameContentSize; - cctx->lowLimit = end; - cctx->dictLimit = end; - cctx->nextToUpdate = end + 1; - cctx->stage = ZSTDcs_init; - cctx->dictID = 0; - cctx->loadedDictEnd = 0; - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - cctx->rep[i] = repStartValue[i]; - } - cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */ - xxh64_reset(&cctx->xxhState, 0); - return 0; -} - -typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e; - -/*! ZSTD_resetCCtx_advanced() : - note : `params` must be validated */ -static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp) -{ - if (crp == ZSTDcrp_continue) - if (ZSTD_equivalentParams(params, zc->params)) { - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_continueCCtx(zc, params, frameContentSize); - } - - { - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog); - U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11 * maxNbSeq; - size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); - size_t const hSize = ((size_t)1) << params.cParams.hashLog; - U32 const hashLog3 = (params.cParams.searchLength > 3) ? 
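/* [Editor's note: clarification added while editing; not in the original
 * source. hashTable3 is a third table keyed on 3-byte sequences and is only
 * useful when searchLength == 3; for longer minimum matches the ternary at
 * this spot picks hashLog3 == 0, so h3Size degenerates to a single unused
 * slot. With searchLength == 3 and windowLog == 17, hashLog3 would instead
 * be MIN(ZSTD_HASHLOG3_MAX, 17). ] */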
0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - void *ptr; - - /* Check if workSpace is large enough, alloc a new one if needed */ - { - size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + - (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); - size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace + - (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0); - if (zc->workSpaceSize < neededSpace) { - ZSTD_free(zc->workSpace, zc->customMem); - zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - if (zc->workSpace == NULL) - return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; - } - } - - if (crp != ZSTDcrp_noMemset) - memset(zc->workSpace, 0, tableSpace); /* reset tables only */ - xxh64_reset(&zc->xxhState, 0); - zc->hashLog3 = hashLog3; - zc->hashTable = (U32 *)(zc->workSpace); - zc->chainTable = zc->hashTable + hSize; - zc->hashTable3 = zc->chainTable + chainSize; - ptr = zc->hashTable3 + h3Size; - zc->hufTable = (HUF_CElt *)ptr; - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; - ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */ - - zc->nextToUpdate = 1; - zc->nextSrc = NULL; - zc->base = NULL; - zc->dictBase = NULL; - zc->dictLimit = 0; - zc->lowLimit = 0; - zc->params = params; - zc->blockSize = blockSize; - zc->frameContentSize = frameContentSize; - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - zc->rep[i] = repStartValue[i]; - } - - if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { - zc->seqStore.litFreq = (U32 *)ptr; - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits); - zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1); - ptr = zc->seqStore.offCodeFreq + (MaxOff + 1); - zc->seqStore.matchTable = (ZSTD_match_t *)ptr; - ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1; - zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr; - ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1; - zc->seqStore.litLengthSum = 0; - } - zc->seqStore.sequencesStart = (seqDef *)ptr; - ptr = zc->seqStore.sequencesStart + maxNbSeq; - zc->seqStore.llCode = (BYTE *)ptr; - zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; - zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; - - zc->stage = ZSTDcs_init; - zc->dictID = 0; - zc->loadedDictEnd = 0; - - return 0; - } -} - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx) -{ - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - cctx->rep[i] = 0; -} - -/*! ZSTD_copyCCtx() : -* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. -* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). 
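 *
 * [Editor's note: a hedged usage sketch added while editing; not in the
 *  original source. The typical use is cloning a dictionary-loaded context
 *  so the table-loading cost is paid once; `master` is assumed to be freshly
 *  initialized with a dictionary and not yet fed any input:
 *
 *      size_t const err = ZSTD_copyCCtx(worker, master, 0);  // 0 = size unknown
 *      if (ZSTD_isError(err))
 *          return err;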
-* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize) -{ - if (srcCCtx->stage != ZSTDcs_init) - return ERROR(stage_wrong); - - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - { - ZSTD_parameters params = srcCCtx->params; - params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset); - } - - /* copy tables */ - { - size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); - size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; - size_t const h3Size = (size_t)1 << srcCCtx->hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); - } - - /* copy dictionary offsets */ - dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; - dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3; - dstCCtx->nextSrc = srcCCtx->nextSrc; - dstCCtx->base = srcCCtx->base; - dstCCtx->dictBase = srcCCtx->dictBase; - dstCCtx->dictLimit = srcCCtx->dictLimit; - dstCCtx->lowLimit = srcCCtx->lowLimit; - dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd; - dstCCtx->dictID = srcCCtx->dictID; - - /* copy entropy tables */ - dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable; - if (srcCCtx->flagStaticTables) { - memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); - memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); - memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); - } - if (srcCCtx->flagStaticHufTable) { - memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4); - } - - return 0; -} - -/*! ZSTD_reduceTable() : -* reduce table indexes by `reducerValue` */ -static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u] < reducerValue) - table[u] = 0; - else - table[u] -= reducerValue; - } -} - -/*! ZSTD_reduceIndex() : -* rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue) -{ - { - U32 const hSize = 1 << zc->params.cParams.hashLog; - ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); - } - - { - U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); - ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); - } - - { - U32 const h3Size = (zc->hashLog3) ? 
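/* [Editor's note: illustrative numbers added while editing; not in the
 * original source. Index reduction exists because match indexes are U32
 * offsets from `base`: after ~4 GB of contiguous input they would overflow.
 * ZSTD_reduceTable() rebases every entry, e.g. with reducerValue == 0x1000,
 * an entry of 0x0FFF becomes 0 (too old to keep) while 0x5000 becomes
 * 0x4000. ] */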
1 << zc->hashLog3 : 0; - ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); - } -} - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* See doc/zstd_compression_format.md for detailed format description */ - -size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize); - ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); - return ZSTD_blockHeaderSize + srcSize; -} - -static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - BYTE *const ostart = (BYTE * const)dst; - U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095); - - if (srcSize + flSize > dstCapacity) - return ERROR(dstSize_tooSmall); - - switch (flSize) { - case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break; - case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break; - default: /*note : should not be necessary : flSize is within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break; - } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; -} - -static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - BYTE *const ostart = (BYTE * const)dst; - U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch (flSize) { - case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break; - case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break; - default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break; - } - - ostart[flSize] = *(const BYTE *)src; - return flSize + 1; -} - -static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } - -static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const minGain = ZSTD_minGain(srcSize); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE *const ostart = (BYTE *)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - -/* small ? don't even attempt compression (speed opt) */ -#define LITERAL_NOENTROPY 63 - { - size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; - if (srcSize <= minLitSize) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - if (dstCapacity < lhSize + 1) - return ERROR(dstSize_tooSmall); /* not enough space for compression */ - { - HUF_repeat repeat = zc->flagStaticHufTable; - int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) - singleStream = 1; - cLitSize = singleStream ? 
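/* [Editor's note: a worked example added while editing; not in the original
 * source. For the raw-literals path above, srcSize == 20 gives flSize == 1
 * and the whole header is the single byte set_basic + (20 << 3); for
 * srcSize == 1000, flSize == 2 and the header is the 16-bit little-endian
 * value set_basic + (1 << 2) + (1000 << 4). ] */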
HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters, - sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat) - : HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters, - sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat); - if (repeat != HUF_repeat_none) { - hType = set_repeat; - } /* reused the existing table */ - else { - zc->flagStaticHufTable = HUF_repeat_check; - } /* now have a table to reuse */ - } - - if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) { - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize == 1) { - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - /* Build header */ - switch (lhSize) { - case 3: /* 2 - 2 - 10 - 10 */ - { - U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14); - ZSTD_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { - U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18); - ZSTD_writeLE32(ostart, lhc); - break; - } - default: /* should not be necessary, lhSize is only {3,4,5} */ - case 5: /* 2 - 2 - 18 - 18 */ - { - U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22); - ZSTD_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - } - return lhSize + cLitSize; -} - -static const BYTE LL_Code[64] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18, - 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24}; - -static const BYTE ML_Code[128] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, - 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42}; - -void ZSTD_seqToCodes(const seqStore_t *seqStorePtr) -{ - BYTE const LL_deltaCode = 19; - BYTE const ML_deltaCode = 36; - const seqDef *const sequences = seqStorePtr->sequencesStart; - BYTE *const llCodeTable = seqStorePtr->llCode; - BYTE *const ofCodeTable = seqStorePtr->ofCode; - BYTE *const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - for (u = 0; u < nbSeq; u++) { - U32 const llv = sequences[u].litLength; - U32 const mlv = sequences[u].matchLength; - llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; - ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); - mlCodeTable[u] = (mlv > 127) ? 
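/* [Editor's note: a worked example added while editing; not in the original
 * source. In ZSTD_seqToCodes(), literal lengths up to 63 map directly
 * through LL_Code[]; above that the code is ZSTD_highbit32(llv) +
 * LL_deltaCode, so llv == 100 (highest set bit 6) yields code 25. Match
 * lengths use the same scheme, with ML_Code[] up to 127 and
 * ML_deltaCode == 36 beyond. ] */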
(BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; - } - if (seqStorePtr->longLengthID == 1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID == 2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -} - -ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity) -{ - const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN; - const seqStore_t *seqStorePtr = &(zc->seqStore); - FSE_CTable *CTable_LitLength = zc->litlengthCTable; - FSE_CTable *CTable_OffsetBits = zc->offcodeCTable; - FSE_CTable *CTable_MatchLength = zc->matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef *const sequences = seqStorePtr->sequencesStart; - const BYTE *const ofCodeTable = seqStorePtr->ofCode; - const BYTE *const llCodeTable = seqStorePtr->llCode; - const BYTE *const mlCodeTable = seqStorePtr->mlCode; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstCapacity; - BYTE *op = ostart; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE *seqHead; - - U32 *count; - S16 *norm; - U32 *workspace; - size_t workspaceSize = sizeof(zc->tmpCounters); - { - size_t spaceUsed32 = 0; - count = (U32 *)zc->tmpCounters + spaceUsed32; - spaceUsed32 += MaxSeq + 1; - norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2; - - workspace = (U32 *)zc->tmpCounters + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - } - - /* Compress literals */ - { - const BYTE *const literals = seqStorePtr->litStart; - size_t const litSize = seqStorePtr->lit - literals; - size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); - if (ZSTD_isError(cSize)) - return cSize; - op += cSize; - } - - /* Sequences Header */ - if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) - return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2; - else - op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3; - if (nbSeq == 0) - return op - ostart; - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - -#define MIN_SEQ_FOR_DYNAMIC_FSE 64 -#define MAX_SEQ_FOR_STATIC_FSE 1000 - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - - /* CTable for Literal Lengths */ - { - U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize); - LLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq - 1]] > 1) { - count[llCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - 
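/* [Editor's note: a summary added while editing; not in the original source.
 * Each of the three CTable sections (literal lengths here, then offsets and
 * match lengths) picks one of four encoding modes, roughly:
 *
 *     if (all codes identical && nbSeq > 2)              -> set_rle
 *     else if (previous tables valid && nbSeq < 1000)    -> set_repeat
 *     else if (nbSeq < 64 || distribution near default)  -> set_basic
 *     else build and serialize a fresh FSE table         -> set_compressed
 * ] */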
FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize); - LLtype = set_compressed; - } - } - - /* CTable for Offsets */ - { - U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = ofCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize); - Offtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[ofCodeTable[nbSeq - 1]] > 1) { - count[ofCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize); - Offtype = set_compressed; - } - } - - /* CTable for MatchLengths */ - { - U32 max = MaxML; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlCodeTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize); - MLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - if (count[mlCodeTable[nbSeq - 1]] > 1) { - count[mlCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize); - MLtype = set_compressed; - } - } - - *seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2)); - zc->flagStaticTables = 0; - - /* Encoding Sequences */ - { - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */ - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]); - BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq - 1]; - int const extraBits = ofBits - 
MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]); - } - BIT_flushBits(&blockStream); - - { - size_t n; - for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */ - BYTE const llCode = llCodeTable[n]; - BYTE const ofCode = ofCodeTable[n]; - BYTE const mlCode = mlCodeTable[n]; - U32 const llBits = LL_bits[llCode]; - U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ - U32 const mlBits = ML_bits[mlCode]; - /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ - FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ - if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (ZSTD_32bits() && ((llBits + mlBits) > 24)) - BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); /* (7)*/ - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - } - } - - FSE_flushCState(&blockStream, &stateMatchLength); - FSE_flushCState(&blockStream, &stateOffsetBits); - FSE_flushCState(&blockStream, &stateLitLength); - - { - size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize == 0) - return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; - } - } - return op - ostart; -} - -ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize) -{ - size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity); - size_t const minGain = ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; - /* If the srcSize <= dstCapacity, then there is enough space to write a - * raw uncompressed block. Since we ran out of space, the block must not - * be compressible, so fall back to a raw uncompressed block. - */ - int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity; - int i; - - if (ZSTD_isError(cSize) && !uncompressibleError) - return cSize; - if (cSize >= maxCSize || uncompressibleError) { - zc->flagStaticHufTable = HUF_repeat_none; - return 0; - } - /* confirm repcodes */ - for (i = 0; i < ZSTD_REP_NUM; i++) - zc->rep[i] = zc->repToConfirm[i]; - return cSize; -} - -/*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. 
- `matchCode` : matchLength - MINMATCH -*/ -ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode) -{ - /* copy Literals */ - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength > 0xFFFF) { - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; - - /* match Length */ - if (matchCode > 0xFFFF) { - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].matchLength = (U16)matchCode; - - seqStorePtr->sequences++; -} - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes(register size_t val) -{ - if (ZSTD_isLittleEndian()) { - if (ZSTD_64bits()) { - return (__builtin_ctzll((U64)val) >> 3); - } else { /* 32 bits */ - return (__builtin_ctz((U32)val) >> 3); - } - } else { /* Big Endian CPU */ - if (ZSTD_64bits()) { - return (__builtin_clzll(val) >> 3); - } else { /* 32 bits */ - return (__builtin_clz((U32)val) >> 3); - } - } -} - -static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit) -{ - const BYTE *const pStart = pIn; - const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1); - - while (pIn < pInLoopLimit) { - size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn); - if (!diff) { - pIn += sizeof(size_t); - pMatch += sizeof(size_t); - continue; - } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } - if (ZSTD_64bits()) - if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) { - pIn += 4; - pMatch += 4; - } - if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) { - pIn += 2; - pMatch += 2; - } - if ((pIn < pInLimit) && (*pMatch == *pIn)) - pIn++; - return (size_t)(pIn - pStart); -} - -/** ZSTD_count_2segments() : -* can count match length with `ip` & `match` in 2 different segments. 
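 *
 * [Editor's note: a worked example added while editing; not in the original
 *  source. With an external-dictionary window, `match` may sit near the end
 *  of the old segment while the data it predicts continues at the start of
 *  the current prefix: if mEnd - match == 5 but 9 bytes really agree, the
 *  first ZSTD_count() stops at the segment edge with 5 and the follow-up
 *  ZSTD_count(ip + 5, iStart, iEnd) contributes the remaining 4.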
-* convention : on reaching mEnd, match count continue starting from iStart -*/ -static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart) -{ - const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd); - size_t const matchLength = ZSTD_count(ip, match, vEnd); - if (match + matchLength != mEnd) - return matchLength; - return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd); -} - -/*-************************************* -* Hashes -***************************************/ -static const U32 prime3bytes = 506832829U; -static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); } -ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */ - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); } -static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); } -static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); } -static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); } -static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); } -static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); } - -static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls) -{ - switch (mls) { - // case 3: return ZSTD_hash3Ptr(p, hBits); - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} - -/*-************************************* -* Fast Scan -***************************************/ -static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls) -{ - U32 *const hashTable = zc->hashTable; - U32 const hBits = zc->params.cParams.hashLog; - const BYTE *const base = zc->base; - const BYTE *ip = base + zc->nextToUpdate; - const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while (ip <= iend) { - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashTable = cctx->hashTable; - U32 const hBits = cctx->params.cParams.hashLog; - seqStore_t *seqStorePtr = &(cctx->seqStore); - const BYTE *const base = cctx->base; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE *const lowest = base + lowestIndex; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = 
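/* [Editor's note: background added while editing; not in the original
 * source. The ZSTD_hashN() helpers above are multiplicative hashes: the
 * sampled bytes are shifted to the top of the word where fewer than all
 * bytes are used, multiplied by a large odd constant, and the top h bits
 * kept, e.g.
 *
 *     h4 = (ZSTD_read32(p) * 2654435761U) >> (32 - h);  // 2654435761 ~ 2^32/phi
 * ] */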
iend - HASH_READ_SIZE; - U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip == lowest); - { - U32 const maxRep = (U32)(ip - lowest); - if (offset_2 > maxRep) - offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hBits, mls); - U32 const curr = (U32)(ip - base); - U32 const matchIndex = hashTable[h]; - const BYTE *match = base + matchIndex; - hashTable[h] = curr; /* update hash table */ - - if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { - mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - U32 offset; - if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - mLength = ZSTD_count(ip + 4, match + 4, iend) + 4; - offset = (U32)(ip - match); - while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4; - { - U32 const tmpOff = offset_2; - offset_2 = offset_1; - offset_1 = tmpOff; - } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - } - - /* save reps for next block */ - cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - cctx->repToConfirm[1] = offset_2 ? 
offset_2 : offsetSaved; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; - } -} - -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *hashTable = ctx->hashTable; - const U32 hBits = ctx->params.cParams.hashLog; - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const base = ctx->base; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const lowPrefixPtr = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE *match = matchBase + matchIndex; - const U32 curr = (U32)(ip - base); - const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ - const BYTE *repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *repMatch = repBase + repIndex; - size_t mLength; - hashTable[h] = curr; /* update hash table */ - - if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) && - (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) { - const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - { - const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex < dictLimit ? 
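/* [Editor's note: clarification added while editing; not in the original
 * source. In extDict mode one U32 index space covers two buffers: indexes
 * below dictLimit resolve against dictBase (the old window), the rest
 * against base. E.g. with dictLimit == 0x8000, matchIndex 0x7FFF is the last
 * byte of the old segment at dictBase + 0x7FFF, while 0x8000 is the first
 * prefix byte at base + 0x8000. ] */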
dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; - while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - offset = curr - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; - hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const curr2 = (U32)(ip - base); - U32 const repIndex2 = curr2 - offset_2; - const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) { - const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t repLength2 = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; - offset_2 = offset_1; - offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - U32 const mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - -/*-************************************* -* Double Fast -***************************************/ -static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls) -{ - U32 *const hashLarge = cctx->hashTable; - U32 const hBitsL = cctx->params.cParams.hashLog; - U32 *const hashSmall = cctx->chainTable; - U32 const hBitsS = cctx->params.cParams.chainLog; - const BYTE *const base = cctx->base; - const BYTE *ip = base + cctx->nextToUpdate; - const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while (ip <= iend) { - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashLong = cctx->hashTable; - const U32 hBitsL = cctx->params.cParams.hashLog; - U32 *const hashSmall = cctx->chainTable; - const U32 hBitsS = cctx->params.cParams.chainLog; - seqStore_t *seqStorePtr = &(cctx->seqStore); - const BYTE *const base = cctx->base; - const 
BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE *const lowest = base + lowestIndex; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - HASH_READ_SIZE; - U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip == lowest); - { - U32 const maxRep = (U32)(ip - lowest); - if (offset_2 > maxRep) - offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - U32 const curr = (U32)(ip - base); - U32 const matchIndexL = hashLong[h2]; - U32 const matchIndexS = hashSmall[h]; - const BYTE *matchLong = base + matchIndexL; - const BYTE *match = base + matchIndexS; - hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - - if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */ - mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - U32 offset; - if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) { - mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8; - offset = (U32)(ip - matchLong); - while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) { - ip--; - matchLong--; - mLength++; - } /* catch up */ - } else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE *match3 = base + matchIndex3; - hashLong[h3] = curr + 1; - if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) { - mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8; - ip++; - offset = (U32)(ip - match3); - while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) { - ip--; - match3--; - mLength++; - } /* catch up */ - } else { - mLength = ZSTD_count(ip + 4, match + 4, iend) + 4; - offset = (U32)(ip - match); - while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - } - } else { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = - curr + 2; /* here because curr+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base); - - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4; - { - U32 const tmpOff = offset_2; - offset_2 = offset_1; - offset_1 = tmpOff; - } /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - 
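/* [Editor's note: a summary added while editing; not in the original source.
 * The double-fast search above proceeds in priority order: (1) repcode at
 * ip+1, (2) 8-byte match through hashLong, (3) mls-byte match through
 * hashSmall, upgraded to a long match at ip+1 when hashLong also hits there;
 * every accepted match is then extended backwards while preceding bytes
 * agree. ] */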
ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - } - - /* save reps for next block */ - cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; - } -} - -static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashLong = ctx->hashTable; - U32 const hBitsL = ctx->params.cParams.hashLog; - U32 *const hashSmall = ctx->chainTable; - U32 const hBitsS = ctx->params.cParams.chainLog; - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const base = ctx->base; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const lowPrefixPtr = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE *match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base; - const BYTE *matchLong = matchLongBase + matchLongIndex; - - const U32 curr = (U32)(ip - base); - const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ - const BYTE *repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ - - if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) && - (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) { - const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) { - const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? 
dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8; - offset = curr - matchLongIndex; - while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) { - ip--; - matchLong--; - mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - - } else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base; - const BYTE *match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = curr + 1; - if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) { - const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8; - ip++; - offset = curr + 1 - matchIndex3; - while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) { - ip--; - match3--; - mLength++; - } /* catch up */ - } else { - const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4; - offset = curr - matchIndex; - while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - - } else { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2; - hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2; - hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base); - hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const curr2 = (U32)(ip - base); - U32 const repIndex2 = curr2 - offset_2; - const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) { - const BYTE *const repEnd2 = repIndex2 < dictLimit ? 
dictEnd : iend; - size_t const repLength2 = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; - offset_2 = offset_1; - offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - U32 const mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. -* ip : assumed <= iend-8 . -* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict) -{ - U32 *const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *match; - const U32 curr = (U32)(ip - base); - const U32 btLow = btMask >= curr ? 
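/* [Editor's note: clarification added while editing; not in the original
 * source. The binary tree lives inside chainTable: position p stores its
 * smaller-suffix child at bt[2*(p & btMask)] and its larger-suffix child at
 * bt[2*(p & btMask) + 1], ordered by the suffix starting at each position.
 * btLow, computed here, marks where (p & btMask) has wrapped, i.e. where
 * links have been overwritten by newer positions and the descent must
 * stop. ] */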
0 : curr - btMask; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = zc->lowLimit; - U32 matchEndIdx = curr + 8; - size_t bestLength = 8; - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *const nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip + matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - if (bestLength > 384) - return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - if (matchEndIdx > curr + 8) - return matchEndIdx - curr - 8; - return 1; -} - -static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls, - U32 extDict) -{ - U32 *const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const U32 curr = (U32)(ip - base); - const U32 btLow = btMask >= curr ? 
0 : curr - btMask; - const U32 windowLow = zc->lowLimit; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = bt + 2 * (curr & btMask) + 1; - U32 matchEndIdx = curr + 8; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *const nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE *match; - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1))) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; - if (ip + matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - - zc->nextToUpdate = (matchEndIdx > curr + 8) ? 
matchEndIdx - 8 : curr + 1; - return bestLength; -} - -static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) - idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0); -} - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); -} - -static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7: - case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - -static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) - idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1); -} - -/** Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); -} - -static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7: - case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. 
not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls) -{ - U32 *const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - U32 *const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE -size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls, - const U32 extDict) -{ - U32 *const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.cParams.chainLog); - const U32 chainMask = chainSize - 1; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 curr = (U32)(ip - base); - const U32 minChain = curr > chainSize ? curr - chainSize : 0; - int nbAttempts = maxNbAttempts; - size_t ml = EQUAL_READ32 - 1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls); - - for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) { - const BYTE *match; - size_t currMl = 0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; - } - - /* save best solution */ - if (currMl > ml) { - ml = currMl; - *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; - if (ip + currMl == iLimit) - break; /* best possible, and avoid read overflow*/ - } - - if (matchIndex <= minChain) - break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return ml; -} - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 7: - case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } -} - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); - case 7: - 
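/* annotation: only mls 4..6 get dedicated instantiations of the generic
 * matcher; "case 7" above intentionally falls through to the 6-byte
 * variant, just as default/3 folds onto 4 */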
case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base + ctx->dictLimit; - - U32 const maxSearches = 1 << ctx->params.cParams.searchLog; - U32 const mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0; - - /* init */ - ip += (ip == base); - ctx->nextToUpdate3 = ctx->nextToUpdate; - { - U32 const maxRep = (U32)(ip - base); - if (offset_2 > maxRep) - savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - savedOffset = offset_1, offset_1 = 0; - } - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength = 0; - size_t offset = 0; - const BYTE *start = ip + 1; - - /* check repCode */ - if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) { - /* repcode : we take it */ - matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - if (depth == 0) - goto _storeSequence; - } - - /* first search (depth 0) */ - { - size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset = offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth >= 1) - while (ip < ilimit) { - ip++; - if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - /* let's find an even better one */ - if ((depth == 2) && (ip < ilimit)) { - ip++; - if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) { - size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(ml2 * 4); - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start 
= ip; - } - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } - } - } - break; /* nothing found : store previous solution */ - } - - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) && - (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */ - { - start--; - matchLength++; - } - offset_2 = offset_1; - offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { - size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32; - offset = offset_2; - offset_2 = offset_1; - offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - - /* Save reps for next block */ - ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; - ctx->repToConfirm[1] = offset_2 ? 
offset_2 : savedOffset; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); } - -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); } - -static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); } - -static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } - -FORCE_INLINE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength = 0; - size_t offset = 0; - const BYTE *start = ip + 1; - U32 curr = (U32)(ip - base); - - /* check repCode */ - { - const U32 repIndex = (U32)(curr + 1 - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = - ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - if (depth == 0) - goto _storeSequence; - } - } - - /* first search (depth 0) */ - { - size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset = offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth >= 1) - while (ip < ilimit) { - ip++; - curr++; - /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(curr - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + - EQUAL_READ32; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } - } - - /* search match, depth 1 */ - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - /* let's find an even better one */ - if ((depth == 2) && (ip < ilimit)) { - ip++; - curr++; - /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(curr - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, - repEnd, prefixStart) + - EQUAL_READ32; - int gain2 = (int)(repLength * 4); - int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } - } - - /* search match, depth 2 */ - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } - } - } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE)); - const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) { - start--; - match--; - matchLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ - _storeSequence : { - size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip - base) - offset_2); - const BYTE *const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - offset = offset_2; - offset_2 = offset_1; - offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } - } - - /* Save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } - -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); -} - -/* The optimal parser */ -#include "zstd_opt.h" - -static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize); - -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) -{ - static const ZSTD_blockCompressor blockCompressor[2][8] = { - {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, - ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2}, - {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict, - ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}}; - - return blockCompressor[extDict][(U32)strat]; -} 
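Editor's aside: the two-row function-pointer table in ZSTD_selectBlockCompressor() above is the single dispatch point for every strategy/window-mode pair. Below is a minimal standalone sketch of the same pattern; the enum and handler names are illustrative stand-ins, not symbols from this patch:

#include <stdio.h>

typedef void (*block_fn)(const char *src);

/* four toy "block compressors", one per (strategy, window-mode) pair */
static void fast_prefix(const char *src)  { printf("fast, prefix-only: %s\n", src); }
static void fast_extdict(const char *src) { printf("fast, extDict: %s\n", src); }
static void lazy_prefix(const char *src)  { printf("lazy, prefix-only: %s\n", src); }
static void lazy_extdict(const char *src) { printf("lazy, extDict: %s\n", src); }

enum strat { S_FAST, S_LAZY, S_COUNT };

static block_fn select_block_fn(enum strat s, int ext_dict)
{
	/* row 0: the whole match window lives in the current prefix;
	 * row 1: part of the window lives in an external dictionary segment */
	static const block_fn table[2][S_COUNT] = {
		{ fast_prefix,  lazy_prefix  },
		{ fast_extdict, lazy_extdict },
	};
	return table[ext_dict][s];
}

int main(void)
{
	select_block_fn(S_LAZY, 0)("block A");
	select_block_fn(S_FAST, 1)("block B");
	return 0;
}

Keeping the two rows strictly parallel is what lets the real code derive the row index from a single comparison (zc->lowLimit < zc->dictLimit in ZSTD_compressBlock_internal() below) with no per-strategy branching.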
- -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); - const BYTE *const base = zc->base; - const BYTE *const istart = (const BYTE *)src; - const U32 curr = (U32)(istart - base); - if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1) - return 0; /* don't even attempt compression below a certain srcSize */ - ZSTD_resetSeqStore(&(zc->seqStore)); - if (curr > zc->nextToUpdate + 384) - zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */ - blockCompressor(zc, src, srcSize); - return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); -} - -/*! ZSTD_compress_generic() : -* Compress a chunk of data into one or multiple blocks. -* All blocks will be terminated, all input will be consumed. -* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. -* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code -*/ -static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk) -{ - size_t blockSize = cctx->blockSize; - size_t remaining = srcSize; - const BYTE *ip = (const BYTE *)src; - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - U32 const maxDist = 1 << cctx->params.cParams.windowLog; - - if (cctx->params.fParams.checksumFlag && srcSize) - xxh64_update(&cctx->xxhState, src, srcSize); - - while (remaining) { - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - size_t cSize; - - if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) - return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ - if (remaining < blockSize) - blockSize = remaining; - - /* preemptive overflow correction */ - if (cctx->lowLimit > (3U << 29)) { - U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1; - U32 const curr = (U32)(ip - cctx->base); - U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog); - U32 const correction = curr - newCurr; - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30); - ZSTD_reduceIndex(cctx, correction); - cctx->base += correction; - cctx->dictBase += correction; - cctx->lowLimit -= correction; - cctx->dictLimit -= correction; - if (cctx->nextToUpdate < correction) - cctx->nextToUpdate = 0; - else - cctx->nextToUpdate -= correction; - } - - if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { - /* enforce maxDist */ - U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist; - if (cctx->lowLimit < newLowLimit) - cctx->lowLimit = newLowLimit; - if (cctx->dictLimit < cctx->lowLimit) - cctx->dictLimit = cctx->lowLimit; - } - - cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) - return cSize; - - if (cSize == 0) { /* block is not compressible */ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3); - if (blockSize + ZSTD_blockHeaderSize > dstCapacity) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ - memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); - cSize = ZSTD_blockHeaderSize + blockSize; 
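/* annotation: per the zstd frame format, a block header is 3 bytes
 * little-endian — bit 0 = lastBlock, bits 1-2 = block type, bits 3..23
 * = block size. Worked example for a last raw block of 5 bytes:
 *   1 + (bt_raw << 1) + (5 << 3) = 1 + 0 + 40 = 0x29.
 * The 4-byte store above is safe only because the copied block content
 * immediately overwrites the extra byte, as the comment there notes. */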
- } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3); - ZSTD_writeLE24(op, cBlockHeader24); - cSize += ZSTD_blockHeaderSize; - } - - remaining -= blockSize; - dstCapacity -= cSize; - ip += blockSize; - op += cSize; - } - - if (lastFrameChunk && (op > ostart)) - cctx->stage = ZSTDcs_ending; - return op - ostart; -} - -static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) -{ - BYTE *const op = (BYTE *)dst; - U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */ - U32 const checksumFlag = params.fParams.checksumFlag > 0; - U32 const windowSize = 1U << params.cParams.windowLog; - U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = - params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */ - BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6)); - size_t pos; - - if (dstCapacity < ZSTD_frameHeaderSize_max) - return ERROR(dstSize_tooSmall); - - ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = frameHeaderDecriptionByte; - pos = 5; - if (!singleSegment) - op[pos++] = windowLogByte; - switch (dictIDSizeCode) { - default: /* impossible */ - case 0: break; - case 1: - op[pos] = (BYTE)(dictID); - pos++; - break; - case 2: - ZSTD_writeLE16(op + pos, (U16)dictID); - pos += 2; - break; - case 3: - ZSTD_writeLE32(op + pos, dictID); - pos += 4; - break; - } - switch (fcsCode) { - default: /* impossible */ - case 0: - if (singleSegment) - op[pos++] = (BYTE)(pledgedSrcSize); - break; - case 1: - ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256)); - pos += 2; - break; - case 2: - ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize)); - pos += 4; - break; - case 3: - ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize)); - pos += 8; - break; - } - return pos; -} - -static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk) -{ - const BYTE *const ip = (const BYTE *)src; - size_t fhSize = 0; - - if (cctx->stage == ZSTDcs_created) - return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ - - if (frame && (cctx->stage == ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID); - if (ZSTD_isError(fhSize)) - return fhSize; - dstCapacity -= fhSize; - dst = (char *)dst + fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - /* Check if blocks follow each other */ - if (src != cctx->nextSrc) { - /* not contiguous */ - ptrdiff_t const delta = cctx->nextSrc - ip; - cctx->lowLimit = cctx->dictLimit; - cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); - cctx->dictBase = cctx->base; - cctx->base -= delta; - cctx->nextToUpdate = cctx->dictLimit; - if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) - cctx->lowLimit = cctx->dictLimit; /* too small extDict */ - } - - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? 
cctx->dictLimit : (U32)highInputIdx; - cctx->lowLimit = lowLimitMax; - } - - cctx->nextSrc = ip + srcSize; - - if (srcSize) { - size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) - : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) - return cSize; - return cSize + fhSize; - } else - return fhSize; -} - -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0); -} - -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); } - -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); - if (srcSize > blockSizeMax) - return ERROR(srcSize_wrong); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); -} - -/*! ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize) -{ - const BYTE *const ip = (const BYTE *)src; - const BYTE *const iend = ip + srcSize; - - /* input becomes curr prefix */ - zc->lowLimit = zc->dictLimit; - zc->dictLimit = (U32)(zc->nextSrc - zc->base); - zc->dictBase = zc->base; - zc->base += ip - zc->nextSrc; - zc->nextToUpdate = zc->dictLimit; - zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base); - - zc->nextSrc = iend; - if (srcSize <= HASH_READ_SIZE) - return 0; - - switch (zc->params.cParams.strategy) { - case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break; - - case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength); - break; - - case ZSTD_btlazy2: - case ZSTD_btopt: - case ZSTD_btopt2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); - break; - - default: - return ERROR(GENERIC); /* strategy doesn't exist; impossible */ - } - - zc->nextToUpdate = (U32)(iend - zc->base); - return 0; -} - -/* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. */ -static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) -{ - U32 s; - if (dictMaxSymbolValue < maxSymbolValue) - return ERROR(dictionary_corrupted); - for (s = 0; s <= maxSymbolValue; ++s) { - if (normalizedCounter[s] == 0) - return ERROR(dictionary_corrupted); - } - return 0; -} - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! 
ZSTD_loadZstdDictionary() : - * @return : 0, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed > 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize) -{ - const BYTE *dictPtr = (const BYTE *)dict; - const BYTE *const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff + 1]; - unsigned offcodeMaxValue = MaxOff; - - dictPtr += 4; /* skip magic number */ - cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr); - dictPtr += 4; - - { - size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters)); - if (HUF_isError(hufHeaderSize)) - return ERROR(dictionary_corrupted); - dictPtr += hufHeaderSize; - } - - { - unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(offcodeHeaderSize)) - return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) - return ERROR(dictionary_corrupted); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += offcodeHeaderSize; - } - - { - short matchlengthNCount[MaxML + 1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(matchlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) - return ERROR(dictionary_corrupted); - /* Every match length code must have non-zero probability */ - CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); - CHECK_E( - FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += matchlengthHeaderSize; - } - - { - short litlengthNCount[MaxLL + 1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(litlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) - return ERROR(dictionary_corrupted); - /* Every literal length code must have non-zero probability */ - CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); - CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += litlengthHeaderSize; - } - - if (dictPtr + 12 > dictEnd) - return ERROR(dictionary_corrupted); - cctx->rep[0] = ZSTD_readLE32(dictPtr + 0); - cctx->rep[1] = ZSTD_readLE32(dictPtr + 4); - cctx->rep[2] = ZSTD_readLE32(dictPtr + 8); - dictPtr += 12; - - { - size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - U32 offcodeMax = MaxOff; - if (dictContentSize <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ - offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ - } - /* All offset values <= dictContentSize + 128 KB must be representable */ - 
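/* annotation: worked example of the bound above — with
 * dictContentSize = 112 KB, maxOffset = 112 KB + 128 KB = 245760, and
 * ZSTD_highbit32(245760) = 17 (2^17 = 131072 <= 245760 < 2^18), so the
 * count check below requires non-zero probability for every offset
 * code up to min(17, MaxOff) */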
CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); - /* All repCodes must be <= dictContentSize and != 0*/ - { - U32 u; - for (u = 0; u < 3; u++) { - if (cctx->rep[u] == 0) - return ERROR(dictionary_corrupted); - if (cctx->rep[u] > dictContentSize) - return ERROR(dictionary_corrupted); - } - } - - cctx->flagStaticTables = 1; - cctx->flagStaticHufTable = HUF_repeat_valid; - return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize); - } -} - -/** ZSTD_compress_insertDictionary() : -* @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize) -{ - if ((dict == NULL) || (dictSize <= 8)) - return 0; - - /* dict as pure content */ - if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict)) - return ZSTD_loadDictionaryContent(cctx, dict, dictSize); - - /* dict as zstd dictionary */ - return ZSTD_loadZstdDictionary(cctx, dict, dictSize); -} - -/*! ZSTD_compressBegin_internal() : -* @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize) -{ - ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue; - CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp)); - return ZSTD_compress_insertDictionary(cctx, dict, dictSize); -} - -/*! ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - /* compression parameters verification and optimization */ - CHECK_F(ZSTD_checkCParams(params.cParams)); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); -} - -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); -} - -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); } - -/*! ZSTD_writeEpilogue() : -* Ends a frame. 
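* (annotation: emits a final empty raw block if no block flagged "last"
* has been written yet, then the 4-byte XXH64 checksum when
* checksumFlag is set)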
-* @return : nb of bytes written into dst (or an error code) */ -static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - size_t fhSize = 0; - - if (cctx->stage == ZSTDcs_created) - return ERROR(stage_wrong); /* init missing */ - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); - if (ZSTD_isError(fhSize)) - return fhSize; - dstCapacity -= fhSize; - op += fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0; - if (dstCapacity < 4) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, cBlockHeader24); - op += ZSTD_blockHeaderSize; - dstCapacity -= ZSTD_blockHeaderSize; - } - - if (cctx->params.fParams.checksumFlag) { - U32 const checksum = (U32)xxh64_digest(&cctx->xxhState); - if (dstCapacity < 4) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, checksum); - op += 4; - } - - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return op - ostart; -} - -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1); - if (ZSTD_isError(cSize)) - return cSize; - endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize); - if (ZSTD_isError(endResult)) - return endResult; - return cSize + endResult; -} - -static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params) -{ - CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize)); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params) -{ - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); -} - -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params) -{ - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params); -} - -/* ===== Dictionary API ===== */ - -struct ZSTD_CDict_s { - void *dictBuffer; - const void *dictContent; - size_t dictContentSize; - ZSTD_CCtx *refContext; -}; /* typedef'd tp ZSTD_CDict within "zstd.h" */ - -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); } - -static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem) -{ - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - { - ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); - ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem); - - if (!cdict || !cctx) { - ZSTD_free(cdict, customMem); - ZSTD_freeCCtx(cctx); - return NULL; - } - - if ((byReference) || (!dictBuffer) || (!dictSize)) { - cdict->dictBuffer = NULL; - cdict->dictContent = dictBuffer; - } else { - void *const internalBuffer = ZSTD_malloc(dictSize, customMem); - if 
(!internalBuffer) { - ZSTD_free(cctx, customMem); - ZSTD_free(cdict, customMem); - return NULL; - } - memcpy(internalBuffer, dictBuffer, dictSize); - cdict->dictBuffer = internalBuffer; - cdict->dictContent = internalBuffer; - } - - { - size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0); - if (ZSTD_isError(errorCode)) { - ZSTD_free(cdict->dictBuffer, customMem); - ZSTD_free(cdict, customMem); - ZSTD_freeCCtx(cctx); - return NULL; - } - } - - cdict->refContext = cctx; - cdict->dictContentSize = dictSize; - return cdict; - } -} - -ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem); -} - -size_t ZSTD_freeCDict(ZSTD_CDict *cdict) -{ - if (cdict == NULL) - return 0; /* support free on NULL */ - { - ZSTD_customMem const cMem = cdict->refContext->customMem; - ZSTD_freeCCtx(cdict->refContext); - ZSTD_free(cdict->dictBuffer, cMem); - ZSTD_free(cdict, cMem); - return 0; - } -} - -static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); } - -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize) -{ - if (cdict->dictContentSize) - CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) - else { - ZSTD_parameters params = cdict->refContext->params; - params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize)); - } - return 0; -} - -/*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
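* (annotation: "digested" means the match-finder tables were built once
* into cdict->refContext; ZSTD_compressBegin_usingCDict() copies that
* context rather than re-loading the dictionary for every frame)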
-* Note that compression level is decided during dictionary creation */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict) -{ - CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize)); - - if (cdict->refContext->params.fParams.contentSizeFlag == 1) { - cctx->params.fParams.contentSizeFlag = 1; - cctx->frameContentSize = srcSize; - } else { - cctx->params.fParams.contentSizeFlag = 0; - } - - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -/* ****************************************************************** -* Streaming -********************************************************************/ - -typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; - -struct ZSTD_CStream_s { - ZSTD_CCtx *cctx; - ZSTD_CDict *cdictLocal; - const ZSTD_CDict *cdict; - char *inBuff; - size_t inBuffSize; - size_t inToCompress; - size_t inBuffPos; - size_t inBuffTarget; - size_t blockSize; - char *outBuff; - size_t outBuffSize; - size_t outBuffContentSize; - size_t outBuffFlushedSize; - ZSTD_cStreamStage stage; - U32 checksum; - U32 frameEnded; - U64 pledgedSrcSize; - U64 inputProcessed; - ZSTD_parameters params; - ZSTD_customMem customMem; -}; /* typedef'd to ZSTD_CStream within "zstd.h" */ - -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams) -{ - size_t const inBuffSize = (size_t)1 << cParams.windowLog; - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize); - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - - return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); -} - -ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem) -{ - ZSTD_CStream *zcs; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem); - if (zcs == NULL) - return NULL; - memset(zcs, 0, sizeof(ZSTD_CStream)); - memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem)); - zcs->cctx = ZSTD_createCCtx_advanced(customMem); - if (zcs->cctx == NULL) { - ZSTD_freeCStream(zcs); - return NULL; - } - return zcs; -} - -size_t ZSTD_freeCStream(ZSTD_CStream *zcs) -{ - if (zcs == NULL) - return 0; /* support free on NULL */ - { - ZSTD_customMem const cMem = zcs->customMem; - ZSTD_freeCCtx(zcs->cctx); - zcs->cctx = NULL; - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = NULL; - ZSTD_free(zcs->inBuff, cMem); - zcs->inBuff = NULL; - ZSTD_free(zcs->outBuff, cMem); - zcs->outBuff = NULL; - ZSTD_free(zcs, cMem); - return 0; - } -} - -/*====== Initialization ======*/ - -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } -size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */; } - -static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize) -{ - if (zcs->inBuffSize == 0) - return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */ - - if (zcs->cdict) - CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) - else - CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); - - zcs->inToCompress = 0; - zcs->inBuffPos = 0; - zcs->inBuffTarget = zcs->blockSize; - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; - zcs->frameEnded = 0; - zcs->pledgedSrcSize = 
pledgedSrcSize; - zcs->inputProcessed = 0; - return 0; /* ready to go */ -} - -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize) -{ - - zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - - return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); -} - -static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - /* allocate buffers */ - { - size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; - if (zcs->inBuffSize < neededInBuffSize) { - zcs->inBuffSize = neededInBuffSize; - ZSTD_free(zcs->inBuff, zcs->customMem); - zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem); - if (zcs->inBuff == NULL) - return ERROR(memory_allocation); - } - zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); - } - if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) { - zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1; - ZSTD_free(zcs->outBuff, zcs->customMem); - zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem); - if (zcs->outBuff == NULL) - return ERROR(memory_allocation); - } - - if (dict && dictSize >= 8) { - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem); - if (zcs->cdictLocal == NULL) - return ERROR(memory_allocation); - zcs->cdict = zcs->cdictLocal; - } else - zcs->cdict = NULL; - - zcs->checksum = params.fParams.checksumFlag > 0; - zcs->params = params; - - return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); -} - -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem); - if (zcs) { - size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); - if (ZSTD_isError(code)) { - return NULL; - } - } - return zcs; -} - -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize) -{ - ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); - ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize); - if (zcs) { - zcs->cdict = cdict; - if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) { - return NULL; - } - } - return zcs; -} - -/*====== Compression ======*/ - -typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e; - -ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; -} - -static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush) -{ - U32 someMoreWork = 1; - const char *const istart = (const char *)src; - const char *const iend = istart + *srcSizePtr; - const char *ip = istart; - char *const ostart = (char *)dst; - char *const oend = ostart + *dstCapacityPtr; - char *op = ostart; - - while (someMoreWork) { - switch (zcs->stage) { - case zcss_init: - return ERROR(init_missing); /* call ZBUFF_compressInit() first ! 
*/ - - case zcss_load: - /* complete inBuffer */ - { - size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip); - zcs->inBuffPos += loaded; - ip += loaded; - if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) { - someMoreWork = 0; - break; /* not enough input to get a full block : stop there, wait for more */ - } - } - /* compress curr block (note : this stage cannot be stopped in the middle) */ - { - void *cDst; - size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; - size_t oSize = oend - op; - if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress directly into output buffer (avoid flush stage) */ - else - cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) - : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); - if (ZSTD_isError(cSize)) - return cSize; - if (flush == zsf_end) - zcs->frameEnded = 1; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */ - zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { - op += cSize; - break; - } /* no need to flush */ - zcs->outBuffContentSize = cSize; - zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_flush; /* pass-through to flush stage */ - } - fallthrough; - - case zcss_flush: { - size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - op += flushed; - zcs->outBuffFlushedSize += flushed; - if (toFlush != flushed) { - someMoreWork = 0; - break; - } /* dst too small to store flushed data : stop there */ - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; - break; - } - - case zcss_final: - someMoreWork = 0; /* do nothing */ - break; - - default: - return ERROR(GENERIC); /* impossible */ - } - } - - *srcSizePtr = ip - istart; - *dstCapacityPtr = op - ostart; - zcs->inputProcessed += *srcSizePtr; - if (zcs->frameEnded) - return 0; - { - size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; - if (hintInSize == 0) - hintInSize = zcs->blockSize; - return hintInSize; - } -} - -size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input) -{ - size_t sizeRead = input->size - input->pos; - size_t sizeWritten = output->size - output->pos; - size_t const result = - ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather); - input->pos += sizeRead; - output->pos += sizeWritten; - return result; -} - -/*====== Finalize ======*/ - -/*! 
ZSTD_flushStream() : -* @return : amount of data remaining to flush */ -size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output) -{ - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize, - &srcSize, /* use a valid src address instead of NULL */ - zsf_flush); - output->pos += sizeWritten; - if (ZSTD_isError(result)) - return result; - return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ -} - -size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output) -{ - BYTE *const ostart = (BYTE *)(output->dst) + output->pos; - BYTE *const oend = (BYTE *)(output->dst) + output->size; - BYTE *op = ostart; - - if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize)) - return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */ - - if (zcs->stage != zcss_final) { - /* flush whatever remains */ - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const notEnded = - ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */ - size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - op += sizeWritten; - if (remainingToFlush) { - output->pos += sizeWritten; - return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4); - } - /* create epilogue */ - zcs->stage = zcss_final; - zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, - 0); /* write epilogue, including final empty block, into outBuff */ - } - - /* flush epilogue */ - { - size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - op += flushed; - zcs->outBuffFlushedSize += flushed; - output->pos += op - ostart; - if (toFlush == flushed) - zcs->stage = zcss_init; /* end reached */ - return toFlush - flushed; - } -} - -/*-===== Pre-defined compression levels =====-*/ - -#define ZSTD_DEFAULT_CLEVEL 1 -#define ZSTD_MAX_CLEVEL 22 -int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = { - { - /* "default" */ - /* W, C, H, S, L, TL, strat */ - {18, 12, 12, 1, 7, 16, ZSTD_fast}, /* level 0 - never used */ - {19, 13, 14, 1, 7, 16, ZSTD_fast}, /* level 1 */ - {19, 15, 16, 1, 6, 16, ZSTD_fast}, /* level 2 */ - {20, 16, 17, 1, 5, 16, ZSTD_dfast}, /* level 3.*/ - {20, 18, 18, 1, 5, 16, ZSTD_dfast}, /* level 4.*/ - {20, 15, 18, 3, 5, 16, ZSTD_greedy}, /* level 5 */ - {21, 16, 19, 2, 5, 16, ZSTD_lazy}, /* level 6 */ - {21, 17, 20, 3, 5, 16, ZSTD_lazy}, /* level 7 */ - {21, 18, 20, 3, 5, 16, ZSTD_lazy2}, /* level 8 */ - {21, 20, 20, 3, 5, 16, ZSTD_lazy2}, /* level 9 */ - {21, 19, 21, 4, 5, 16, ZSTD_lazy2}, /* level 10 */ - {22, 20, 22, 4, 5, 16, ZSTD_lazy2}, /* level 11 */ - {22, 20, 22, 5, 5, 16, ZSTD_lazy2}, /* level 12 */ - {22, 21, 22, 5, 5, 16, ZSTD_lazy2}, /* level 13 */ - {22, 21, 22, 6, 5, 16, ZSTD_lazy2}, /* level 14 */ - {22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */ - {23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */ - {23, 21, 22, 4, 5, 24, ZSTD_btopt}, /* level 17 */ - {23, 23, 22, 6, 5, 32, ZSTD_btopt}, /* level 18 */ - {23, 23, 22, 6, 3, 48, ZSTD_btopt}, /* level 19 */ - {25, 25, 23, 7, 3, 64, ZSTD_btopt2}, /* level 20 */ 
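/* annotation: levels 21-22 below raise windowLog/chainLog to 26-27 and
 * targetLength to 256-512; each +1 on those logs doubles the
 * corresponding window/table memory */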
- {26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */ - {27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */ - }, - { - /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - {0, 0, 0, 0, 0, 0, ZSTD_fast}, /* level 0 - not used */ - {18, 13, 14, 1, 6, 8, ZSTD_fast}, /* level 1 */ - {18, 14, 13, 1, 5, 8, ZSTD_dfast}, /* level 2 */ - {18, 16, 15, 1, 5, 8, ZSTD_dfast}, /* level 3 */ - {18, 15, 17, 1, 5, 8, ZSTD_greedy}, /* level 4.*/ - {18, 16, 17, 4, 5, 8, ZSTD_greedy}, /* level 5.*/ - {18, 16, 17, 3, 5, 8, ZSTD_lazy}, /* level 6.*/ - {18, 17, 17, 4, 4, 8, ZSTD_lazy}, /* level 7 */ - {18, 17, 17, 4, 4, 8, ZSTD_lazy2}, /* level 8 */ - {18, 17, 17, 5, 4, 8, ZSTD_lazy2}, /* level 9 */ - {18, 17, 17, 6, 4, 8, ZSTD_lazy2}, /* level 10 */ - {18, 18, 17, 6, 4, 8, ZSTD_lazy2}, /* level 11.*/ - {18, 18, 17, 7, 4, 8, ZSTD_lazy2}, /* level 12.*/ - {18, 19, 17, 6, 4, 8, ZSTD_btlazy2}, /* level 13 */ - {18, 18, 18, 4, 4, 16, ZSTD_btopt}, /* level 14.*/ - {18, 18, 18, 4, 3, 16, ZSTD_btopt}, /* level 15.*/ - {18, 19, 18, 6, 3, 32, ZSTD_btopt}, /* level 16.*/ - {18, 19, 18, 8, 3, 64, ZSTD_btopt}, /* level 17.*/ - {18, 19, 18, 9, 3, 128, ZSTD_btopt}, /* level 18.*/ - {18, 19, 18, 10, 3, 256, ZSTD_btopt}, /* level 19.*/ - {18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/ - {18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/ - {18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/ - }, - { - /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - {17, 12, 12, 1, 7, 8, ZSTD_fast}, /* level 0 - not used */ - {17, 12, 13, 1, 6, 8, ZSTD_fast}, /* level 1 */ - {17, 13, 16, 1, 5, 8, ZSTD_fast}, /* level 2 */ - {17, 16, 16, 2, 5, 8, ZSTD_dfast}, /* level 3 */ - {17, 13, 15, 3, 4, 8, ZSTD_greedy}, /* level 4 */ - {17, 15, 17, 4, 4, 8, ZSTD_greedy}, /* level 5 */ - {17, 16, 17, 3, 4, 8, ZSTD_lazy}, /* level 6 */ - {17, 15, 17, 4, 4, 8, ZSTD_lazy2}, /* level 7 */ - {17, 17, 17, 4, 4, 8, ZSTD_lazy2}, /* level 8 */ - {17, 17, 17, 5, 4, 8, ZSTD_lazy2}, /* level 9 */ - {17, 17, 17, 6, 4, 8, ZSTD_lazy2}, /* level 10 */ - {17, 17, 17, 7, 4, 8, ZSTD_lazy2}, /* level 11 */ - {17, 17, 17, 8, 4, 8, ZSTD_lazy2}, /* level 12 */ - {17, 18, 17, 6, 4, 8, ZSTD_btlazy2}, /* level 13.*/ - {17, 17, 17, 7, 3, 8, ZSTD_btopt}, /* level 14.*/ - {17, 17, 17, 7, 3, 16, ZSTD_btopt}, /* level 15.*/ - {17, 18, 17, 7, 3, 32, ZSTD_btopt}, /* level 16.*/ - {17, 18, 17, 7, 3, 64, ZSTD_btopt}, /* level 17.*/ - {17, 18, 17, 7, 3, 256, ZSTD_btopt}, /* level 18.*/ - {17, 18, 17, 8, 3, 256, ZSTD_btopt}, /* level 19.*/ - {17, 18, 17, 9, 3, 256, ZSTD_btopt2}, /* level 20.*/ - {17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/ - {17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/ - }, - { - /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - {14, 12, 12, 1, 7, 6, ZSTD_fast}, /* level 0 - not used */ - {14, 14, 14, 1, 6, 6, ZSTD_fast}, /* level 1 */ - {14, 14, 14, 1, 4, 6, ZSTD_fast}, /* level 2 */ - {14, 14, 14, 1, 4, 6, ZSTD_dfast}, /* level 3.*/ - {14, 14, 14, 4, 4, 6, ZSTD_greedy}, /* level 4.*/ - {14, 14, 14, 3, 4, 6, ZSTD_lazy}, /* level 5.*/ - {14, 14, 14, 4, 4, 6, ZSTD_lazy2}, /* level 6 */ - {14, 14, 14, 5, 4, 6, ZSTD_lazy2}, /* level 7 */ - {14, 14, 14, 6, 4, 6, ZSTD_lazy2}, /* level 8.*/ - {14, 15, 14, 6, 4, 6, ZSTD_btlazy2}, /* level 9.*/ - {14, 15, 14, 3, 3, 6, ZSTD_btopt}, /* level 10.*/ - {14, 15, 14, 6, 3, 8, ZSTD_btopt}, /* level 11.*/ - {14, 15, 14, 6, 3, 16, ZSTD_btopt}, /* level 12.*/ - {14, 15, 14, 6, 3, 24, ZSTD_btopt}, /* level 13.*/ - {14, 15, 15, 6, 3, 48, ZSTD_btopt}, /* level 14.*/ - {14, 15, 15, 6, 
3, 64, ZSTD_btopt}, /* level 15.*/
- {14, 15, 15, 6, 3, 96, ZSTD_btopt}, /* level 16.*/
- {14, 15, 15, 6, 3, 128, ZSTD_btopt}, /* level 17.*/
- {14, 15, 15, 6, 3, 256, ZSTD_btopt}, /* level 18.*/
- {14, 15, 15, 7, 3, 256, ZSTD_btopt}, /* level 19.*/
- {14, 15, 15, 8, 3, 256, ZSTD_btopt2}, /* level 20.*/
- {14, 15, 15, 9, 3, 256, ZSTD_btopt2}, /* level 21.*/
- {14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/
- },
-};
-
-/*! ZSTD_getCParams() :
-* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
-* Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_compressionParameters cp;
-	size_t const addedSize = srcSize ? 0 : 500;
-	U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1;
-	U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-	if (compressionLevel <= 0)
-		compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
-	if (compressionLevel > ZSTD_MAX_CLEVEL)
-		compressionLevel = ZSTD_MAX_CLEVEL;
-	cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-	if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */
-		if (cp.windowLog > ZSTD_WINDOWLOG_MAX)
-			cp.windowLog = ZSTD_WINDOWLOG_MAX;
-		if (cp.chainLog > ZSTD_CHAINLOG_MAX)
-			cp.chainLog = ZSTD_CHAINLOG_MAX;
-		if (cp.hashLog > ZSTD_HASHLOG_MAX)
-			cp.hashLog = ZSTD_HASHLOG_MAX;
-	}
-	cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-	return cp;
-}
-
-/*! ZSTD_getParams() :
-* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
-* All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_parameters params;
-	ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
-	memset(&params, 0, sizeof(params));
-	params.cParams = cParams;
-	return params;
-}
-
-size_t zstd_compress_bound(size_t src_size)
-{
-	return ZSTD_compressBound(src_size);
-}
-EXPORT_SYMBOL(zstd_compress_bound);
-
-int zstd_min_clevel(void)
-{
-	/*
-	 * zstd-1.3.1 doesn't implement ZSTD_minCLevel().
-	 * Return 0 (default level).
- */ - return 0; -} -EXPORT_SYMBOL(zstd_min_clevel); - -int zstd_max_clevel(void) -{ - return ZSTD_maxCLevel(); -} -EXPORT_SYMBOL(zstd_max_clevel); - -zstd_parameters zstd_get_params(int level, - unsigned long long estimated_src_size) -{ - return ZSTD_getParams(level, estimated_src_size, 0); -} -EXPORT_SYMBOL(zstd_get_params); - -size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) -{ - return ZSTD_CCtxWorkspaceBound(*cparams); -} -EXPORT_SYMBOL(zstd_cctx_workspace_bound); - -zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) -{ - return ZSTD_initCCtx(workspace, workspace_size); -} -EXPORT_SYMBOL(zstd_init_cctx); - -size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, - const void *src, size_t src_size, const zstd_parameters *parameters) -{ - return ZSTD_compressCCtx(cctx, dst, dst_capacity, src, src_size, *parameters); -} -EXPORT_SYMBOL(zstd_compress_cctx); - -size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) -{ - return ZSTD_CStreamWorkspaceBound(*cparams); -} -EXPORT_SYMBOL(zstd_cstream_workspace_bound); - -zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, - unsigned long long pledged_src_size, void *workspace, size_t workspace_size) -{ - return ZSTD_initCStream(*parameters, pledged_src_size, workspace, workspace_size); -} -EXPORT_SYMBOL(zstd_init_cstream); - -size_t zstd_reset_cstream(zstd_cstream *cstream, - unsigned long long pledged_src_size) -{ - return ZSTD_resetCStream(cstream, pledged_src_size); -} -EXPORT_SYMBOL(zstd_reset_cstream); - -size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, - zstd_in_buffer *input) -{ - return ZSTD_compressStream(cstream, output, input); -} -EXPORT_SYMBOL(zstd_compress_stream); - -size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) -{ - return ZSTD_flushStream(cstream, output); -} -EXPORT_SYMBOL(zstd_flush_stream); - -size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) -{ - return ZSTD_endStream(cstream, output); -} -EXPORT_SYMBOL(zstd_end_stream); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c new file mode 100644 index 0000000000000..436985b620e51 --- /dev/null +++ b/lib/zstd/compress/fse_compress.c @@ -0,0 +1,625 @@ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +#include "../common/compiler.h" +#include "../common/mem.h" /* U32, U16, etc. 
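+                                        (fixed-width integer typedefs and the
+                                         unaligned read/write helpers shared
+                                         by the FSE and HUF coders)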
*/ +#include "../common/debug.h" /* assert, DEBUGLOG */ +#include "hist.h" /* HIST_count_wksp */ +#include "../common/bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/error_private.h" +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. 
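+   (FSE_normalizeCount() below switches to this method when the rounding
+    deficit left by its single-division fast path is too large to absorb
+    into the most frequent symbol.)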
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! 
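+                                                               step ~= 2^62/total, so later
+                                                               (count[s]*step) >> (62-tableLog)
+                                                               ~= count[s] * 2^tableLog / total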
*/ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c new file mode 100644 index 0000000000000..3ddc6dfb68948 --- /dev/null +++ b/lib/zstd/compress/hist.c @@ -0,0 +1,165 @@ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. 
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/mem.h"             /* U32, BYTE, etc. */
+#include "../common/debug.h"           /* assert, DEBUGLOG */
+#include "../common/error_private.h"   /* ERROR */
+#include "hist.h"
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned largestCount=0;
+
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    while (ip<end) {
+        assert(*ip <= maxSymbolValue);
+        count[*ip++]++;
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+
+    {   U32 s;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > largestCount) largestCount = count[s];
+    }
+
+    return largestCount;
+}
+
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
+    if (!sourceSize) {
+        ZSTD_memset(count, 0, countSize);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    {   U32 s;
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
+    }   }
+
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
+    return (size_t)max;
+}
+
+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize,
+                          void* workSpace, size_t workSpaceSize)
+{
+    if (sourceSize < 1500) /* heuristic threshold */
+        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
+}
+
+/* HIST_count_wksp() :
+ * Same as HIST_count(), but using an externally provided scratch buffer.
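+ * When *maxSymbolValuePtr is already 255 (or more) there is nothing left to
+ * check, so it simply defers to HIST_countFast_wksp().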
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h new file mode 100644 index 0000000000000..fc1830abc9c63 --- /dev/null +++ b/lib/zstd/compress/hist.h @@ -0,0 +1,75 @@ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +#include "../common/zstd_deps.h" /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /*< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/* HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. 
+ * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c new file mode 100644 index 0000000000000..f76a526bfa54b --- /dev/null +++ b/lib/zstd/compress/huf_compress.c @@ -0,0 +1,905 @@ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" +#include "hist.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "../common/fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
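+ *        Weights therefore span at most [0, HUF_TABLELOG_MAX], a tiny
+ *        alphabet, which is why the 6-bit FSE tableLog defined just below
+ *        gives enough headroom.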
 */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+
+    /* convert to weight */
+    wksp->bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't
cause msan issue in final combination */ + for (n=0; nhuffWeight[n] << 4) + wksp->huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; +} + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + HUF_WriteCTableWksp wksp; + return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); +} + + +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + *hasZeroWeights = (rankVal[0] > 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. 
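+     * For example, with largestBits == 13 and maxNbBits == 11, baseCost is 4;
+     * demoting a depth-13 leaf contributes 4 - 1 = 3 and a depth-12 leaf
+     * contributes 4 - 2 = 2, all counted in 2^largestBits units until the
+     * renormalizing shift below.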
+ */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. 
+ * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). + */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/* + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. + */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/* HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). 
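+ *         (Classic two-queue construction: the sorted leaves occupy
+ *         huffNode[0..nonNullRank] while freshly created parents grow from
+ *         huffNode[STARTNODE..], so each merge can take the two smallest
+ *         available nodes without re-sorting.)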
 */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+{
+    nodeElt* const huffNode0 = huffNode - 1;
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+    /* init for parents */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    return nonNullRank;
+}
+
+/*
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                     void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += 
CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3: + HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + ZSTD_FALLTHROUGH; + case 2: + HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + ZSTD_FALLTHROUGH; + case 1: + HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + ZSTD_FALLTHROUGH; + case 0: ZSTD_FALLTHROUGH; + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, 
CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + } wksps; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - 
((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c new file mode 100644 index 0000000000000..a4e916008b3a7 --- /dev/null +++ b/lib/zstd/compress/zstd_compress.c @@ -0,0 +1,5109 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ +#include "../common/cpu.h" +#include "../common/mem.h" +#include "hist.h" /* HIST_countFast_wksp */ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" +#include "zstd_ldm.h" +#include "zstd_compress_superblock.h" + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) 
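+        /* reservation failed : the provided workspace cannot hold even the CCtx object itself */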
return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/* + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ + (void)cctx; + return 0; +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. 
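+ * For instance, windowLog=27 with ZSTD_btultra qualifies, while a typical
+ * mid-level setting such as windowLog=21 with ZSTD_greedy does not.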
+ */
+static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+}
+
+static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+        ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params cctxParams;
+    /* should not matter, as all cParams are presumed properly defined */
+    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
+    cctxParams.cParams = cParams;
+
+    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
+        cctxParams.ldmParams.enableLdm = 1;
+        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
+        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
+        assert(cctxParams.ldmParams.hashRateLog < 32);
+    }
+
+    assert(!ZSTD_checkCParams(cParams));
+    return cctxParams;
+}
+
+static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params* params;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
+            sizeof(ZSTD_CCtx_params), customMem);
+    if (!params) { return NULL; }
+    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+    params->customMem = customMem;
+    return params;
+}
+
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
+{
+    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
+}
+
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
+{
+    if (params == NULL) { return 0; }
+    ZSTD_customFree(params, params->customMem);
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
+{
+    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+}
+
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->fParams.contentSizeFlag = 1;
+    return 0;
+}
+
+#define ZSTD_NO_CLEVEL 0
+
+/*
+ * Initializes the cctxParams from params and compressionLevel.
+ * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+ */
+static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = compressionLevel;
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
+    return 0;
+}
+
+/*
+ * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+ * @param param Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+ * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_overlapLog: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? 
*/ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. + */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case 
ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict 
: { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_jobSize : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_overlapLog : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_rsyncable : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + 
case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : + assert(CCtxParams->nbWorkers == 0); + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_overlapLog : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_rsyncable : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/* ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
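+ * Typical usage (illustrative sketch, error handling omitted; the cctx must
+ * still be in its init stage, per the check below) :
+ *   ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams();
+ *   ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_checksumFlag, 1);
+ *   ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams);
+ *   ZSTD_freeCCtxParams(cctxParams);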
+ */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/* + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. */ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, 
dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/* ZSTD_checkCParams() : + control CParam values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/* ZSTD_clampCParams() : + * make CParam values within valid range. 
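+ * Unlike ZSTD_checkCParams(), out-of-range values are silently
+ * saturated to the nearest bound instead of being reported as errors.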
+ * @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type) {                                \
+        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+    }
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/* ZSTD_cycleLog() :
+ * condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/* ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and windowLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
+    /* No dictionary ==> No change */
+    if (dictSize == 0) {
+        return windowLog;
+    }
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    {
+        U64 const windowSize = 1ULL << windowLog;
+        U64 const dictAndWindowSize = dictSize + windowSize;
+        /* If the window size is already large enough to fit both the source and the dictionary
+         * then just use the window size. Otherwise adjust so that it fits the dictionary and
+         * the window.
+         */
+        if (windowSize >= dictSize + srcSize) {
+            return windowLog; /* Window size large enough already */
+        } else if (dictAndWindowSize >= maxWindowSize) {
+            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
+        } else {
+            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
+        }
+    }
+}
+
+/* ZSTD_adjustCParams_internal() :
+ * optimize `cPar` for a specified input (`srcSize` and `dictSize`).
+ * mostly downsize to reduce memory consumption and initialization latency.
+ * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
+ * note : `srcSize==0` means 0!
+ * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize,
+                            ZSTD_cParamMode_e mode)
+{
+    const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    assert(ZSTD_checkCParams(cPar)==0);
+
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+        /* If we don't know the source size, don't make any
+         * assumptions about it. We will already have selected
+         * smaller parameters if a dictionary is in use.
+         */
+        break;
+    case ZSTD_cpm_createCDict:
+        /* Assume a small source size when creating a dictionary
+         * with an unknown source size.
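+         * (srcSize is then taken to be minSrcSize = 513 bytes, per the constant above.)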
+ */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 
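+        /* the fast strategy maintains no chain table */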
+                            0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                ? optPotentialSpace
+                                : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        const ZSTD_compressionParameters* cParams,
+        const ldmParams_t* ldmParams,
+        const int isStatic,
+        const size_t buffInSize,
+        const size_t buffOutSize,
+        const U64 pledgedSrcSize)
+{
+    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
+    size_t const maxNbSeq = blockSize / divider;
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+
+    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
+    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
+    size_t const ldmSeqSpace = ldmParams->enableLdm ?
+        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+
+
+    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+                             + ZSTD_cwksp_alloc_size(buffOutSize);
+
+    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+
+    size_t const neededSpace =
+        cctxSpace +
+        entropySpace +
+        blockStateSpace +
+        ldmSpace +
+        ldmSeqSpace +
+        matchStateSize +
+        tokenSpace +
+        bufferSpace;
+
+    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+    return neededSpace;
+}
+
+size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    /* estimateCCtxSize is for one-shot compression. So no buffers should
+     * be needed. However, we still allocate two 0-sized buffers, which can
+     * take space under ASAN.
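+     * For streaming compression, use ZSTD_estimateCStreamSize_usingCCtxParams()
+     * instead, which also accounts for the input and output buffers (see below).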
+     */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+
+        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            ZSTD_CONTENTSIZE_UNKNOWN);
+    }
+}
+
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    return ZSTD_estimateCStreamSize_usingCParams(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ?
0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). + */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/* + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/* + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_CCtx_params params,
+                                      U64 const pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
+{
+    ZSTD_cwksp* const ws = &zc->workspace;
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
+                (U32)pledgedSrcSize, params.cParams.windowLog);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
+    zc->isFirstBlock = 1;
+
+    if (params.ldmParams.enableLdm) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+    }
+
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        size_t const maxNbSeq = blockSize / divider;
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+
+        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        ZSTD_indexResetPolicy_e needsIndexReset =
+            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+
+        size_t const neededSpace =
+            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                buffInSize, buffOutSize, pledgedSrcSize);
+        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        /* Check if workspace is large enough, alloc a new one if needed */
+        {   int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+
+            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workspaceTooSmall || workspaceWasteful) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
+                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
+                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+        }   }
+
+        ZSTD_cwksp_clear(ws);
+
+        /* init params */
+        zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        zc->producedCSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
+            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
+
+        xxh64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->dictContentSize = 0;
+
+        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        /* buffers */
+        zc->bufferedPolicy = zbuff;
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const numBuckets =
+                  ((size_t)1) << (params.ldmParams.hashLog -
+                                  params.ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+            &zc->blockState.matchState,
+            ws,
+            &params.cParams,
+            crp,
+            needsIndexReset,
+            ZSTD_resetTarget_CCtx), "");
+
+        /* ldm hash table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+            zc->maxNbLdmSequences = maxNbLdmSeq;
+
+            ZSTD_window_init(&zc->ldmState.window);
+            ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
+        }
+
+        /* Due to alignment, when reusing a workspace, we can actually consume
+         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
+         */
+        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
+               ZSTD_cwksp_used(ws) <= neededSpace + 3);
+
+        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        zc->initialized = 1;
+
+        return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
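+ * For instance, with the 16 KB ZSTD_dfast entry below, a 10 KB pledged
+ * source would attach the CDict in-place while a 10 MB source would copy
+ * its tables; ZSTD_shouldAttachDict() applies this cutoff together with
+ * attachDictPref, forceWindow and dedicatedDictSearch.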
+ */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! */ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. 
*/ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + + DEBUGLOG(4, "copying dictionary into context"); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. 
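+ * The tradeoff, as implemented above: attaching is nearly free to set up
+ * but makes match finding consult the dict's separate index space, while
+ * copying pays a one-time memcpy of the hash/chain tables in
+ * ZSTD_resetCCtx_byCopyingCDict() so the hot loop stays single-table.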
*/ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. */ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? 
((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. 
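+ * The "unsorted mark" is ZSTD_DUBT_UNSORTED_MARK, a sentinel stored in
+ * btlazy2 chain tables; preserveMark=1 shields it from being reduced like
+ * an ordinary index (see ZSTD_reduceTable_btlazy2() below).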
+ * Presume table size is a multiple of ZSTD_ROWSIZE
+ * to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
+    int cellNb = 0;
+    int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be casted to int */
+
+
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
+            if (table[cellNb] < reducerValue) table[cellNb] = 0;
+            else table[cellNb] -= reducerValue;
+            cellNb++;
+    }   }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
+{
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
+        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+    }
+
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+    }
+
+    if (ms->hashLog3) {
+        U32 const h3Size = (U32)1 << ms->hashLog3;
+        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
+    }
+}
+
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+{
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    BYTE* const llCodeTable = seqStorePtr->llCode;
+    BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    U32 u;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
+    for (u=0; u<nbSeq; u++) {
+        U32 const llv = sequences[u].litLength;
+        U32 const mlv = sequences[u].matchLength;
+        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
+    }
+    if (seqStorePtr->longLengthID==1)
+        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+    if (seqStorePtr->longLengthID==2)
+        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+/* ZSTD_useTargetCBlockSize():
+ * Returns if target compressed block size param is being used.
+ * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
+ * Returns 1 if true, 0 otherwise.
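+ * A typical use (illustrative): a caller that must bound per-block latency,
+ * e.g. over a small-MTU transport, can set ZSTD_c_targetCBlockSize so that
+ * each emitted compressed block stays near that size.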
+ */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+    return (cctxParams->targetCBlockSize != 0);
+}
+
+/* ZSTD_entropyCompressSequences_internal():
+ * actually compresses both literals and sequences */
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+                          const ZSTD_entropyCTables_t* prevEntropy,
+                                ZSTD_entropyCTables_t* nextEntropy,
+                          const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                void* entropyWorkspace, size_t entropyWkspSize,
+                          const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    unsigned* count = (unsigned*)entropyWorkspace;
+    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    BYTE* seqHead;
+    BYTE* lastNCount = NULL;
+
+    entropyWorkspace = count + (MaxSeq + 1);
+    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
+
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    entropyWorkspace, entropyWkspSize,
+                                    bmi2);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+    if (nbSeq < 128) {
+        *op++ = (BYTE)nbSeq;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
+    assert(op <= oend);
+    if (nbSeq==0) {
+        /* Copy the old tables over as if we repeated them */
+        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+    assert(op <= oend);
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->fse.litlengthCTable, +
LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + 
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq,
+                                    longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        assert(op <= oend);
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+        if (lastNCount && (op - lastNCount) < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(op - lastNCount == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
+    return (size_t)(op - ostart);
+}
+
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+                       const ZSTD_entropyCTables_t* prevEntropy,
+                             ZSTD_entropyCTables_t* nextEntropy,
+                       const ZSTD_CCtx_params* cctxParams,
+                             void* dst, size_t dstCapacity,
+                             size_t srcSize,
+                             void* entropyWorkspace, size_t entropyWkspSize,
+                             int bmi2)
+{
+    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+                            dst, dstCapacity,
+                            entropyWorkspace, entropyWkspSize, bmi2);
+    if (cSize == 0) return 0;
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block.
+     */
+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+        return 0;  /* block not compressed */
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+
+    /* Check compressibility */
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+        if (cSize >= maxCSize) return 0;  /* block not compressed */
+    }
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    return cSize;
+}
+
+/* ZSTD_selectBlockCompressor() :
+ * Not static, but internal use only (used by long distance matcher)
+ * assumption : strat is a valid strategy */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+{
+    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast,
+          ZSTD_compressBlock_doubleFast,
+          ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy,
+          ZSTD_compressBlock_lazy2,
+          ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt,
+          ZSTD_compressBlock_btultra,
+          ZSTD_compressBlock_btultra2 },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict,
+          ZSTD_compressBlock_doubleFast_extDict,
+          ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,
+          ZSTD_compressBlock_lazy2_extDict,
+          ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict,
+          ZSTD_compressBlock_btultra_extDict,
+          ZSTD_compressBlock_btultra_extDict },
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          ZSTD_compressBlock_doubleFast_dictMatchState,
+          ZSTD_compressBlock_greedy_dictMatchState,
+          ZSTD_compressBlock_lazy_dictMatchState,
+          ZSTD_compressBlock_lazy2_dictMatchState,
+          ZSTD_compressBlock_btlazy2_dictMatchState, +
ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + 
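+            /* ldmSeqStore is a transient view over the cctx-owned
+             * ldmSequences buffer: ZSTD_ldm_generateSequences() fills it,
+             * then ZSTD_ldm_blockCompress() consumes it for this block. */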
ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. 
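+ * Illustrative example: if the seqStore holds one sequence and 5 trailing
+ * literal bytes remain, the block is reported as that sequence followed by
+ * (of: 0, ml: 0, ll: 5).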
+ */
+    assert(seqStoreLiteralsSize >= literalsRead);
+    lastLLSize = seqStoreLiteralsSize - literalsRead;
+    outSeqs[i].litLength = (U32)lastLLSize;
+    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+    seqStoreSeqSize++;
+    zc->seqCollector.seqIndex += seqStoreSeqSize;
+}
+
+size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                              size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize);
+    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
+    SeqCollector seqCollector;
+
+    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    return zc->seqCollector.seqIndex;
+}
+
+size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
+    size_t in = 0;
+    size_t out = 0;
+    for (; in < seqsSize; ++in) {
+        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
+            if (in != seqsSize - 1) {
+                sequences[in+1].litLength += sequences[in].litLength;
+            }
+        } else {
+            sequences[out] = sequences[in];
+            ++out;
+        }
+    }
+    return out;
+}
+
+/* Unrolled loop to read four size_ts of input at a time. Returns 1 if the input is RLE, 0 if not. */
+static int ZSTD_isRLE(const BYTE* src, size_t length) {
+    const BYTE* ip = src;
+    const BYTE value = ip[0];
+    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
+    const size_t unrollSize = sizeof(size_t) * 4;
+    const size_t unrollMask = unrollSize - 1;
+    const size_t prefixLength = length & unrollMask;
+    size_t i;
+    size_t u;
+    if (length == 1) return 1;
+    /* Check if prefix is RLE first before using unrolled loop */
+    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+        return 0;
+    }
+    for (i = prefixLength; i != length; i += unrollSize) {
+        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+            if (MEM_readST(ip + i + u) != valueST) {
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+    return nbSeqs < 4 && nbLits < 10;
+}
+
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+    zc->blockState.nextCBlock = tmp;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame)
+{
+    /* This is the upper bound for the length of an rle block.
+     * This isn't the actual upper bound. Finding the real threshold
+     * needs further investigation.
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        return 0;
+    }
+
+    /* encode sequences and literals */
+    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            srcSize,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+            zc->bmi2);
+
+    if (frame &&
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+         * This is only an issue for zstd <= v1.4.3
+         */
+        !zc->isFirstBlock &&
+        cSize < rleMaxLength &&
+        ZSTD_isRLE(ip, srcSize))
+    {
+        cSize = 1;
+        op[0] = ip[0];
+    }
+
+out:
+    if (!ZSTD_isError(cSize) && cSize > 1) {
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+    }
+    /* We check that dictionaries have offset codes available for the first
+     * block. After the first block, the offcode table might not have large
+     * enough codes to represent the offsets in the data.
+     */
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const size_t bss, U32 lastBlock)
+{
+    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+    if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as a RLE even if it qualifies because
+             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+             * This is only an issue for zstd <= v1.4.3
+             */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
+        /* Attempt superblock compression.
+         *
+         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+         * standard ZSTD_compressBound(). This is a problem, because even if we have
+         * space now, taking an extra byte now could cause us to run out of space later
+         * and violate ZSTD_compressBound().
+         *
+         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+         *
+         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+         * uncompressed block in these cases:
+         *   * cSize == 0: Return code for an uncompressed block.
+         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+         *     output space.
+         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+         *     emit an uncompressed block.
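+         *
+         * Worked example (assuming ZSTD_blockHeaderSize == 3): for a
+         * 1000-byte block, blockBound(1000) = 1003, so a superblock result
+         * of 1003 bytes or more is discarded in favor of a raw block, which
+         * always fits within blockBound(srcSize).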
+ */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
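+* Block header layout, as written below with MEM_writeLE24(): a 24-bit
+* little-endian value where bit 0 = lastBlock, bits 1-2 = block type
+* (bt_raw / bt_rle / bt_compressed) and bits 3-23 = the size field.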
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + xxh64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? 
+                    (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;   /* 0-3 */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+    default:
+        assert(0); /* impossible */
+        ZSTD_FALLTHROUGH;
+    case 0 : break;
+    case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+    case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+    case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+    default:
+        assert(0); /* impossible */
+        ZSTD_FALLTHROUGH;
+    case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+    case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+    case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+    case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeSkippableFrame() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ *
+ * Returns the total number of bytes written, or a ZSTD error code.
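+ * Frame layout, matching the writes below: a 4-byte little-endian magic
+ * (ZSTD_MAGIC_SKIPPABLE_START + magicVariant), a 4-byte little-endian
+ * content size, then the raw payload, for srcSize + ZSTD_SKIPPABLEHEADERSIZE
+ * bytes total.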
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                               U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ?
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base);
+    }
+
+    /* Assert that the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = (size_t)(iend - ip);
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+
+        if (params->ldmParams.enableLdm && ls != NULL)
+            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
+                assert(chunk == remaining); /* must load everything in one go */
+                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
+            } else if (chunk >= HASH_READ_SIZE) {
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            }
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up cause problems
+ * during FSE encoding. Mark dictionaries with zero-probability symbols as FSE_repeat_check;
+ * only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;   /* skip magic num and dict ID */
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights.
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * 
https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    size_t dictID;
+    size_t eSize;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {
+        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+    }
+    return dictID;
+}
+
+/* ZSTD_compress_insertDictionary() :
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+}
+
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+
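+/* Illustrative arithmetic for the two cutoffs above : with a 100 KB dictionary,
+ * 6 * 100 KB = 600 KB, so a cdict's stored parameters are reused unless the
+ * pledged source size is known, at least 600 KB, and the cdict was built with
+ * an explicit compression level (see ZSTD_compressBegin_internal() below).
+ */
+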
+/*! ZSTD_compressBegin_internal() :
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if ( (cdict)
+      && (cdict->dictContentSize > 0)
+      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+        || cdict->compressionLevel == 0)
+      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+                                     ZSTDcrp_makeClean, zbuff) , "");
+    {   size_t const dictID = cdict ?
+                ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
+                        cctx->entropyWorkspace)
+              : ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+                        dictContentType, dtlm, cctx->entropyWorkspace);
+        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+        assert(dictID <= UINT_MAX);
+        cctx->dictID = (U32)dictID;
+        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
+    /* compression parameters verification and optimization */
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
+    return ZSTD_compressBegin_internal(cctx,
+                                       dict, dictSize, dictContentType, dtlm,
+                                       cdict,
+                                       params, pledgedSrcSize,
+                                       ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compressBegin_advanced_internal(cctx,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            NULL /*cdict*/,
+                                            &cctxParams, pledgedSrcSize);
+}
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+}
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t fhSize = 0;
+
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+
+    /* special case : empty frame */
+    if (cctx->stage == ZSTDcs_init) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        dstCapacity -= fhSize;
+        op += fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (cctx->stage != ZSTDcs_ending) {
+        /* write one last empty block, make it the "last" block */
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+    }
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(op, checksum);
+        op += 4;
+    }
+
+    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
+    return op-ostart;
+}
+
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+{
+    (void)cctx;
+    (void)extraCSize;
+}
+
+size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize)
+{
+    size_t endResult;
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
+    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
+    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
+    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+        DEBUGLOG(4, "end of frame : controlling src size");
+        RETURN_ERROR_IF(
+            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
+            srcSize_wrong,
+            "error : pledgedSrcSize = %u, while realSrcSize = %u",
+            (unsigned)cctx->pledgedSrcSizePlusOne-1,
+            (unsigned)cctx->consumedSrcSize);
+    }
+    ZSTD_CCtx_trace(cctx, endResult);
+    return cSize + endResult;
+}
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               ZSTD_parameters params)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compress_advanced");
+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctxParams);
+}
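+
+/* Example use of the begin/end path above (illustrative; `dst`, `dstCapacity`,
+ * `src` and `srcSize` are caller-provided) :
+ *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *   if (cctx != NULL && !ZSTD_isError(ZSTD_compressBegin(cctx, 3))) {
+ *       size_t const cSize = ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ *       ...
+ *   }
+ *   ZSTD_freeCCtx(cctx);
+ * ZSTD_compressEnd() compresses the last chunk and appends the epilogue
+ * (last empty block, optional checksum) in a single call.
+ */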
+
+/* Internal */
+size_t ZSTD_compress_advanced_internal(
+        ZSTD_CCtx* cctx,
+        void* dst, size_t dstCapacity,
+        const void* src, size_t srcSize,
+        const void* dict, size_t dictSize,
+        const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                         params, srcSize, ZSTDb_not_buffered) , "");
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
+        assert(params.fParams.contentSizeFlag == 1);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+}
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize,
+                         int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
+    assert(cctx != NULL);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity,
+               const void* src, size_t srcSize,
+                     int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
+    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
+
+
+/* =====  Dictionary API  ===== */
+
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
+size_t ZSTD_estimateCDictSize_advanced(
+        size_t dictSize, ZSTD_compressionParameters cParams,
+        ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
+    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
+}
+
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
+    /* cdict may be in the workspace */
+    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+         + ZSTD_cwksp_sizeof(&cdict->workspace);
+}
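+
+/* Example (illustrative) : sizing a CDict for a 16 KB dictionary at level 3
+ * before carving it out of a caller-provided, 8-byte-aligned buffer :
+ *   size_t const need = ZSTD_estimateCDictSize(16 * 1024, 3);
+ * then reserve `need` bytes and hand them to ZSTD_initStaticCDict() (defined
+ * further below), or simply use ZSTD_createCDict() to allocate internally.
+ */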
+
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    ZSTD_dictLoadMethod_e dictLoadMethod,
+                    ZSTD_dictContentType_e dictContentType,
+                    ZSTD_CCtx_params params)
+{
+    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
+    assert(!ZSTD_checkCParams(params.cParams));
+    cdict->matchState.cParams = params.cParams;
+    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
+    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
+        cdict->matchState.dedicatedDictSearch = 0;
+    }
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictContent = dictBuffer;
+    } else {
+        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
+        cdict->dictContent = internalBuffer;
+        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
+
+    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
+    /* Reset the state to no dictionary */
+    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+    FORWARD_IF_ERROR(ZSTD_reset_matchState(
+        &cdict->matchState,
+        &cdict->workspace,
+        &params.cParams,
+        ZSTDcrp_makeClean,
+        ZSTDirp_reset,
+        ZSTD_resetTarget_CDict), "");
+    /* (Maybe) load the dictionary
+     * Skips loading the dictionary if it is < 8 bytes.
+     */
+    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+        params.fParams.contentSizeFlag = 1;
+        {   size_t const dictID = ZSTD_compress_insertDictionary(
+                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                    &params, cdict->dictContent, cdict->dictContentSize,
+                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+            assert(dictID <= (size_t)(U32)-1);
+            cdict->dictID = (U32)dictID;
+        }
+    }
+
+    return 0;
+}
+
+static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   size_t const workspaceSize =
+            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            (dictLoadMethod == ZSTD_dlm_byRef ?
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + cctxParams.cParams = cParams; + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? 
ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = cdict->customMem;
+        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+        ZSTD_cwksp_free(&cdict->workspace, cMem);
+        if (!cdictInWorkspace) {
+            ZSTD_customFree(cdict, cMem);
+        }
+        return 0;
+    }
+}
+
+/*! ZSTD_initStaticCDict() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must be 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevant cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                            + matchStateSize;
+    ZSTD_CDict* cdict;
+    ZSTD_CCtx_params params;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+
+    {
+        ZSTD_cwksp ws;
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        if (cdict == NULL) return NULL;
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+    }
+
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    ZSTD_CCtxParams_init(&params, 0);
+    params.cParams = cParams;
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              dictLoadMethod, dictContentType,
+                                              params) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    assert(cdict != NULL);
+    return cdict->matchState.cParams;
+}
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries.
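+ *  For example (illustrative), a caller can pair frames with this dictionary
+ *  by comparing ZSTD_getDictID_fromCDict(cdict) against the dictID stored in
+ *  the frame header, when the frame was written with the dictID field enabled.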
 */
+unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;
+    return cdict->dictID;
+}
+
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    /* Initialize the cctxParams from the cdict */
+    {
+        ZSTD_parameters params;
+        params.fParams = fParams;
+        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+                        || cdict->compressionLevel == 0 ) ?
+                ZSTD_getCParamsFromCDict(cdict)
+              : ZSTD_getCParams(cdict->compressionLevel,
+                                pledgedSrcSize,
+                                cdict->dictContentSize);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
+    }
+    /* Increase window log to fit the entire dictionary and source if the
+     * source size is known. Limit the increase to 19, which is the
+     * window log for compression level 1 with the largest source size.
+     */
+    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
+    }
+    return ZSTD_compressBegin_internal(cctx,
+                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                        cdict,
+                                        &cctxParams, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * pledgedSrcSize=0 means "unknown"
+ * if pledgedSrcSize>0, it will enable contentSizeFlag */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                         void* dst, size_t dstCapacity,
+                                         const void* src, size_t srcSize,
+                                         const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
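+ *  Example (illustrative; one CDict reused across many inputs) :
+ *    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictBufSize, 3);
+ *    then, per input : ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
+ *    and finally : ZSTD_freeCDict(cdict);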
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
+ * Assumption 1 : params are valid
+ * Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    const ZSTD_CCtx_params* params,
+                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_internal");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    zcs->requestedParams = *params;
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if (dict) {
+        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    } else {
+        /* Dictionary is cleared if !cdict */
+        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    }
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    zcs->requestedParams.fParams = fParams;
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+
+/* ZSTD_initCStream_advanced() :
+ * pledgedSrcSize must be exact.
+ * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pss)
+{
+    /* for compatibility with older programs relying on this behavior.
+     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
+     * This line will be removed in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/* ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to 
fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + ZSTD_FALLTHROUGH; + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. 
+ */
+static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+                                        ZSTD_outBuffer const* output,
+                                        ZSTD_inBuffer const* input,
+                                        ZSTD_EndDirective endOp)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
+        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+        if (endOp != ZSTD_e_end)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        size_t const outBufferSize = output->size - output->pos;
+        if (cctx->expectedOutBufferSize != outBufferSize)
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+    }
+    return 0;
+}
+
+static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+                                             ZSTD_EndDirective endOp,
+                                             size_t inSize) {
+    ZSTD_CCtx_params params = cctx->requestedParams;
+    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , "");  /* Init the local dict if present. */
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));  /* single usage */
+    assert(prefixDict.dict==NULL || cctx->cdict==NULL);  /* only one can be set */
+    if (cctx->cdict)
+        params.compressionLevel = cctx->cdict->compressionLevel;  /* let cdict take priority in terms of compression level */
+    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+    {
+        size_t const dictSize = prefixDict.dict
+                ? prefixDict.dictSize
+                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &params, cctx->pledgedSrcSizePlusOne-1,
+                dictSize, mode);
+    }
+
+    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
+        /* Enable LDM by default for optimal parser and window size >= 128MB */
+        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
+        params.ldmParams.enableLdm = 1;
+    }
+
+    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+            /* for small input: avoid automatic flush on reaching end of block, since
+             * it would require to add a 3-bytes null block to end frame
+             */
+            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        } else {
+            cctx->inBuffTarget = 0;
+        }
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp)
+{
+    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
+    /* check conditions */
+    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
+    RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer");
+    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
+    assert(cctx != NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+        ZSTD_setBufferExpectations(cctx, output, input);  /* Set initial buffer expectations now that we've initialized */
+    }
+    /* end of transparent initialization stage */
+
+    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
+    /* compression stage */
+    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
+    DEBUGLOG(5, "completed ZSTD_compressStream2");
+    ZSTD_setBufferExpectations(cctx, output, input);
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer input = { src, srcSize, *srcPos };
+    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
+
+size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
+    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
+    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    /* Enable stable input/output buffers. */
+    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
+    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
+    {   size_t oPos = 0;
+        size_t iPos = 0;
+        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
+                                        dst, dstCapacity, &oPos,
+                                        src, srcSize, &iPos,
+                                        ZSTD_e_end);
+        /* Reset to the original values. */
+        cctx->requestedParams.inBufferMode = originalInBufferMode;
+        cctx->requestedParams.outBufferMode = originalOutBufferMode;
+        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
+        if (result != 0) {  /* compression not completed, due to lack of output space */
+            assert(oPos == dstCapacity);
+            RETURN_ERROR(dstSize_tooSmall, "");
+        }
+        assert(iPos == srcSize);  /* all input is expected to be consumed */
+        return oPos;
+    }
+}
+
+typedef struct {
+    U32 idx;             /* Index in array of ZSTD_Sequence */
+    U32 posInSequence;   /* Position within sequence at idx */
+    size_t posInSrc;     /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
+/* Returns a ZSTD error code if sequence is not valid */
+static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
+    size_t offsetBound;
+    U32 windowSize = 1 << windowLog;
+    /* posInSrc represents the amount of data the decoder would decode up to this point.
+     * As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+     */
+    offsetBound = posInSrc > windowSize ?
(size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. 
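+ *
+ * For example (illustrative) : with blockSize = 100, a sequence of
+ * litLength = 10 and matchLength = 200 must be split; the first 90 match
+ * bytes are stored in this block and the remaining 110 are carried over via
+ * posInSequence, unless the remainder would fall below minMatch, in which
+ * case the split point is moved back and the surplus bytes are returned.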
+ * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 
0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. + */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. 
Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(cctx); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx 
initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + xxh64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) xxh64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, 
ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog >= cParams->chainLog) + && (cParams->chainLog <= 24); +} + +/* + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) 
+ */
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams) {
+    switch (cParams->strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+}
+
+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_createCDict:
+        break;
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    }
+}
+
+/*! ZSTD_getCParams_internal() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
+ *        Use dictSize == 0 for unknown or unused.
+ *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+
+    /* row */
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    else row = compressionLevel;
+
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        /* acceleration factor */
+        if (compressionLevel < 0) {
+            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
+            cp.targetLength = (unsigned)(-clampedCompressionLevel);
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    ZSTD_memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    return params;
+}
+
+/*!
ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h new file mode 100644 index 0000000000000..685d2f996cc2a --- /dev/null +++ b/lib/zstd/compress/zstd_compress_internal.h @@ -0,0 +1,1188 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_internal.h" +#include "zstd_cwksp.h" + + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. 
*/
+    size_t capacity;       /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
+typedef struct {
+    int price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_optimal_t;
+
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+
+typedef struct {
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+
+    U32  litSum;                 /* nb of literals */
+    U32  litLengthSum;           /* nb of litLength codes */
+    U32  matchLengthSum;         /* nb of matchLength codes */
+    U32  offCodeSum;             /* nb of offset codes */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq) */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
+    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+} optState_t;
+
+typedef struct {
+    ZSTD_entropyCTables_t entropy;
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+    BYTE const* nextSrc;    /* next block here to continue on current prefix */
+    BYTE const* base;       /* All regular indexes relative to this position */
+    BYTE const* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more valid data */
+} ZSTD_window_t;
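
The four fields of ZSTD_window_t above define zstd's absolute index space: positions are indexes relative to `base`, with `[lowLimit, dictLimit)` living in the old segment reached through `dictBase`, and `[dictLimit, current)` in the live prefix. As a standalone illustration only (not part of this patch; the `toy_window` type and sample limits are invented for the example), classifying an index the way the match finders do looks like:

    /* Editorial sketch: how the two limits partition the index space. */
    #include <stdio.h>
    #include <stdint.h>

    typedef struct {
        uint32_t lowLimit;   /* below this, data has been overwritten */
        uint32_t dictLimit;  /* [lowLimit, dictLimit) lives in the extDict buffer */
    } toy_window;

    static const char* classify(const toy_window* w, uint32_t idx)
    {
        if (idx < w->lowLimit)  return "invalid (no longer in the window)";
        if (idx < w->dictLimit) return "extDict (read via dictBase)";
        return "prefix (read via base)";
    }

    int main(void)
    {
        toy_window w = { 1000, 5000 };              /* arbitrary example limits */
        uint32_t probes[] = { 500, 3000, 9000 };
        for (int i = 0; i < 3; i++)
            printf("index %u -> %s\n", (unsigned)probes[i], classify(&w, probes[i]));
        return 0;
    }
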
+
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+struct ZSTD_matchState_t {
+    ZSTD_window_t window;   /* State for window round buffer management */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
+    U32 nextToUpdate;       /* index from which to continue table update */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+    U32* hashTable;
+    U32* hashTable3;
+    U32* chainTable;
+    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
+                               * dedicated dictionary search structure.
+                               */
+    optState_t opt;         /* optimal parser state */
+    const ZSTD_matchState_t* dictMatchState;
+    ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
+};
+
+typedef struct {
+    ZSTD_compressedBlockState_t* prevCBlock;
+    ZSTD_compressedBlockState_t* nextCBlock;
+    ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
+    U32 offset;
+    U32 checksum;
+} ldmEntry_t;
+
+typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
+typedef struct {
+    ZSTD_window_t window;   /* State for the window round buffer management */
+    ldmEntry_t* hashTable;
+    U32 loadedDictEnd;
+    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
+} ldmState_t;
+
+typedef struct {
+    U32 enableLdm;          /* 1 if enable long distance matching */
+    U32 hashLog;            /* Log size of hashTable */
+    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
+    U32 minMatchLength;     /* Minimum match length */
+    U32 hashRateLog;        /* Log number of entries to skip */
+    U32 windowLog;          /* Window log for the LDM */
+} ldmParams_t;
+
+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
+struct ZSTD_CCtx_params_s {
+    ZSTD_format_e format;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+
+    int compressionLevel;
+    int forceWindow;           /* force back-references to respect limit of
+                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
+
+    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+
+    /* Multithreading: used to pass parameters to mtctx */
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
+
+    /* Long distance matching parameters */
+    ldmParams_t ldmParams;
+
+    /* Dedicated dict search algorithm trigger */
+    int enableDedicatedDictSearch;
+
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
+    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+    ZSTD_customMem customMem;
+};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+
+#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
+
+/*
+ * Indicates whether this compression proceeds directly from user-provided
+ * buffers (ZSTD_bm_stable), or whether the context needs to buffer the
+ * input/output (ZSTD_bm_buffered).
+ */
+typedef enum {
+    ZSTD_bm_buffered = 0,
+    ZSTD_bm_stable = 1
+} ZSTD_buffered_policy_e;
+
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2, ZSTD_dedicatedDictSearch = 3 } ZSTD_dictMode_e;
+typedef enum { ZSTD_cpm_noAttachDict = 0, ZSTD_cpm_attachDict = 1, ZSTD_cpm_createCDict = 2, ZSTD_cpm_unknown = 3 } ZSTD_cParamMode_e;
+
+/* ZSTD_LLcode() :
+ * note : litLength > 63 is not supported */
+MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
+{
+    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                       8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 16, 17, 17, 18, 18, 19, 19,
+                                      20, 20, 20, 20, 21, 21, 21, 21,
+                                      22, 22, 22, 22, 22, 22, 22, 22,
+                                      23, 23, 23, 23, 23, 23, 23, 23,
+                                      24, 24, 24, 24, 24, 24, 24, 24,
+                                      24, 24, 24, 24, 24, 24, 24, 24 };
+    static const U32 LL_deltaCode = 19;
+    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+}
+
+/* ZSTD_MLcode() :
+ * note : mlBase = matchLength - MINMATCH;
+ *        because it's the format it's stored in seqStore->sequences */
+MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
+{
+    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+    static const U32 ML_deltaCode = 36;
+    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
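
The three-entry repcode history above is the state that ZSTD_updateRep() (just below) maintains after every stored sequence. To make the update rule concrete, here is a standalone re-implementation for illustration only (not part of the patch; `toy_reps` and the `REP_*` constants are local stand-ins for ZSTD_REP_NUM and ZSTD_REP_MOVE), starting from zstd's initial history {1, 4, 8}:

    /* Editorial sketch of the repcode update rule. */
    #include <assert.h>
    #include <stdint.h>

    #define REP_NUM  3
    #define REP_MOVE (REP_NUM - 1)   /* offCode values <= REP_MOVE denote repcodes */

    typedef struct { uint32_t rep[REP_NUM]; } toy_reps;

    static toy_reps toy_update(toy_reps r, uint32_t offCode, uint32_t ll0)
    {
        toy_reps n = r;
        if (offCode >= REP_NUM) {            /* full offset: shift the history */
            n.rep[2] = r.rep[1];
            n.rep[1] = r.rep[0];
            n.rep[0] = offCode - REP_MOVE;
        } else {                             /* repcode */
            uint32_t repCode = offCode + ll0;
            if (repCode > 0) {               /* repCode==0 reuses rep[0]: no change */
                uint32_t cur = (repCode == REP_NUM) ? r.rep[0] - 1 : r.rep[repCode];
                if (repCode >= 2) n.rep[2] = r.rep[1];
                n.rep[1] = r.rep[0];
                n.rep[0] = cur;
            }
        }
        return n;
    }

    int main(void)
    {
        toy_reps r = { { 1, 4, 8 } };          /* zstd's initial repcode history */
        r = toy_update(r, 100 + REP_MOVE, 0);  /* raw offset 100, encoded as offCode */
        assert(r.rep[0] == 100 && r.rep[1] == 1 && r.rep[2] == 4);
        r = toy_update(r, 1, 0);               /* second repcode (offset 1) moves to front */
        assert(r.rep[0] == 1 && r.rep[1] == 100 && r.rep[2] == 4);
        return 0;
    }
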
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+    } else {   /* repcode */
+        U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes uncompressed block to dst buffer from given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                    dstSize_tooSmall, "dst buf too small for uncompressed block");
+    MEM_writeLE24(dst, cBlockHeader24);
+    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    return ZSTD_blockHeaderSize + srcSize;
+}
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const op = (BYTE*)dst;
+    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+    MEM_writeLE24(op, cBlockHeader);
+    op[3] = src;
+    return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    return (srcSize >> minlog) + 2;
+}
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    switch (cctxParams->literalCompressionMode) {
+    case ZSTD_lcm_huffman:
+        return 0;
+    case ZSTD_lcm_uncompressed:
+        return 1;
+    default:
+        assert(0 /* impossible: pre-validated */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_lcm_auto:
+        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+    }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } 
else { /* 32 bits */
+#     if (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#     else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#     endif
+    }   }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+/* ZSTD_count_2segments() :
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
+    size_t const matchLength = ZSTD_count(ip, match, vEnd);
+    if (match + matchLength != mEnd) return matchLength;
+    return matchLength + ZSTD_count(iStart, ip+matchLength, iEnd);
+}
+
+
+/*-*************************************
+ *  Hashes
+ ***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+static const U32 prime4bytes = 2654435761U;
+static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    switch(mls)
+    {
+    default:
+    case 4: return ZSTD_hash4Ptr(p, hBits);
+    case 5: return ZSTD_hash5Ptr(p, hBits);
+    case 6: return ZSTD_hash6Ptr(p, hBits);
+    case 7: return ZSTD_hash7Ptr(p, hBits);
+    case 8: return ZSTD_hash8Ptr(p, hBits);
+    }
+}
+
+/* ZSTD_ipow() :
+ * Return base^exponent.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 power = 1;
+    while (exponent) {
+        if (exponent & 1) power *= base;
+        exponent >>= 1;
+        base *= base;
+    }
+    return power;
+}
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/* ZSTD_rollingHash_append() :
+ * Add the buffer to the hash value.
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* istart = (BYTE const*)buf;
+    size_t pos;
+    for (pos = 0; pos < size; ++pos) {
+        hash *= prime8bytes;
+        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    }
+    return hash;
+}
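
ZSTD_rollingHash_append() above and ZSTD_rollingHash_rotate() below form a polynomial rolling hash: sliding the window forward by one byte must give exactly the same value as recomputing the hash from scratch. The following standalone sketch checks that identity (illustrative only, not part of the patch; `PRIME` and `OFFSET` mirror prime8bytes and ZSTD_ROLL_HASH_CHAR_OFFSET but everything here is local to the example):

    /* Editorial sketch: rotate(h, out, in) == recompute over the shifted window. */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define OFFSET 10
    static const uint64_t PRIME = 0xCF1BBCDCB7A56463ULL;

    static uint64_t hash_append(uint64_t h, const uint8_t* p, size_t n) {
        for (size_t i = 0; i < n; i++) h = h * PRIME + p[i] + OFFSET;
        return h;
    }
    static uint64_t prime_power(uint32_t len) {      /* PRIME^(len-1) */
        uint64_t pw = 1;
        for (uint32_t i = 0; i + 1 < len; i++) pw *= PRIME;
        return pw;
    }
    static uint64_t rotate(uint64_t h, uint8_t out, uint8_t in, uint64_t pw) {
        return (h - (out + OFFSET) * pw) * PRIME + in + OFFSET;
    }

    int main(void) {
        const uint8_t buf[] = "rolling hashes slide one byte at a time";
        const uint32_t W = 8;                        /* window length */
        uint64_t pw = prime_power(W);
        uint64_t h  = hash_append(0, buf, W);        /* hash of buf[0..W) */
        for (size_t i = 0; i + W < sizeof(buf) - 1; i++) {
            h = rotate(h, buf[i], buf[i + W], pw);   /* slide one byte forward */
            assert(h == hash_append(0, buf + i + 1, W));   /* matches recompute */
        }
        return 0;
    }
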
+
+/* ZSTD_rollingHash_compute() :
+ * Compute the rolling hash value of the buffer.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    return ZSTD_rollingHash_append(0, buf, size);
+}
+
+/* ZSTD_rollingHash_primePower() :
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
+ * over a window of length bytes.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    return ZSTD_ipow(prime8bytes, length - 1);
+}
+
+/* ZSTD_rollingHash_rotate() :
+ * Rotate the rolling hash by one byte.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    hash *= prime8bytes;
+    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return hash;
+}
+
+/*-*************************************
+*  Round buffer management
+***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX                                                     \
+    ( ((U32)-1)                  /* Maximum ending current index */            \
+    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
+
+/*
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+    size_t const endT = (size_t)(window->nextSrc - window->base);
+    U32 const end = (U32)endT;
+
+    window->lowLimit = end;
+    window->dictLimit = end;
+}
+
+/*
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+    return window.lowLimit < window.dictLimit;
+}
+
+/*
+ * ZSTD_matchState_dictMode():
+ * Inspects the provided matchState and figures out what dictMode should be
+ * passed to the compressor.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    return ZSTD_window_hasExtDict(ms->window) ?
+        ZSTD_extDict :
+        ms->dictMatchState != NULL ?
+            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
+            ZSTD_noDict;
+}
+
+/*
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  void const* srcEnd)
+{
+    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    return curr > ZSTD_CURRENT_MAX;
+}
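
The correction performed by ZSTD_window_correctOverflow() just below relies on one invariant: the amount subtracted from every index must be a multiple of 2^cycleLog, so the low bits that address hash-chain "cycles" survive unchanged, while a full window of maxDist bytes stays addressable. A standalone check of that arithmetic, for illustration only (not part of the patch; the toy parameter values are invented):

    /* Editorial sketch: the cycleMask invariant of the index correction. */
    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        const uint32_t cycleLog = 4, maxDist = 1u << 10;   /* toy parameters */
        const uint32_t cycleMask = (1u << cycleLog) - 1;
        uint32_t curr = 0xC0000123;                        /* large current index */
        uint32_t currentCycle0 = curr & cycleMask;
        uint32_t currentCycle1 = currentCycle0 == 0 ? (1u << cycleLog) : currentCycle0;
        uint32_t newCurrent = currentCycle1 + maxDist;     /* needs (maxDist & cycleMask) == 0 */
        uint32_t correction = curr - newCurrent;
        assert((correction & cycleMask) == 0);             /* multiple of the cycle size, */
        assert(((curr - correction) & cycleMask) == (curr & cycleMask)); /* so low bits survive */
        assert(newCurrent >= maxDist);                     /* full window still addressable */
        return 0;
    }
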
+/*
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                           U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle0 = curr & cycleMask;
+    /* Exclude zero so that newCurrent - maxDist >= 1. */
+    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const correction = curr - newCurrent;
+    assert((maxDist & cycleMask) == 0);
+    assert(curr > newCurrent);
+    /* Loose bound, should be around 1<<29 (see above) */
+    assert(correction > 1<<28);
+
+    window->base += correction;
+    window->dictBase += correction;
+    if (window->lowLimit <= correction) window->lowLimit = 1;
+    else window->lowLimit -= correction;
+    if (window->dictLimit <= correction) window->dictLimit = 1;
+    else window->dictLimit -= correction;
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= 1);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
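
The sliding-window rule that ZSTD_window_enforceMaxDist() (documented and defined just below) implements reduces to one comparison: once the block end is more than maxDist past the loaded dictionary end, lowLimit chases it so no match can reach further back than the window. A standalone sketch of that rule, for illustration only (not part of the patch; `enforce` and the sample values are invented):

    /* Editorial sketch: the lowLimit update rule of window enforcement. */
    #include <assert.h>
    #include <stdint.h>

    static uint32_t enforce(uint32_t lowLimit, uint32_t blockEndIdx,
                            uint32_t maxDist, uint32_t loadedDictEnd)
    {
        if (blockEndIdx > maxDist + loadedDictEnd) {       /* window exceeded */
            uint32_t newLowLimit = blockEndIdx - maxDist;  /* cannot underflow here */
            if (lowLimit < newLowLimit) lowLimit = newLowLimit;
        }
        return lowLimit;
    }

    int main(void) {
        uint32_t const maxDist = 1u << 17;                 /* 128 KB window */
        /* Early in the stream: nothing to do, the whole history stays referenceable. */
        assert(enforce(1, 1000, maxDist, 0) == 1);
        /* Deep in the stream: lowLimit trails the block end by exactly maxDist. */
        assert(enforce(1, 500000, maxDist, 0) == 500000 - maxDist);
        /* With a dictionary loaded, it stays valid until the window passes its end. */
        assert(enforce(1, maxDist + 100, maxDist, 4096) == 1);
        return 0;
    }
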
+/*
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
+ */
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                     const void* blockEnd,
+                     U32   maxDist,
+                     U32*  loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
+        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+        if (window->dictLimit < window->lowLimit) {
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
+            window->dictLimit = window->lowLimit;
+        }
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *              loadedDictEnd uses same referential as window->base
+ *              maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                       U32 maxDist,
+                       U32* loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+        U32 const loadedDictEnd = *loadedDictEndPtr;
+        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+        assert(blockEndIdx >= loadedDictEnd);
+
+        if (blockEndIdx > loadedDictEnd + maxDist) {
+            /* On reaching window size, dictionaries are invalidated.
+             * For simplification, if window size is reached anywhere within next block,
+             * the dictionary is invalidated for the full block.
+             */
+            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+            *loadedDictEndPtr = 0;
+            *dictMatchStatePtr = NULL;
+        } else {
+            if (*loadedDictEndPtr != 0) {
+                DEBUGLOG(6, "dictionary considered valid for current block");
+    }   }   }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+    ZSTD_memset(window, 0, sizeof(*window));
+    window->base = (BYTE const*)"";
+    window->dictBase = (BYTE const*)"";
+    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
+    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + 1;   /* see issue #1241 */
+}
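
The "contiguous" test in ZSTD_window_update() below is simply whether the new input starts exactly where the previous block ended; a gap demotes the current prefix to extDict. A toy tracker of just that bookkeeping, for illustration only (not part of the patch; `toy_window` here models only the `nextSrc` field):

    /* Editorial sketch: the contiguity test of window updates. */
    #include <assert.h>
    #include <stddef.h>

    typedef struct { const char* nextSrc; } toy_window;

    static int toy_update(toy_window* w, const char* src, size_t srcSize)
    {
        int const contiguous = (src == w->nextSrc);  /* same test as the real code */
        w->nextSrc = src + srcSize;                  /* where the next block must start */
        return contiguous;
    }

    int main(void)
    {
        char buf[64];
        toy_window w = { buf };                     /* pretend history ends at buf */
        assert(toy_update(&w, buf, 16) == 1);       /* continues the prefix */
        assert(toy_update(&w, buf + 16, 16) == 1);  /* still contiguous */
        assert(toy_update(&w, buf + 48, 8) == 0);   /* gap: prefix becomes extDict */
        return 0;
    }
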
+/*
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the old extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+                                  void const* src, size_t srcSize)
+{
+    BYTE const* const ip = (BYTE const*)src;
+    U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
+    if (srcSize == 0)
+        return contiguous;
+    assert(window->base != NULL);
+    assert(window->dictBase != NULL);
+    /* Check if blocks follow each other */
+    if (src != window->nextSrc) {
+        /* not contiguous */
+        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
+        window->lowLimit = window->dictLimit;
+        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
+        window->dictLimit = (U32)distanceFromBase;
+        window->dictBase = window->base;
+        window->base = ip - distanceFromBase;
+        /* ms->nextToUpdate = window->dictLimit; */
+        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
+        contiguous = 0;
+    }
+    window->nextSrc = ip + srcSize;
+    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+    if ( (ip+srcSize > window->dictBase + window->lowLimit)
+       & (ip < window->dictBase + window->dictLimit)) {
+        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+        window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
+    }
+    return contiguous;
+}
+
+/*
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32 const maxDistance = 1U << windowLog;
+    U32 const lowestValid = ms->window.lowLimit;
+    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32 const isDictionary = (ms->loadedDictEnd != 0);
+    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
+     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
+     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
+     */
+    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+/*
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32 const maxDistance = 1U << windowLog;
+    U32 const lowestValid = ms->window.dictLimit;
+    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32 const isDictionary = (ms->loadedDictEnd != 0);
+    /* When computing the lowest prefix index we need to take the dictionary into account to handle
+     * the edge case where the dictionary and the source are contiguous in memory.
+     */
+    U32 const matchLowest = isDictionary ?
lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + + +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. 
*/
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize,
+                                       const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zero must be emitted as the last sequence.
+ * length can be any value, because it will be overridden.
+ * @return : an error code or 0 */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/* ZSTD_CCtx_trace() :
+ * Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
new file mode 100644
index 0000000000000..655bcda4d1f1c
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_literals.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream
= srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h new file mode 100644 index 0000000000000..9904c0cd30a0a --- /dev/null +++ b/lib/zstd/compress/zstd_compress_literals.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
new file mode 100644
index 0000000000000..dcfcdc9cc5e8d
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_sequences.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/*
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+    void const* ptr = ctable;
+    U16 const* u16ptr = (U16 const*)ptr;
+    U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+    return maxSymbolValue;
+}
+
+/*
+ * Returns true if we should use ncount=-1, else we should
+ * use ncount=1 for low probability symbols instead.
+ */
+static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+{
+    /* Heuristic: This should cover most blocks <= 16K and
+     * start to fade out after 16K to about 32K depending on
+     * compressibility.
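+     *
+     * Illustration (the nbSeq values are examples around the 2048
+     * threshold, not taken from this patch): ZSTD_NCountCost() below
+     * forwards this choice to FSE_normalizeCount() as its last argument:
+     *   FSE_normalizeCount(norm, tableLog, count, 4096, max, 1);  // -1 allowed
+     *   FSE_normalizeCount(norm, tableLog, count, 1024, max, 0);  // min count 1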
+     */
+    return nbSeq >= 2048;
+}
+
+/*
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    BYTE wksp[FSE_NCOUNTBOUND];
+    S16 norm[MaxSeq + 1];
+    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
+    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned cost = 0;
+    unsigned s;
+    for (s = 0; s <= max; ++s) {
+        unsigned norm = (unsigned)((256 * count[s]) / total);
+        if (count[s] != 0 && norm == 0)
+            norm = 1;
+        assert(count[s] < total);
+        cost += count[s] * kInverseProbabilityLog256[norm];
+    }
+    return cost >> 8;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+        return ERROR(GENERIC);
+    }
+    for (s = 0; s <= max; ++s) {
+        unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+        if (count[s] == 0)
+            continue;
+        if (bitCost >= badCost) {
+            DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+            return ERROR(GENERIC);
+        }
+        cost += (size_t)count[s] * bitCost;
+    }
+    return cost >> kAccuracyLog;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol supported by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max)
+{
+    unsigned const shift = 8 - accuracyLog;
+    size_t cost = 0;
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
+        unsigned const norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
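+             * (Worked example under that assumption: with nbSeq == 1 and a
+             * 6-bit default table, set_basic adds ~6 bits to the existing
+             * bitstream, while set_rle spends a full 8-bit symbol byte.)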
+ * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, 
defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+            ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
+            size_t nbSeq_1 = nbSeq;
+            const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+            if (count[codeTable[nbSeq-1]] > 1) {
+                count[codeTable[nbSeq-1]]--;
+                nbSeq_1--;
+            }
+            assert(nbSeq_1 > 1);
+            assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+            (void)entropyWorkspaceSize;
+            FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+            {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+                FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+                FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+                return NCountSize;
+            }
+        }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream,
sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h new file mode 100644 index 0000000000000..7991364c2f71f --- /dev/null +++ b/lib/zstd/compress/zstd_compress_sequences.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
+#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
new file mode 100644
index 0000000000000..ee03e0aedb030
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_superblock.c
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_superblock.h"
+
+#include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
+#include "hist.h"                     /* HIST_countFast_wksp */
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+
+/*-*************************************
+*  Superblock entropy buffer structs
+***************************************/
+/* ZSTD_hufCTablesMetadata_t :
+ * Stores Literals Block Type for a super-block in hType, and
+ * huffman tree description in hufDesBuffer.
+ * hufDesSize refers to the size of huffman tree description in bytes.
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/* ZSTD_fseCTablesMetadata_t :
+ * Stores symbol compression modes for a super-block in {ll, of, ml}Type, and
+ * fse tables in fseTablesBuffer.
+ * fseTablesSize refers to the size of fse tables in bytes.
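+ * A usage sketch (illustrative; the argument names are assumptions, only
+ * the struct layout comes from this file):
+ *   ZSTD_entropyCTablesMetadata_t md;
+ *   FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(seqStore, prevEntropy,
+ *                        nextEntropy, cctxParams, &md, wksp, wkspSize), "");
+ *   // the first sub-block then copies md.fseMetadata.fseTablesBuffer
+ *   // (fseTablesSize bytes) right after its seqHead byte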
+ * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/* ZSTD_buildSuperBlockEntropy_literal() :
+ * Builds entropy for the super-block literals.
+ * Stores literals block type (raw, rle, compressed, repeat) and
+ * huffman description table to hufMetadata.
+ * @return : size of huffman description table or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                            ZSTD_hufCTables_t* nextHuf,
+                                            ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                            const int disableLiteralsCompression,
+                                            void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = 255;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#  define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ?
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/* ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. 
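+ * The three modes end up packed into each sub-block's seqHead byte as
+ * (llType<<6) + (ofType<<4) + (mlType<<2), as done in
+ * ZSTD_compressSubBlock_sequences() below; e.g. an all-repeat header
+ * (set_repeat == 3 in zstd_internal.h) packs to (3<<6)+(3<<4)+(3<<2) = 0xFC.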
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/* ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. 
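+ * It chains the two helpers above: ZSTD_buildSuperBlockEntropy_literal()
+ * for the Huffman side, then ZSTD_buildSuperBlockEntropy_sequences() for
+ * the FSE side, leaving both results in entropyMetadata.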
+ * @return : 0 on success or error code */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+                            const ZSTD_entropyCTables_t* prevEntropy,
+                            ZSTD_entropyCTables_t* nextEntropy,
+                            const ZSTD_CCtx_params* cctxParams,
+                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_literal() :
+ * Compresses literals section for a sub-block.
+ * When we have to write the Huffman table we will sometimes choose a header
+ * size larger than necessary. This is because we have to pick the header size
+ * before we know the table size + compressed size, so we have a bound on the
+ * table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
+ * in writing the header, otherwise it is set to 0.
+ *
+ * hufMetadata->hType has literals block type info.
+ * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ * If it is set_compressed, the first sub-block's literals section will be
+ * Compressed_Literals_Block and the following sub-blocks' literals sections
+ * will be Treeless_Literals_Block.
+ * If it is set_repeat, all sub-blocks' literals sections will be Treeless_Literals_Block.
+ * @return : compressed size of literals section of a sub-block
+ *           Or 0 if it is unable to compress.
+ *           Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                    const BYTE* literals, size_t litSize,
+                                    void* dst, size_t dstSize,
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart + lhSize;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    (void)bmi2; /* TODO bmi2...
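+     * (the flag is accepted but unused in this function as imported here:
+     * the HUF_compress1X/4X_usingCTable calls below take no bmi2 argument)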
*/ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/* ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. 
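+ * The section starts with an nbSeq descriptor whose encoding is
+ * implemented below; worked examples (values illustrative only):
+ *   nbSeq = 100   -> 1 byte : 0x64
+ *   nbSeq = 300   -> 2 bytes: 0x81 0x2C  (0x80 + (300>>8), then 300 & 0xFF)
+ *   nbSeq = 40000 -> 3 bytes: 0xFF then LE16(40000 - LONGNBSEQ)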
+ * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. 
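+         * Concretely (illustrative numbers): lastCountSize == 2 with a
+         * 1-byte bitstream gives the 3-byte tail asserted below; returning
+         * 0 makes the caller emit this sub-block uncompressed instead.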
+ */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/* ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == 
set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += 
ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/* ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. 
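+     * A cheaper (purely hypothetical) shape would keep the literal and
+     * symbol-code histograms as running state, add only the counts
+     * contributed by each newly appended sequence, and recount from
+     * scratch only after a sub-block is flushed.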
+ */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h new file mode 100644 index 0000000000000..224ece79546eb --- /dev/null 
+++ b/lib/zstd/compress/zstd_compress_superblock.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+* Dependencies
+***************************************/
+
+#include "zstd_compress_internal.h" /* ZSTD_CCtx */
+
+/*-*************************************
+* Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
new file mode 100644
index 0000000000000..98e359adf5d44
--- /dev/null
+++ b/lib/zstd/compress/zstd_cwksp.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+
+
+/*-*************************************
+* Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+* Structures
+***************************************/
+typedef enum {
+    ZSTD_cwksp_alloc_objects,
+    ZSTD_cwksp_alloc_buffers,
+    ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/*
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+    ZSTD_cwksp_dynamic_alloc,
+    ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
+/*
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. 
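+ *
+ * As a rough usage sketch (blockSize, maxNbSeq and hashSize are
+ * illustrative placeholders, not the exact values computed by the
+ * reset path):
+ *
+ *   obj  = ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_CCtx));
+ *   buf  = ZSTD_cwksp_reserve_buffer(ws, blockSize);
+ *   seqs = ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+ *   tbl  = ZSTD_cwksp_reserve_table(ws, hashSize * sizeof(U32));
+ *
+ * Reordering any two of these calls is an out-of-order reservation,
+ * which fails as noted above.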
+ */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/* + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/* + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; + return size; +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/* + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/* + * Internal function. Do not use directly. 
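+ * Callers go through ZSTD_cwksp_reserve_buffer() and
+ * ZSTD_cwksp_reserve_aligned() below, which select the allocation phase.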
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + + if (bytes == 0) + return NULL; + + + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + + + return alloc; +} + +/* + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/* + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/* + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + + + return alloc; +} + +/* + * Aligned on sizeof(void*). 
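+ * Reserves from the objects region at the front of the workspace; the
+ * request is rounded up to a multiple of sizeof(void*) and only succeeds
+ * while the workspace is still in the ZSTD_cwksp_alloc_objects phase.
+ * For example (illustrative):
+ *
+ *   ZSTD_CCtx* cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_CCtx));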
+ */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/* + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/* + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/* + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + + + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/* + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). 
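+ *
+ * Minimal sketch of the static-allocation path (buffer alignment and
+ * sizing are the caller's responsibility):
+ *
+ *   ZSTD_cwksp ws;
+ *   ZSTD_cwksp_init(&ws, buffer, bufferSize, ZSTD_cwksp_static_alloc);
+ *   ... reserve objects, buffers, aligned regions, tables ...
+ *   if (ZSTD_cwksp_reserve_failed(&ws)) return ERROR(memory_allocation);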
+ */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/* + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + + +#endif /* ZSTD_CWKSP_H */ diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c new file mode 100644 index 0000000000000..b0424d23ac57f --- /dev/null +++ b/lib/zstd/compress/zstd_double_fast.c @@ -0,0 +1,519 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_double_fast.h" + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? 
+ prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, 
dictEnd, prefixLowest) + 4; + offset = (U32)(curr - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? 
dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h new file mode 100644 index 0000000000000..6822bde65a1d8 --- /dev/null +++ b/lib/zstd/compress/zstd_double_fast.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c new file mode 100644 index 0000000000000..96b7d48e2868e --- /dev/null +++ b/lib/zstd/compress/zstd_fast.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ +#include "zstd_fast.h" + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. 
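+ * With fastHashFillStep == 3, every third position is written
+ * unconditionally; in ZSTD_dtlm_full mode the two positions following
+ * each of those are also written, but only into empty (== 0) hash slots.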
+ */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 
1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + assert(offset_1 <= curr +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h new file mode 100644 index 0000000000000..fddc2f532d21d --- /dev/null +++ b/lib/zstd/compress/zstd_fast.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#endif /* ZSTD_FAST_H */ diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c new file mode 100644 index 0000000000000..fb54d4e28a2bc --- /dev/null +++ b/lib/zstd/compress/zstd_lazy.c @@ -0,0 +1,1414 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/* ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? 
inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + for (; nbCompares && (matchIndex > windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t 
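+/*
+ * Illustrative sketch (editor's note, not from the upstream zstd sources):
+ * the two helpers above implement the "deferred update binary tree" (DUBT).
+ * ZSTD_updateDUBT() records each new position as a cheap chain entry tagged
+ * ZSTD_DUBT_UNSORTED_MARK; the O(tree-depth) insertion done by
+ * ZSTD_insertDUBT1() is paid later, and only for hash buckets that a search
+ * actually visits:
+ *
+ *     insert:  hashTable[h] -> idxN -> ... -> idx1      (unsorted, O(1) each)
+ *     search:  walk the unsorted run, then sort each entry into the
+ *              smaller/larger links bt[2*(idx&btMask)] / bt[2*(idx&btMask)+1]
+ *
+ * Buckets that are never searched are therefore never sorted.
+ */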
+ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t 
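+/*
+ * Editor's illustration (not upstream text): the acceptance test used above,
+ * and again in ZSTD_DUBT_findBestMatch() below,
+ *
+ *     4*(matchLength - bestLength) > highbit(newOffset+1) - highbit(bestOffset+1)
+ *
+ * trades match length against offset cost: each extra matched byte is worth
+ * about 4 points, while each doubling of the offset costs about 1 point (one
+ * more bit to encode). For example, a candidate one byte longer than the
+ * current best wins at offset ~2^13 against a best at ~2^10 (4 > 3), but
+ * loses at offset ~2^16 (4 > 6 fails), keeping the shorter, closer match.
+ */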
+ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + for (; nbCompares && (matchIndex > windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, 
match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + 
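+    /*
+     * Editor's note (not upstream text): these *_selectMLS wrappers emulate
+     * C++ templates in C. Each case passes minMatch as a compile-time
+     * constant to the FORCE_INLINE_TEMPLATE matcher, so the compiler emits
+     * one specialized search loop per value; minMatch 3 is served by the
+     * mls==4 instance and minMatch 7 by the mls==6 instance.
+     */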
default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. + */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? 
target - tmpChainSize : idx; + + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || countBeyondMinChain++ > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? 
curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? 
nbAttempts : bucketSize - 1; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + 
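+    /*
+     * Editor's sketch (not upstream text): throughout this parser, "offset"
+     * holds an offset code rather than a distance: 0 selects the most recent
+     * offset (a repcode), while a real distance d is carried as
+     * d + ZSTD_REP_MOVE. Decoding the convention looks roughly like:
+     *
+     *     if (offCode > ZSTD_REP_MOVE) {
+     *         dist = offCode - ZSTD_REP_MOVE;        // new literal offset
+     *         offset_2 = offset_1; offset_1 = dist;  // shift rep history
+     *     } else {
+     *         dist = offset_1;                       // repcode 0 in this file
+     *     }
+     *
+     * which is why repcode matches below are stored with offCode 0, and why
+     * the catch-up logic subtracts ZSTD_REP_MOVE before touching match bytes.
+     */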
const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /* + * This table is indexed first by the four ZSTD_dictMode_e values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). + */ + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ +#if defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + if ( (dictMode == ZSTD_noDict) + && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + if ( (dictMode == ZSTD_noDict) + && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const 
BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + + /* init */ + ip += (ip == prefixStart); + + /* Match Loop */ +#if defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip<ilimit) { + ip ++; + curr++; + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 repIndex = (U32)(curr - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip<ilimit)) { + ip ++; + curr++; + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 repIndex = (U32)(curr - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+}
+
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+}
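Each wrapper above forwards fixed searchMethod/depth arguments into the FORCE_INLINE_TEMPLATE generic body, so the compiler emits one specialized copy per entry point and the per-byte branches on those parameters fold away. A minimal standalone sketch of this template-by-inlining pattern (hypothetical names, not code from this patch):

/* Illustration only: specialization through forced inlining. */
#include <stddef.h>

typedef enum { search_hashChain, search_binaryTree } searchMethod_e;

static inline __attribute__((always_inline))
size_t compress_generic(const unsigned char* src, size_t srcSize,
                        searchMethod_e method, unsigned depth)
{
    size_t work = 0;
    size_t i;
    /* 'method' and 'depth' are compile-time constants at every call
     * site, so these branches fold away in each specialized copy. */
    for (i = 0; i < srcSize; i++) {
        work += (method == search_binaryTree) ? 2 : 1;  /* stand-in search cost */
        work += depth;                                  /* stand-in lazy passes */
        work += src[i] & 1;
    }
    return work;
}

/* Thin wrappers, mirroring ZSTD_compressBlock_lazy2() etc. */
size_t compress_greedy(const unsigned char* s, size_t n)  { return compress_generic(s, n, search_hashChain, 0); }
size_t compress_lazy2(const unsigned char* s, size_t n)   { return compress_generic(s, n, search_hashChain, 2); }
size_t compress_btlazy2(const unsigned char* s, size_t n) { return compress_generic(s, n, search_binaryTree, 2); }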
diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
new file mode 100644
index 0000000000000..2fc5a61821344
--- /dev/null
+++ b/lib/zstd/compress/zstd_lazy.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LAZY_H
+#define ZSTD_LAZY_H
+
+
+#include "zstd_compress_internal.h"
+
+/*
+ * Dedicated Dictionary Search Structure bucket log. In the
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
+ * one.
+ */
+#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_LAZY_H */
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
new file mode 100644
index 0000000000000..8ef7e88a5adde
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.c
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_ldm.h"
+
+#include "../common/debug.h"
+#include <linux/xxhash.h>
+#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
+
+#define LDM_BUCKET_SIZE_LOG 3
+#define LDM_MIN_MATCH_LENGTH 64
+#define LDM_HASH_RLOG 7
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/* ZSTD_ldm_gear_init():
+ *
+ * Initializes the rolling hash state such that it will honor the
+ * settings in params. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate. */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
+
+/* ZSTD_ldm_gear_feed():
+ *
+ * Registers in the splits array all the split points found in the first
+ * size bytes following the data pointer. This function terminates when
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
+ * present in the splits array.
+ *
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
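For intuition about the stop mask above: with hashRateLog = 7 and maxBitsInMask = 64, the mask covers the 7 highest-weight bits of the 64-bit gear hash, so (hash & mask) == 0 at roughly one position in 2^7 = 128. A self-contained sketch of the same mask construction and rolling update (the gear table here is an arbitrary stand-in for ZSTD_ldm_gearTab):

#include <stdint.h>
#include <stdio.h>

/* Build the stop mask the way ZSTD_ldm_gear_init() does: hashRateLog
 * bits set, placed as high as the minMatchLength window allows. */
static uint64_t make_stop_mask(unsigned hashRateLog, unsigned maxBitsInMask)
{
    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask)
        return (((uint64_t)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
    return ((uint64_t)1 << hashRateLog) - 1;
}

int main(void)
{
    uint64_t table[256];
    uint64_t hash = ~(uint32_t)0;             /* same seed as the patch */
    uint64_t const mask = make_stop_mask(7, 64);
    uint32_t rng = 12345;
    unsigned splits = 0, i;

    for (i = 0; i < 256; i++)                 /* arbitrary stand-in gear table */
        table[i] = (i + 1) * 0x9e3779b97f4a7c15ull;

    for (i = 0; i < 4096; i++) {              /* feed 4 KB of pseudo-random bytes */
        rng = rng * 1664525u + 1013904223u;
        hash = (hash << 1) + table[(rng >> 24) & 0xff];
        if ((hash & mask) == 0)
            splits++;                         /* a split point, as in GEAR_ITER_ONCE() */
    }
    printf("%u splits in 4096 bytes (statistically ~%u)\n", splits, 4096u >> 7);
    return 0;
}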
+
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
+{
+    params->windowLog = cParams->windowLog;
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (params->hashLog == 0) {
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
+        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+    }
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                  ? 0
+                                  : params->windowLog - params->hashLog;
+    }
+    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+}
+
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+}
+
+/* ZSTD_ldm_getBucket() :
+ *  Returns a pointer to the start of the bucket associated with hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+}
+
+/* ZSTD_ldm_insertEntry() :
+ *  Insert the entry with corresponding hash into the hash table */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
+
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
+
+}
+
+/* ZSTD_ldm_countBackwardsMatch() :
+ *  Returns the number of bytes that match backwards before pIn and pMatch.
+ *
+ *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pMatchBase)
+{
+    size_t matchLength = 0;
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
+        pIn--;
+        pMatch--;
+        matchLength++;
+    }
+    return matchLength;
+}
+
+/* ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, start counting from mEnd */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
+        /* If backwards match is entirely in the extDict or prefix, immediately return */
+        return matchLength;
+    }
+    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
+    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
+    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
+    return matchLength;
+}
+
+/* ZSTD_ldm_fillFastTables() :
+ *
+ *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+ *  This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  The tables for the other strategies are filled within their
+ *  block compressors. */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
+{
+    const BYTE* const iend = (const BYTE*)end;
+
+    switch(ms->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+    case ZSTD_btlazy2:
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        break;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    return 0;
+}
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* ldmState, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
+    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = xxh64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
+    }
+}
+
+
+/* ZSTD_ldm_limitTableUpdate() :
+ *
+ *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
+ *  if it is far away
+ *  (after a long match, only update tables a limited amount). */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+{
+    U32 const curr = (U32)(anchor - ms->window.base);
+    if (curr > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            curr - MIN(512, curr - ms->nextToUpdate - 1024);
+    }
+}
+
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ?
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + { + size_t n = 0; + + while (n < minMatchLength) { + numSplits = 0; + n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, + splits, &numSplits); + } + ip += minMatchLength; + } + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = xxh64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + { + U32 const offset = (U32)(split - base) - bestEntry->offset; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? 
iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/* + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. 
+ */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip 
+= sequence.matchLength;
+        }
+    }
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}
diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
new file mode 100644
index 0000000000000..25b25270b72e2
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+
+#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
+#include <linux/zstd.h>   /* ZSTD_CCtx, size_t */
+
+/*-*************************************
+* Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params);
+
+/*
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldms, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            void const* src, size_t srcSize);
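The note on ZSTD_ldm_blockCompress() above is the subtle part of this API: a predefined sequence may straddle the block boundary, and only the prefix that fits can be emitted in the current block. A simplified, self-contained model of that split decision, mirroring the maybeSplitSequence() logic in zstd_ldm.c (stand-in types, illustration only):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for rawSeq (illustration only). */
typedef struct { uint32_t litLength, matchLength, offset; } raw_seq;

/* Decide what part of 'seq' fits in the 'remaining' bytes of the
 * current block. offset == 0 in the result means "emit the rest of
 * the block as literals". */
static raw_seq split_at(raw_seq seq, uint32_t remaining, uint32_t minMatch)
{
    if (remaining <= seq.litLength) {
        seq.offset = 0;
    } else if (remaining < seq.litLength + seq.matchLength) {
        seq.matchLength = remaining - seq.litLength;
        if (seq.matchLength < minMatch)
            seq.offset = 0;   /* leftover match too short: demote to literals */
    }
    return seq;
}

int main(void)
{
    raw_seq s = { 10, 100, 1u << 20 };
    raw_seq head = split_at(s, 40, 4);   /* block boundary 40 bytes away */
    printf("lit=%u match=%u offset=%u\n", head.litLength, head.matchLength, head.offset);
    /* prints: lit=10 match=30 offset=1048576 */
    return 0;
}

For these example values the current block keeps 10 literals plus a 30-byte match, while the remaining 70 match bytes are carried over to the next block via the sequence store position.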
+
+/*
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+                            U32 const minMatch);
+
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
+
+/* ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables or 0 if LDM is
+ *  disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/* ZSTD_ldm_getSeqSpace() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/* ZSTD_ldm_adjustParameters() :
+ *  If the params->hashRateLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+
+#endif /* ZSTD_LDM_H */
diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
new file mode 100644
index 0000000000000..e5c24d856b0a8
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm_geartab.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 
0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c new file mode 100644 index 0000000000000..04337050fe9a0 --- /dev/null +++ b/lib/zstd/compress/zstd_opt.c @@ -0,0 +1,1346 @@ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#include "zstd_compress_internal.h"
+#include "hist.h"
+#include "zstd_opt.h"
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE     (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+# define BITCOST_ACCURACY 0
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+# define BITCOST_ACCURACY 8
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+# define BITCOST_ACCURACY 8
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
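WEIGHT() values are prices in 1/BITCOST_MULTIPLIER-bit units, so with BITCOST_ACCURACY == 8 a difference of 256 is exactly one bit; ZSTD_fracWeight() adds a mantissa correction on top of the integer log2 from ZSTD_bitWeight(). A quick standalone check of that arithmetic, re-implemented with plain stdint types (illustration, not this patch's code):

#include <stdint.h>
#include <stdio.h>

#define ACCURACY   8                 /* mirrors BITCOST_ACCURACY */
#define MULTIPLIER (1 << ACCURACY)   /* price units per bit */

static unsigned highbit32(uint32_t v)   /* floor(log2(v)), v > 0 */
{
    unsigned n = 0;
    while (v >>= 1) n++;
    return n;
}

/* Same shape as ZSTD_fracWeight(): integer bits plus a fractional
 * correction taken from the mantissa, in 1/256-bit units. */
static uint32_t frac_weight(uint32_t rawStat)
{
    uint32_t const stat = rawStat + 1;
    uint32_t const hb = highbit32(stat);
    uint32_t const bw = hb * MULTIPLIER;             /* integer part */
    uint32_t const fw = (stat << ACCURACY) >> hb;    /* fractional part */
    return bw + fw;
}

int main(void)
{
    printf("w(31)=%u w(63)=%u diff=%u\n",
           frac_weight(31), frac_weight(63), frac_weight(63) - frac_weight(31));
    /* prints: w(31)=1536 w(63)=1792 diff=256 (= one bit) */
    return 0;
}

Doubling a symbol's observed frequency raises its weight by 256 units here, i.e. its estimated price drops by one bit, which is the behavior the dynamic statistics rely on.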
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+            int const optLevel)
+{
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+    optPtr->priceType = zop_dynamic;
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+            optPtr->priceType = zop_predef;
+        }
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;
+
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ?
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
+ *  optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+                   const optState_t* const optPtr,
+                   int const optLevel)
+{
+    U32 price;
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    assert(matchLength >= MINMATCH);
+
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+
+    /* dynamic statistics */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
+
+    /* match Length */
+    {   U32 const mlCode = ZSTD_MLcode(mlBase);
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
+    }
+
+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
+
+    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
+    return price;
+}
+
+/* ZSTD_updateStats() :
+ * assumption : literals + litLength <= iend */
+static void ZSTD_updateStats(optState_t* const optPtr,
+                             U32 litLength, const BYTE* literals,
+                             U32 offsetCode, U32 matchLength)
+{
+    /* literals */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
+        for (u=0; u < litLength; u++)
+            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
+    }
+
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
+        optPtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+    }
+
+    /* match Length */
+    {   U32 const mlBase = matchLength - MINMATCH;
+        U32 const mlCode = ZSTD_MLcode(mlBase);
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
+    }
+}
+
+
+/* ZSTD_readMINMATCH() :
+ * function safe only for comparisons
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
+{
+    U32* const hashTable3 = ms->hashTable3;
+    U32 const hashLog3 = ms->hashLog3;
+    const BYTE* const base = ms->window.base;
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
+    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+    assert(hashLog3 > 0);
+
+    while(idx < target) {
+        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
+        idx++;
+    }
+
+    *nextToUpdate3 = target;
+    return hashTable3[hash3];
+}
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/* ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + for (; nbCompares && (matchIndex >= windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf 
or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? 
windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
+ dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = curr; /* Update Hash Table */ + + for (; nbCompares && (matchIndex >= matchLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure 
early section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
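+ *
+ * A minimal sketch (illustration only, not part of the zstd sources) of the
+ * offset-code convention used by the matches written above, assuming
+ * ZSTD_REP_MOVE == ZSTD_REP_NUM-1 as defined in zstd_compress_internal.h:
+ * values up to ZSTD_REP_MOVE denote repcode indices, larger values encode
+ * `distance + ZSTD_REP_MOVE`. The ll0 repcode adjustment is omitted here.
+ *
+ *   static U32 offCodeToDistance(U32 offCode, const U32 rep[ZSTD_REP_NUM])
+ *   {
+ *       if (offCode > ZSTD_REP_MOVE)
+ *           return offCode - ZSTD_REP_MOVE;  // regular match: plain distance
+ *       return rep[offCode];                 // repcode (ll0 handling omitted)
+ *   }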
*/ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} + +/* *********************** +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + 
U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. + */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. 
Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } +} + +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + ZSTD_optLdm_t optLdm; + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend - ip)); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } + opt[0].mlen = 0; /* means is_a_literal */ + opt[0].litlen = litlen; + /* We don't need to include the actual price of the literals because + * it is static for the duration of the forward pass, and is included + * in every price. We include the literal length to avoid negative + * prices when we subtract the previous literal length. + */ + opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); + + /* large match -> immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ?
opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? 
seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + } /* while (ip < ilimit) */ + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; s<lastEltIndex+1; s++) { + table[s] <<= ZSTD_FREQ_DIV+bonus; + table[s]--; + sum += table[s]; + } + return sum; +} + +/* used in 2-pass strategy */ +MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) +{ + optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected.
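+ *
+ * As the body below shows, the first pass is then erased from history:
+ * window.base is rewound by srcSize while dictLimit, lowLimit and
+ * nextToUpdate advance by the same amount, so the bytes just scanned can no
+ * longer be referenced as matches; only the collected statistics survive
+ * into the real compression pass.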
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. 
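+ *
+ * The gate below can be read as a standalone predicate (a sketch for
+ * illustration only; ZSTD_useTwoPass is a hypothetical name, and the body
+ * simply mirrors the conditions tested right after this comment):
+ *
+ *   static int ZSTD_useTwoPass(const ZSTD_matchState_t* ms,
+ *                              const seqStore_t* seqStore,
+ *                              U32 curr, size_t srcSize)
+ *   {
+ *       return (ms->opt.litLengthSum == 0)                        // first block
+ *           && (seqStore->sequences == seqStore->sequencesStart)  // no ldm
+ *           && (ms->window.dictLimit == ms->window.lowLimit)      // no dictionary
+ *           && (curr == ms->window.dictLimit)                     // start of frame
+ *           && (srcSize > ZSTD_PREDEF_THRESHOLD);                 // worth 2x cpu
+ *   }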
*/ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h new file mode 100644 index 0000000000000..22b862858ba7a --- /dev/null +++ b/lib/zstd/compress/zstd_opt.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + + +#include "zstd_compress_internal.h" + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + + +#endif /* ZSTD_OPT_H */ diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c deleted file mode 100644 index 02e92c2cbf4fc..0000000000000 --- a/lib/zstd/decompress.c +++ /dev/null @@ -1,2571 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! -* MAXWINDOWSIZE_DEFAULT : -* maximum window size accepted by DStream, by default. -* Frames requiring more memory will be rejected. 
-*/ -#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ -#endif - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include "fse.h" -#include "huf.h" -#include "mem.h" /* low level memory routines */ -#include "zstd_internal.h" -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/string.h> /* memcpy, memmove, memset */ - -#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) - -/*-************************************* -* Macros -***************************************/ -#define ZSTD_isError ERR_isError /* for inlining */ -#define FSE_isError ERR_isError -#define HUF_isError ERR_isError - -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); } - -/*-************************************************************* -* Context management -***************************************************************/ -typedef enum { - ZSTDds_getFrameHeaderSize, - ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, - ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, - ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, - ZSTDds_skipFrame -} ZSTD_dStage; - -typedef struct { - FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; - FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; - FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2]; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyTables_t; - -struct ZSTD_DCtx_s { - const FSE_DTable *LLTptr; - const FSE_DTable *MLTptr; - const FSE_DTable *OFTptr; - const HUF_DTable *HUFptr; - ZSTD_entropyTables_t entropy; - const void *previousDstEnd; /* detect continuity */ - const void *base; /* start of curr segment */ - const void *vBase; /* virtual start of previous segment if it was just before curr one */ - const void *dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameParams fParams; - blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - struct xxh64_state xxhState; - size_t headerSize; - U32 dictID; - const BYTE *litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - -size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); } - -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx) -{ - dctx->expected = ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return
0; -} - -ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_DCtx *dctx; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); - if (!dctx) - return NULL; - memcpy(&dctx->customMem, &customMem, sizeof(customMem)); - ZSTD_decompressBegin(dctx); - return dctx; -} - -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - return ZSTD_createDCtx_advanced(stackMem); -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx) -{ - if (dctx == NULL) - return 0; /* support free on NULL */ - ZSTD_free(dctx, dctx->customMem); - return 0; /* reserved as a potential error code in the future */ -} - -void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx) -{ - size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; - memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ -} - -static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict); - -/*-************************************************************* -* Decompression section -***************************************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -unsigned ZSTD_isFrame(const void *buffer, size_t size) -{ - if (size < 4) - return 0; - { - U32 const magic = ZSTD_readLE32(buffer); - if (magic == ZSTD_MAGICNUMBER) - return 1; - if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) - return 1; - } - return 0; -} - -/** ZSTD_frameHeaderSize() : -* srcSize must be >= ZSTD_frameHeaderSize_prefix. -* @return : size of the Frame Header */ -static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize) -{ - if (srcSize < ZSTD_frameHeaderSize_prefix) - return ERROR(srcSize_wrong); - { - BYTE const fhd = ((const BYTE *)src)[4]; - U32 const dictID = fhd & 3; - U32 const singleSegment = (fhd >> 5) & 1; - U32 const fcsId = fhd >> 6; - return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId); - } -} - -/** ZSTD_getFrameParams() : -* decode Frame Header, or require larger `srcSize`. 
-* @return : 0, `fparamsPtr` is correctly filled, -* >0, `srcSize` is too small, result is expected `srcSize`, -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) -{ - const BYTE *ip = (const BYTE *)src; - - if (srcSize < ZSTD_frameHeaderSize_prefix) - return ZSTD_frameHeaderSize_prefix; - if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) { - if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - if (srcSize < ZSTD_skippableHeaderSize) - return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ - memset(fparamsPtr, 0, sizeof(*fparamsPtr)); - fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4); - fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ - return 0; - } - return ERROR(prefix_unknown); - } - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { - size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); - if (srcSize < fhsize) - return fhsize; - } - - { - BYTE const fhdByte = ip[4]; - size_t pos = 5; - U32 const dictIDSizeCode = fhdByte & 3; - U32 const checksumFlag = (fhdByte >> 2) & 1; - U32 const singleSegment = (fhdByte >> 5) & 1; - U32 const fcsID = fhdByte >> 6; - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; - U32 windowSize = 0; - U32 dictID = 0; - U64 frameContentSize = 0; - if ((fhdByte & 0x08) != 0) - return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ - if (!singleSegment) { - BYTE const wlByte = ip[pos++]; - U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if (windowLog > ZSTD_WINDOWLOG_MAX) - return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ - windowSize = (1U << windowLog); - windowSize += (windowSize >> 3) * (wlByte & 7); - } - - switch (dictIDSizeCode) { - default: /* impossible */ - case 0: break; - case 1: - dictID = ip[pos]; - pos++; - break; - case 2: - dictID = ZSTD_readLE16(ip + pos); - pos += 2; - break; - case 3: - dictID = ZSTD_readLE32(ip + pos); - pos += 4; - break; - } - switch (fcsID) { - default: /* impossible */ - case 0: - if (singleSegment) - frameContentSize = ip[pos]; - break; - case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break; - case 2: frameContentSize = ZSTD_readLE32(ip + pos); break; - case 3: frameContentSize = ZSTD_readLE64(ip + pos); break; - } - if (!windowSize) - windowSize = (U32)frameContentSize; - if (windowSize > windowSizeMax) - return ERROR(frameParameter_windowTooLarge); - fparamsPtr->frameContentSize = frameContentSize; - fparamsPtr->windowSize = windowSize; - fparamsPtr->dictID = dictID; - fparamsPtr->checksumFlag = checksumFlag; - } - return 0; -} - -/** ZSTD_getFrameContentSize() : -* compatible with legacy mode -* @return : decompressed size of the single frame pointed to be `src` if known, otherwise -* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. 
invalid magic number, srcSize too small) */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) -{ - { - ZSTD_frameParams fParams; - if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (fParams.windowSize == 0) { - /* Either skippable or empty frame, size == 0 either way */ - return 0; - } else if (fParams.frameContentSize != 0) { - return fParams.frameContentSize; - } else { - return ZSTD_CONTENTSIZE_UNKNOWN; - } - } -} - -/** ZSTD_findDecompressedSize() : - * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames - * @return : decompressed size of the frames contained */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize) -{ - { - unsigned long long totalDstSize = 0; - while (srcSize >= ZSTD_frameHeaderSize_prefix) { - const U32 magicNumber = ZSTD_readLE32(src); - - if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t skippableSize; - if (srcSize < ZSTD_skippableHeaderSize) - return ERROR(srcSize_wrong); - skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; - if (srcSize < skippableSize) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } - - { - unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - if (ret >= ZSTD_CONTENTSIZE_ERROR) - return ret; - - /* check for overflow */ - if (totalDstSize + ret < totalDstSize) - return ZSTD_CONTENTSIZE_ERROR; - totalDstSize += ret; - } - { - size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); - if (ZSTD_isError(frameSrcSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; - } - } - - if (srcSize) { - return ZSTD_CONTENTSIZE_ERROR; - } - - return totalDstSize; - } -} - -/** ZSTD_decodeFrameHeader() : -* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). -* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize) -{ - size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); - if (ZSTD_isError(result)) - return result; /* invalid header */ - if (result > 0) - return ERROR(srcSize_wrong); /* headerSize too small */ - if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) - return ERROR(dictionary_wrong); - if (dctx->fParams.checksumFlag) - xxh64_reset(&dctx->xxhState, 0); - return 0; -} - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; - -/*! 
ZSTD_getcBlockSize() : -* Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr) -{ - if (srcSize < ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - { - U32 const cBlockHeader = ZSTD_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) - return 1; - if (bpPtr->blockType == bt_reserved) - return ERROR(corruption_detected); - return cSize; - } -} - -static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - if (srcSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); - return srcSize; -} - -static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize) -{ - if (srcSize != 1) - return ERROR(srcSize_wrong); - if (regenSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memset(dst, *(const BYTE *)src, regenSize); - return regenSize; -} - -/*! ZSTD_decodeLiteralsBlock() : - @return : nb of bytes read from src (< srcSize ) */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - if (srcSize < MIN_CBLOCK_SIZE) - return ERROR(corruption_detected); - - { - const BYTE *const istart = (const BYTE *)src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - - switch (litEncType) { - case set_repeat: - if (dctx->litEntropy == 0) - return ERROR(dictionary_corrupted); - fallthrough; - case set_compressed: - if (srcSize < 5) - return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ - { - size_t lhSize, litSize, litCSize; - U32 singleStream = 0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = ZSTD_readLE32(istart); - switch (lhlCode) { - case 0: - case 1: - default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + (istart[4] << 10); - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(corruption_detected); - if (litCSize + lhSize > srcSize) - return ERROR(corruption_detected); - - if (HUF_isError( - (litEncType == set_repeat) - ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr) - : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)) - : (singleStream - ? 
HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) - : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace))))) - return ERROR(corruption_detected); - - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType == set_compressed) - dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } - - case set_basic: { - size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch (lhlCode) { - case 0: - case 2: - default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = ZSTD_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = ZSTD_readLE24(istart) >> 4; - break; - } - - if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - if (litSize + lhSize > srcSize) - return ERROR(corruption_detected); - memcpy(dctx->litBuffer, istart + lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize + litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart + lhSize; - dctx->litSize = litSize; - return lhSize + litSize; - } - - case set_rle: { - U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch (lhlCode) { - case 0: - case 2: - default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = ZSTD_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = ZSTD_readLE24(istart) >> 4; - if (srcSize < 4) - return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize + 1; - } - default: - return ERROR(corruption_detected); /* impossible */ - } - } -} - -typedef union { - FSE_decode_t realData; - U32 alignedBy4; -} FSE_decode_t4; - -static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = { - {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 4}}, /* 0 : base, symbol, bits */ - {{16, 0, 4}}, - {{32, 1, 5}}, - {{0, 3, 5}}, - {{0, 4, 5}}, - {{0, 6, 5}}, - {{0, 7, 5}}, - {{0, 9, 5}}, - {{0, 10, 5}}, - {{0, 12, 5}}, - {{0, 14, 6}}, - {{0, 16, 5}}, - {{0, 18, 5}}, - {{0, 19, 5}}, - {{0, 21, 5}}, - {{0, 22, 5}}, - {{0, 24, 5}}, - {{32, 25, 5}}, - {{0, 26, 5}}, - {{0, 27, 6}}, - {{0, 29, 6}}, - {{0, 31, 6}}, - {{32, 0, 4}}, - {{0, 1, 4}}, - {{0, 2, 5}}, - {{32, 4, 5}}, - {{0, 5, 5}}, - {{32, 7, 5}}, - {{0, 8, 5}}, - {{32, 10, 5}}, - {{0, 11, 5}}, - {{0, 13, 6}}, - {{32, 16, 5}}, - {{0, 17, 5}}, - {{32, 19, 5}}, - {{0, 20, 5}}, - {{32, 22, 5}}, - {{0, 23, 5}}, - {{0, 25, 4}}, - {{16, 25, 4}}, - {{32, 26, 5}}, - {{0, 28, 6}}, - {{0, 30, 6}}, - {{48, 0, 4}}, - {{16, 1, 4}}, - {{32, 2, 5}}, - {{32, 3, 5}}, - {{32, 5, 5}}, - {{32, 6, 5}}, - {{32, 8, 5}}, - {{32, 9, 5}}, - {{32, 11, 5}}, - {{32, 12, 5}}, - {{0, 15, 6}}, 
- {{32, 17, 5}}, - {{32, 18, 5}}, - {{32, 20, 5}}, - {{32, 21, 5}}, - {{32, 23, 5}}, - {{32, 24, 5}}, - {{0, 35, 6}}, - {{0, 34, 6}}, - {{0, 33, 6}}, - {{0, 32, 6}}, -}; /* LL_defaultDTable */ - -static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = { - {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 6}}, /* 0 : base, symbol, bits */ - {{0, 1, 4}}, - {{32, 2, 5}}, - {{0, 3, 5}}, - {{0, 5, 5}}, - {{0, 6, 5}}, - {{0, 8, 5}}, - {{0, 10, 6}}, - {{0, 13, 6}}, - {{0, 16, 6}}, - {{0, 19, 6}}, - {{0, 22, 6}}, - {{0, 25, 6}}, - {{0, 28, 6}}, - {{0, 31, 6}}, - {{0, 33, 6}}, - {{0, 35, 6}}, - {{0, 37, 6}}, - {{0, 39, 6}}, - {{0, 41, 6}}, - {{0, 43, 6}}, - {{0, 45, 6}}, - {{16, 1, 4}}, - {{0, 2, 4}}, - {{32, 3, 5}}, - {{0, 4, 5}}, - {{32, 6, 5}}, - {{0, 7, 5}}, - {{0, 9, 6}}, - {{0, 12, 6}}, - {{0, 15, 6}}, - {{0, 18, 6}}, - {{0, 21, 6}}, - {{0, 24, 6}}, - {{0, 27, 6}}, - {{0, 30, 6}}, - {{0, 32, 6}}, - {{0, 34, 6}}, - {{0, 36, 6}}, - {{0, 38, 6}}, - {{0, 40, 6}}, - {{0, 42, 6}}, - {{0, 44, 6}}, - {{32, 1, 4}}, - {{48, 1, 4}}, - {{16, 2, 4}}, - {{32, 4, 5}}, - {{32, 5, 5}}, - {{32, 7, 5}}, - {{32, 8, 5}}, - {{0, 11, 6}}, - {{0, 14, 6}}, - {{0, 17, 6}}, - {{0, 20, 6}}, - {{0, 23, 6}}, - {{0, 26, 6}}, - {{0, 29, 6}}, - {{0, 52, 6}}, - {{0, 51, 6}}, - {{0, 50, 6}}, - {{0, 49, 6}}, - {{0, 48, 6}}, - {{0, 47, 6}}, - {{0, 46, 6}}, -}; /* ML_defaultDTable */ - -static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = { - {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 5}}, /* 0 : base, symbol, bits */ - {{0, 6, 4}}, - {{0, 9, 5}}, - {{0, 15, 5}}, - {{0, 21, 5}}, - {{0, 3, 5}}, - {{0, 7, 4}}, - {{0, 12, 5}}, - {{0, 18, 5}}, - {{0, 23, 5}}, - {{0, 5, 5}}, - {{0, 8, 4}}, - {{0, 14, 5}}, - {{0, 20, 5}}, - {{0, 2, 5}}, - {{16, 7, 4}}, - {{0, 11, 5}}, - {{0, 17, 5}}, - {{0, 22, 5}}, - {{0, 4, 5}}, - {{16, 8, 4}}, - {{0, 13, 5}}, - {{0, 19, 5}}, - {{0, 1, 5}}, - {{16, 6, 4}}, - {{0, 10, 5}}, - {{0, 16, 5}}, - {{0, 28, 5}}, - {{0, 27, 5}}, - {{0, 26, 5}}, - {{0, 25, 5}}, - {{0, 24, 5}}, -}; /* OF_defaultDTable */ - -/*! 
ZSTD_buildSeqTable() : - @return : nb bytes read from src, - or an error code if it fails, testable with ZSTD_isError() -*/ -static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src, - size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize) -{ - const void *const tmpPtr = defaultTable; /* bypass strict aliasing */ - switch (type) { - case set_rle: - if (!srcSize) - return ERROR(srcSize_wrong); - if ((*(const BYTE *)src) > max) - return ERROR(corruption_detected); - FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src); - *DTablePtr = DTableSpace; - return 1; - case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0; - case set_repeat: - if (!flagRepeatTable) - return ERROR(corruption_detected); - return 0; - default: /* impossible */ - case set_compressed: { - U32 tableLog; - S16 *norm = (S16 *)workspace; - size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(GENERIC); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - { - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) - return ERROR(corruption_detected); - if (tableLog > maxLog) - return ERROR(corruption_detected); - FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize); - *DTablePtr = DTableSpace; - return headerSize; - } - } - } -} - -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize) -{ - const BYTE *const istart = (const BYTE *const)src; - const BYTE *const iend = istart + srcSize; - const BYTE *ip = istart; - - /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) - return ERROR(srcSize_wrong); - - /* SeqHead */ - { - int nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr = 0; - return 1; - } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - if (ip + 2 > iend) - return ERROR(srcSize_wrong); - nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2; - } else { - if (ip >= iend) - return ERROR(srcSize_wrong); - nbSeq = ((nbSeq - 0x80) << 8) + *ip++; - } - } - *nbSeqPtr = nbSeq; - } - - /* FSE table descriptors */ - if (ip + 4 > iend) - return ERROR(srcSize_wrong); /* minimum possible size */ - { - symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ - { - size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip, - LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(llhSize)) - return ERROR(corruption_detected); - ip += llhSize; - } - { - size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip, - OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(ofhSize)) - return ERROR(corruption_detected); - ip += ofhSize; - } - { - size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip, - ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(mlhSize)) - return ERROR(corruption_detected); - ip += 
mlhSize; - } - } - - return ip - istart; -} - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE *match; -} seq_t; - -typedef struct { - BIT_DStream_t DStream; - FSE_DState_t stateLL; - FSE_DState_t stateOffb; - FSE_DState_t stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE *base; - size_t pos; - uPtrDiff gotoDict; -} seqState_t; - -FORCE_NOINLINE -size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd <= oend_w) - return ERROR(GENERIC); /* Precondition */ - - /* copy literals */ - if (op < oend_w) { - ZSTD_wildcopy(op, *litPtr, oend_w - op); - *litPtr += oend_w - op; - op = oend_w; - } - while (op < oLitEnd) - *op++ = *(*litPtr)++; - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - match = dictEnd - (base - match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - } - } - while (op < oMatchEnd) - *op++ = *match++; - return sequenceLength; -} - -static seq_t ZSTD_decodeSequence(seqState_t *seqState) -{ - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits + mlBits + ofBits; - - static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, - 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; - - static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, - 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; - - static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, - 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, - 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; - - /* sequence */ - { - size_t offset; - if (!ofCode) - offset = 0; - else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (ZSTD_32bits()) - 
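/* note : on 32-bit hosts the bitstream accumulator guarantees only about
 * STREAM_ACCUMULATOR_MIN fresh bits between reloads, and ofBits alone
 * (up to ZSTD_WINDOWLOG_MAX-1) can use up most of them, hence the extra
 * reload here before the matchLength/litLength extra bits are read below */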
BIT_reloadDStream(&seqState->DStream); - } - - if (ofCode <= 1) { - offset += (llCode == 0); - if (offset) { - size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() && (mlBits + llBits > 24)) - BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - seq.match = NULL; - - return seq; -} - -FORCE_INLINE -size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd > oend_w) - return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); - - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op + 8, (*litPtr) + 8, - sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - match = dictEnd + (match - base); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) - op[i] = match[i]; - return sequenceLength; - } - } - } - /* Requirement: op <= oend_w && 
sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op + 4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; - match += 8; - - if (oMatchEnd > oend - (16 - MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) - *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - -static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -{ - const BYTE *ip = (const BYTE *)seqStart; - const BYTE *const iend = ip + seqSize; - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + maxDstSize; - BYTE *op = ostart; - const BYTE *litPtr = dctx->litPtr; - const BYTE *const litEnd = litPtr + dctx->litSize; - const BYTE *const base = (const BYTE *)(dctx->base); - const BYTE *const vBase = (const BYTE *)(dctx->vBase); - const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); - int nbSeq; - - /* Build Decoding Tables */ - { - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) - return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - seqState.prevOffset[i] = dctx->entropy.rep[i]; - } - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) { - nbSeq--; - { - seq_t const sequence = ZSTD_decodeSequence(&seqState); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - op += oneSeqSize; - } - } - - /* check if reached exact end */ - if (nbSeq) - return ERROR(corruption_detected); - /* save reps for next block */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); - } - } - - /* last literal segment */ - { - size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend - op)) - return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op - ostart; -} - -FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets) -{ - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits + mlBits + ofBits; - - static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16, 18, - 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; - - static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, - 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; - - static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, - 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, - 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; - - /* sequence */ - { - size_t offset; - if (!ofCode) - offset = 0; - else { - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); - offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (ZSTD_32bits() || extraBits) - BIT_reloadDStream(&seqState->DStream); - if (extraBits) - offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofCode <= 1) { - offset += (llCode == 0); - if (offset) { - size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() && (mlBits + llBits > 24)) - BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode > 15) ? 
BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - - { - size_t const pos = seqState->pos + seq.litLength; - seq.match = seqState->base + pos - seq.offset; /* single memory segment */ - if (seq.offset > pos) - seq.match += seqState->gotoDict; /* separate memory segment */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - -static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize) -{ - if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { - return ZSTD_decodeSequenceLong_generic(seqState, 1); - } else { - return ZSTD_decodeSequenceLong_generic(seqState, 0); - } -} - -FORCE_INLINE -size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = sequence.match; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd > oend_w) - return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); - - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op + 8, (*litPtr) + 8, - sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) - op[i] = match[i]; - return sequenceLength; - } - } - } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op + 4, match); - 
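/* note : the first 8 output bytes are produced with overlap-tolerant byte
 * and 4-byte copies; dec32table (added above) and dec64table (subtracted
 * below) then reposition match so that, after the unconditional op += 8 /
 * match += 8 that follows, match trails op by a multiple of the original
 * offset that is >= 8, making the remaining 8-byte wildcopies safe.
 * e.g. with offset == 1 the four byte-copies replicate a single byte,
 * ZSTD_copy4() re-copies it, and match ends up exactly 8 behind op */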
match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; - match += 8; - - if (oMatchEnd > oend - (16 - MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) - *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - -static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -{ - const BYTE *ip = (const BYTE *)seqStart; - const BYTE *const iend = ip + seqSize; - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + maxDstSize; - BYTE *op = ostart; - const BYTE *litPtr = dctx->litPtr; - const BYTE *const litEnd = litPtr + dctx->litSize; - const BYTE *const base = (const BYTE *)(dctx->base); - const BYTE *const vBase = (const BYTE *)(dctx->vBase); - const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); - unsigned const windowSize = dctx->fParams.windowSize; - int nbSeq; - - /* Build Decoding Tables */ - { - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) - return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STOSEQ_MASK (STORED_SEQS - 1) -#define ADVANCED_SEQS 4 - seq_t *sequences = (seq_t *)dctx->entropy.workspace; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS); - dctx->fseEntropy = 1; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - seqState.prevOffset[i] = dctx->entropy.rep[i]; - } - seqState.base = base; - seqState.pos = (size_t)(op - base); - seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) { - sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize); - } - if (seqNb < seqAdvance) - return ERROR(corruption_detected); - - /* decode and decompress */ - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) { - seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize); - size_t const oneSeqSize = - ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - ZSTD_PREFETCH(sequence.match); - sequences[seqNb & STOSEQ_MASK] = sequence; - op += oneSeqSize; - } - if (seqNb < nbSeq) - return ERROR(corruption_detected); - - /* finish queue */ - seqNb -= seqAdvance; - for (; seqNb < nbSeq; seqNb++) { - size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - op += oneSeqSize; - } - - /* save reps for next block */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); - } - } - - /* last literal segment */ - { - size_t const lastLLSize = 
litEnd - litPtr; - if (lastLLSize > (size_t)(oend - op)) - return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op - ostart; -} - -static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ /* blockType == blockCompressed */ - const BYTE *ip = (const BYTE *)src; - - if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(srcSize_wrong); - - /* Decode literals section */ - { - size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) - return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ - /* likely because of register pressure */ - /* if that's the correct cause, then 32-bits ARM should be affected differently */ - /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ - if (dctx->fParams.windowSize > (1 << 23)) - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); -} - -static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); - dctx->base = dst; - dctx->previousDstEnd = dst; - } -} - -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); - dctx->previousDstEnd = (char *)dst + dSize; - return dSize; -} - -/** ZSTD_insertBlock() : - insert `src` block into `dctx` history. Useful to track uncompressed blocks. 
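    A hypothetical caller that copies a raw (uncompressed) block into the output
    buffer itself can keep the decoder's match window consistent like this :
      memcpy(dst, rawBlock, blockSize);        <- the caller's own copy
      ZSTD_insertBlock(dctx, dst, blockSize);  <- register those bytes as history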
*/ -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize) -{ - ZSTD_checkContinuity(dctx, blockStart); - dctx->previousDstEnd = (const char *)blockStart + blockSize; - return blockSize; -} - -size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) -{ - if (length > dstCapacity) - return ERROR(dstSize_tooSmall); - memset(dst, byte, length); - return length; -} - -/** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -{ - if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4); - } else { - const BYTE *ip = (const BYTE *)src; - const BYTE *const ipstart = ip; - size_t remainingSize = srcSize; - ZSTD_frameParams fParams; - - size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize); - if (ZSTD_isError(headerSize)) - return headerSize; - - /* Frame Header */ - { - size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize); - if (ZSTD_isError(ret)) - return ret; - if (ret > 0) - return ERROR(srcSize_wrong); - } - - ip += headerSize; - remainingSize -= headerSize; - - /* Loop on each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - - if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ERROR(srcSize_wrong); - - ip += ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTD_blockHeaderSize + cBlockSize; - - if (blockProperties.lastBlock) - break; - } - - if (fParams.checksumFlag) { /* Frame content checksum */ - if (remainingSize < 4) - return ERROR(srcSize_wrong); - ip += 4; - remainingSize -= 4; - } - - return ip - ipstart; - } -} - -/*! 
ZSTD_decompressFrame() : -* @dctx must be properly initialized */ -static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr) -{ - const BYTE *ip = (const BYTE *)(*srcPtr); - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + dstCapacity; - BYTE *op = ostart; - size_t remainingSize = *srcSizePtr; - - /* check */ - if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - - /* Frame Header */ - { - size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); - if (ZSTD_isError(frameHeaderSize)) - return frameHeaderSize; - if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize)); - ip += frameHeaderSize; - remainingSize -= frameHeaderSize; - } - - /* Loop on each block */ - while (1) { - size_t decodedSize; - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - - ip += ZSTD_blockHeaderSize; - remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) - return ERROR(srcSize_wrong); - - switch (blockProperties.blockType) { - case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break; - case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break; - case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break; - case bt_reserved: - default: return ERROR(corruption_detected); - } - - if (ZSTD_isError(decodedSize)) - return decodedSize; - if (dctx->fParams.checksumFlag) - xxh64_update(&dctx->xxhState, op, decodedSize); - op += decodedSize; - ip += cBlockSize; - remainingSize -= cBlockSize; - if (blockProperties.lastBlock) - break; - } - - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); - U32 checkRead; - if (remainingSize < 4) - return ERROR(checksum_wrong); - checkRead = ZSTD_readLE32(ip); - if (checkRead != checkCalc) - return ERROR(checksum_wrong); - ip += 4; - remainingSize -= 4; - } - - /* Allow caller to get size read */ - *srcPtr = ip; - *srcSizePtr = remainingSize; - return op - ostart; -} - -static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict); -static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict); - -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - const ZSTD_DDict *ddict) -{ - void *const dststart = dst; - - if (ddict) { - if (dict) { - /* programmer error, these two cases should be mutually exclusive */ - return ERROR(GENERIC); - } - - dict = ZSTD_DDictDictContent(ddict); - dictSize = ZSTD_DDictDictSize(ddict); - } - - while (srcSize >= ZSTD_frameHeaderSize_prefix) { - U32 magicNumber; - - magicNumber = ZSTD_readLE32(src); - if (magicNumber != ZSTD_MAGICNUMBER) { - if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t skippableSize; - if (srcSize < ZSTD_skippableHeaderSize) - return ERROR(srcSize_wrong); - skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; - if (srcSize < skippableSize) { - return ERROR(srcSize_wrong); - } - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } else { - return 
ERROR(prefix_unknown); - } - } - - if (ddict) { - /* we were called from ZSTD_decompress_usingDDict */ - ZSTD_refDDict(dctx, ddict); - } else { - /* this will initialize correctly with no dict if dict == NULL, so - * use this in all cases but ddict */ - CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); - } - ZSTD_checkContinuity(dctx, dst); - - { - const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); - if (ZSTD_isError(res)) - return res; - /* don't need to bounds check this, ZSTD_decompressFrame will have - * already */ - dst = (BYTE *)dst + res; - dstCapacity -= res; - } - } - - if (srcSize) - return ERROR(srcSize_wrong); /* input not entirely consumed */ - - return (BYTE *)dst - (BYTE *)dststart; -} - -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) -{ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); -} - -size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); -} - -/*-************************************** -* Advanced Streaming Decompression API -* Bufferless and synchronous -****************************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; } - -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx) -{ - switch (dctx->stage) { - default: /* should not happen */ - case ZSTDds_getFrameHeaderSize: - case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader; - case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader; - case ZSTDds_decompressBlock: return ZSTDnit_block; - case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock; - case ZSTDds_checkChecksum: return ZSTDnit_checksum; - case ZSTDds_decodeSkippableHeader: - case ZSTDds_skipFrame: return ZSTDnit_skippableFrame; - } -} - -int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ - -/** ZSTD_decompressContinue() : -* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - /* Sanity check */ - if (srcSize != dctx->expected) - return ERROR(srcSize_wrong); - if (dstCapacity) - ZSTD_checkContinuity(dctx, dst); - - switch (dctx->stage) { - case ZSTDds_getFrameHeaderSize: - if (srcSize != ZSTD_frameHeaderSize_prefix) - return ERROR(srcSize_wrong); /* impossible */ - if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } - dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); - if (ZSTD_isError(dctx->headerSize)) - return dctx->headerSize; - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { - dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - } - dctx->expected = 0; /* not necessary to copy more */ - fallthrough; - - case ZSTDds_decodeFrameHeader: - memcpy(dctx->headerBuffer + 
ZSTD_frameHeaderSize_prefix, src, dctx->expected); - CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); - dctx->expected = ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; - - case ZSTDds_decodeBlockHeader: { - blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->rleSize = bp.origSize; - if (cBlockSize) { - dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; - return 0; - } - /* empty block */ - if (bp.lastBlock) { - if (dctx->fParams.checksumFlag) { - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* end of frame */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->expected = 3; /* go directly to next header */ - dctx->stage = ZSTDds_decodeBlockHeader; - } - return 0; - } - case ZSTDds_decompressLastBlock: - case ZSTDds_decompressBlock: { - size_t rSize; - switch (dctx->bType) { - case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break; - case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; - case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break; - case bt_reserved: /* should never happen */ - default: return ERROR(corruption_detected); - } - if (ZSTD_isError(rSize)) - return rSize; - if (dctx->fParams.checksumFlag) - xxh64_update(&dctx->xxhState, dst, rSize); - - if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ - if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* ends here */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char *)dst + rSize; - } - return rSize; - } - case ZSTDds_checkChecksum: { - U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); - U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ - if (check32 != h32) - return ERROR(checksum_wrong); - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - case ZSTDds_decodeSkippableHeader: { - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4); - dctx->stage = ZSTDds_skipFrame; - return 0; - } - case ZSTDds_skipFrame: { - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - default: - return ERROR(GENERIC); /* impossible */ - } -} - -static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); - dctx->base = dict; - dctx->previousDstEnd = (const char *)dict + dictSize; - return 0; -} - -/* ZSTD_loadEntropy() : - * dict : must point at beginning of a valid zstd dictionary - * @return : size of entropy tables read */ -static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize) -{ - const BYTE *dictPtr = (const BYTE *)dict; - const BYTE *const dictEnd = dictPtr + dictSize; - - if (dictSize <= 8) - return ERROR(dictionary_corrupted); - dictPtr += 8; /* skip header = magic + 
dictID */ - - { - size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace)); - if (HUF_isError(hSize)) - return ERROR(dictionary_corrupted); - dictPtr += hSize; - } - - { - short offcodeNCount[MaxOff + 1]; - U32 offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(offcodeHeaderSize)) - return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += offcodeHeaderSize; - } - - { - short matchlengthNCount[MaxML + 1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(matchlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += matchlengthHeaderSize; - } - - { - short litlengthNCount[MaxLL + 1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(litlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += litlengthHeaderSize; - } - - if (dictPtr + 12 > dictEnd) - return ERROR(dictionary_corrupted); - { - int i; - size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12)); - for (i = 0; i < 3; i++) { - U32 const rep = ZSTD_readLE32(dictPtr); - dictPtr += 4; - if (rep == 0 || rep >= dictContentSize) - return ERROR(dictionary_corrupted); - entropy->rep[i] = rep; - } - } - - return dictPtr - (const BYTE *)dict; -} - -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - if (dictSize < 8) - return ZSTD_refDictContent(dctx, dict, dictSize); - { - U32 const magic = ZSTD_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { - return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ - } - } - dctx->dictID = ZSTD_readLE32((const char *)dict + 4); - - /* load entropy tables */ - { - size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); - if (ZSTD_isError(eSize)) - return ERROR(dictionary_corrupted); - dict = (const char *)dict + eSize; - dictSize -= eSize; - } - dctx->litEntropy = dctx->fseEntropy = 1; - - /* reference dictionary content */ - return ZSTD_refDictContent(dctx, dict, dictSize); -} - -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - CHECK_F(ZSTD_decompressBegin(dctx)); - if (dict && dictSize) - CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); - return 0; -} - -/* ====== ZSTD_DDict ====== */ - -struct ZSTD_DDict_s { - void *dictBuffer; - const void *dictContent; - size_t dictSize; - 
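	/* entropy tables decoded once by ZSTD_loadEntropy_inDDict() at creation
	 * time, then shared with each DCtx through ZSTD_refDDict() instead of
	 * re-parsing the dictionary for every frame */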
ZSTD_entropyTables_t entropy; - U32 dictID; - U32 entropyPresent; - ZSTD_customMem cMem; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ - -size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); } - -static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; } - -static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; } - -static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict) -{ - ZSTD_decompressBegin(dstDCtx); /* init */ - if (ddict) { /* support refDDict on NULL */ - dstDCtx->dictID = ddict->dictID; - dstDCtx->base = ddict->dictContent; - dstDCtx->vBase = ddict->dictContent; - dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize; - dstDCtx->previousDstEnd = dstDCtx->dictEnd; - if (ddict->entropyPresent) { - dstDCtx->litEntropy = 1; - dstDCtx->fseEntropy = 1; - dstDCtx->LLTptr = ddict->entropy.LLTable; - dstDCtx->MLTptr = ddict->entropy.MLTable; - dstDCtx->OFTptr = ddict->entropy.OFTable; - dstDCtx->HUFptr = ddict->entropy.hufTable; - dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; - dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; - dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; - } else { - dstDCtx->litEntropy = 0; - dstDCtx->fseEntropy = 0; - } - } -} - -static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict) -{ - ddict->dictID = 0; - ddict->entropyPresent = 0; - if (ddict->dictSize < 8) - return 0; - { - U32 const magic = ZSTD_readLE32(ddict->dictContent); - if (magic != ZSTD_DICT_MAGIC) - return 0; /* pure content mode */ - } - ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4); - - /* load entropy tables */ - CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted); - ddict->entropyPresent = 1; - return 0; -} - -static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) -{ - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - { - ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (!ddict) - return NULL; - ddict->cMem = customMem; - - if ((byReference) || (!dict) || (!dictSize)) { - ddict->dictBuffer = NULL; - ddict->dictContent = dict; - } else { - void *const internalBuffer = ZSTD_malloc(dictSize, customMem); - if (!internalBuffer) { - ZSTD_freeDDict(ddict); - return NULL; - } - memcpy(internalBuffer, dict, dictSize); - ddict->dictBuffer = internalBuffer; - ddict->dictContent = internalBuffer; - } - ddict->dictSize = dictSize; - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - /* parse dictionary content */ - { - size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict); - if (ZSTD_isError(errorCode)) { - ZSTD_freeDDict(ddict); - return NULL; - } - } - - return ddict; - } -} - -/*! ZSTD_initDDict() : -* Create a digested dictionary, to start decompression without startup delay. -* `dict` content is copied inside DDict. 
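*  (note : this kernel port calls ZSTD_createDDict_advanced() with byReference == 1,
*   so the dictionary bytes are in fact referenced in place rather than duplicated)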
-*  Consequently, `dict` must remain valid for the whole lifetime of the `ZSTD_DDict` */
-ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem);
-}
-
-size_t ZSTD_freeDDict(ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = ddict->cMem;
-		ZSTD_free(ddict->dictBuffer, cMem);
-		ZSTD_free(ddict, cMem);
-		return 0;
-	}
-}
-
-/*! ZSTD_getDictID_fromDict() :
- *  Provides the dictID stored within dictionary.
- *  If @return == 0, the dictionary is not conformant with the Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize)
-{
-	if (dictSize < 8)
-		return 0;
-	if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC)
-		return 0;
-	return ZSTD_readLE32((const char *)dict + 4);
-}
-
-/*! ZSTD_getDictID_fromDDict() :
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to the Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0;
-	return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-}
-
-/*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompress the frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could be for one of the following reasons :
- *  - The frame does not require a dictionary to be decoded (most common case).
- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- *  - This is not a Zstandard frame.
- *  To identify the exact failure cause, it's possible to use ZSTD_getFrameParams(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize)
-{
-	ZSTD_frameParams zfp = {0, 0, 0, 0};
-	size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize);
-	if (ZSTD_isError(hError))
-		return 0;
-	return zfp.dictID;
-}
-
-/*! ZSTD_decompress_usingDDict() :
-*  Decompression using a pre-digested Dictionary.
-*  Use dictionary without significant overhead.
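*  A minimal, hypothetical call sequence (dictBuf/dictLen, in/out buffers and wksp
*  are caller-provided; wksp must hold at least ZSTD_DDictWorkspaceBound() bytes,
*  and dctx comes from ZSTD_initDCtx()) :
*    ZSTD_DDict *const dd = ZSTD_initDDict(dictBuf, dictLen, wksp, ZSTD_DDictWorkspaceBound());
*    size_t const r = ZSTD_decompress_usingDDict(dctx, out, outCap, in, inLen, dd);
*  then test r with ZSTD_isError() before trusting the output.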
*/ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) -{ - /* pass content and size in case legacy frames are encountered */ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); -} - -/*===================================== -* Streaming decompression -*====================================*/ - -typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; - -/* *** Resource management *** */ -struct ZSTD_DStream_s { - ZSTD_DCtx *dctx; - ZSTD_DDict *ddictLocal; - const ZSTD_DDict *ddict; - ZSTD_frameParams fParams; - ZSTD_dStreamStage stage; - char *inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char *outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t blockSize; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ - size_t lhSize; - ZSTD_customMem customMem; - void *legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; -}; /* typedef'd to ZSTD_DStream within "zstd.h" */ - -size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) -{ - size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const inBuffSize = blockSize; - size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); -} - -static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem) -{ - ZSTD_DStream *zds; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem); - if (zds == NULL) - return NULL; - memset(zds, 0, sizeof(ZSTD_DStream)); - memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); - zds->dctx = ZSTD_createDCtx_advanced(customMem); - if (zds->dctx == NULL) { - ZSTD_freeDStream(zds); - return NULL; - } - zds->stage = zdss_init; - zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - return zds; -} - -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem); - if (!zds) { - return NULL; - } - - zds->maxWindowSize = maxWindowSize; - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - ZSTD_freeDDict(zds->ddictLocal); - zds->ddictLocal = NULL; - zds->ddict = zds->ddictLocal; - zds->legacyVersion = 0; - zds->hostageByte = 0; - - { - size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - - zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); - zds->inBuffSize = blockSize; - zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); - zds->outBuffSize = neededOutSize; - if (zds->inBuff == NULL || zds->outBuff == NULL) { - ZSTD_freeDStream(zds); - return NULL; - } - } - return zds; -} - -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize) -{ - ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize); - if (zds) { - zds->ddict = ddict; - } - return zds; -} - -size_t ZSTD_freeDStream(ZSTD_DStream *zds) -{ - if (zds == NULL) - return 0; /* support free on null */ 
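	/* note : the allocator is copied to a local below because zds itself is
	 * freed with it; zds must not be dereferenced after ZSTD_free(zds, cMem) */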
- { - ZSTD_customMem const cMem = zds->customMem; - ZSTD_freeDCtx(zds->dctx); - zds->dctx = NULL; - ZSTD_freeDDict(zds->ddictLocal); - zds->ddictLocal = NULL; - ZSTD_free(zds->inBuff, cMem); - zds->inBuff = NULL; - ZSTD_free(zds->outBuff, cMem); - zds->outBuff = NULL; - ZSTD_free(zds, cMem); - return 0; - } -} - -/* *** Initialization *** */ - -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } - -size_t ZSTD_resetDStream(ZSTD_DStream *zds) -{ - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - return ZSTD_frameHeaderSize_prefix; -} - -/* ***** Decompression ***** */ - -ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; -} - -size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) -{ - const char *const istart = (const char *)(input->src) + input->pos; - const char *const iend = (const char *)(input->src) + input->size; - const char *ip = istart; - char *const ostart = (char *)(output->dst) + output->pos; - char *const oend = (char *)(output->dst) + output->size; - char *op = ostart; - U32 someMoreWork = 1; - - while (someMoreWork) { - switch (zds->stage) { - case zdss_init: - ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ - fallthrough; - - case zdss_loadHeader: { - size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); - if (ZSTD_isError(hSize)) - return hSize; - if (hSize != 0) { /* need more input */ - size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - if (toLoad > (size_t)(iend - ip)) { /* not enough input to load full header */ - memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip); - zds->lhSize += iend - ip; - input->pos = input->size; - return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + - ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); - zds->lhSize = hSize; - ip += toLoad; - break; - } - - /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ - && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart); - if (cSize <= (size_t)(iend - istart)) { - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict); - if (ZSTD_isError(decompressedSize)) - return decompressedSize; - ip = istart + cSize; - op += decompressedSize; - zds->dctx->expected = 0; - zds->stage = zdss_init; - someMoreWork = 0; - break; - } - } - - /* Consume header */ - ZSTD_refDDict(zds->dctx, zds->ddict); - { - size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); - { - size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size)); - } - } - - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - if (zds->fParams.windowSize > zds->maxWindowSize) - return 
ERROR(frameParameter_windowTooLarge); - - /* Buffers are preallocated, but double check */ - { - size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - if (zds->inBuffSize < blockSize) { - return ERROR(GENERIC); - } - if (zds->outBuffSize < neededOutSize) { - return ERROR(GENERIC); - } - zds->blockSize = blockSize; - } - zds->stage = zdss_read; - } - fallthrough; - - case zdss_read: { - size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); - if (neededInSize == 0) { /* end of frame */ - zds->stage = zdss_init; - someMoreWork = 0; - break; - } - if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */ - const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, - (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize); - if (ZSTD_isError(decodedSize)) - return decodedSize; - ip += neededInSize; - if (!decodedSize && !isSkipFrame) - break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; - break; - } - if (ip == iend) { - someMoreWork = 0; - break; - } /* no more input */ - zds->stage = zdss_load; - /* pass-through */ - } - fallthrough; - - case zdss_load: { - size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); - size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ - size_t loadedSize; - if (toLoad > zds->inBuffSize - zds->inPos) - return ERROR(corruption_detected); /* should never happen */ - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip); - ip += loadedSize; - zds->inPos += loadedSize; - if (loadedSize < toLoad) { - someMoreWork = 0; - break; - } /* not enough input, wait for more */ - - /* decode loaded input */ - { - const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) - return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { - zds->stage = zdss_read; - break; - } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; - /* pass-through */ - } - } - fallthrough; - - case zdss_flush: { - size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize); - op += flushedSize; - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->stage = zdss_read; - if (zds->outStart + zds->blockSize > zds->outBuffSize) - zds->outStart = zds->outEnd = 0; - break; - } - /* cannot complete flush */ - someMoreWork = 0; - break; - } - default: - return ERROR(GENERIC); /* impossible */ - } - } - - /* result */ - input->pos += (size_t)(ip - istart); - output->pos += (size_t)(op - ostart); - { - size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); - if (!nextSrcSizeHint) { /* frame fully decoded */ - if (zds->outEnd == zds->outStart) { /* output fully flushed */ - if (zds->hostageByte) { - if (input->pos >= input->size) { - zds->stage = zdss_read; - return 1; - } /* can't release hostage (not present) */ - input->pos++; /* release hostage */ - } - return 0; - } - if (!zds->hostageByte) { /* output not 
fully flushed; keep last byte as hostage; will be released when all output is flushed */ - input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ - zds->hostageByte = 1; - } - return 1; - } - nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ - if (zds->inPos > nextSrcSizeHint) - return ERROR(GENERIC); /* should never happen */ - nextSrcSizeHint -= zds->inPos; /* already loaded*/ - return nextSrcSizeHint; - } -} - -unsigned int zstd_is_error(size_t code) -{ - return ZSTD_isError(code); -} -EXPORT_SYMBOL(zstd_is_error); - -zstd_error_code zstd_get_error_code(size_t code) -{ - return ZSTD_getErrorCode(code); -} -EXPORT_SYMBOL(zstd_get_error_code); - -const char *zstd_get_error_name(size_t code) -{ - /* Real implementation in zstd-1.4.6. */ - return "GENERIC"; -} -EXPORT_SYMBOL(zstd_get_error_name); - -size_t zstd_dctx_workspace_bound(void) -{ - return ZSTD_DCtxWorkspaceBound(); -} -EXPORT_SYMBOL(zstd_dctx_workspace_bound); - -zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) -{ - return ZSTD_initDCtx(workspace, workspace_size); -} -EXPORT_SYMBOL(zstd_init_dctx); - -size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, - const void *src, size_t src_size) -{ - return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); -} -EXPORT_SYMBOL(zstd_decompress_dctx); - -size_t zstd_dstream_workspace_bound(size_t max_window_size) -{ - return ZSTD_DStreamWorkspaceBound(max_window_size); -} -EXPORT_SYMBOL(zstd_dstream_workspace_bound); - -zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, - size_t workspace_size) -{ - return ZSTD_initDStream(max_window_size, workspace, workspace_size); -} -EXPORT_SYMBOL(zstd_init_dstream); - -size_t zstd_reset_dstream(zstd_dstream *dstream) -{ - return ZSTD_resetDStream(dstream); -} -EXPORT_SYMBOL(zstd_reset_dstream); - -size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, - zstd_in_buffer *input) -{ - return ZSTD_decompressStream(dstream, output, input); -} -EXPORT_SYMBOL(zstd_decompress_stream); - -size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) -{ - return ZSTD_findFrameCompressedSize(src, src_size); -} -EXPORT_SYMBOL(zstd_find_frame_compressed_size); - -size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, - size_t src_size) -{ - return ZSTD_getFrameParams(header, src, src_size); -} -EXPORT_SYMBOL(zstd_get_frame_header); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Decompressor"); diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c new file mode 100644 index 0000000000000..05570ed5f8be2 --- /dev/null +++ b/lib/zstd/decompress/huf_decompress.c @@ -0,0 +1,1206 @@ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" /* BIT_* */ +#include "../common/fse.h" /* to compress headers */ +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/error_private.h" + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/* + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. 
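+ *
+ * Illustrative example (not part of the imported source): on a little-endian
+ * target, HUF_DEltX1_set4(0x41, 3) first forms the 16-bit cell 0x0341
+ * (byte in the low 8 bits, nbBits in the high 8 bits), then multiplies by
+ * 0x0001000100010001ULL to replicate it across all four cells, yielding
+ * D4 == 0x0341034103410341, i.e. four identical {byte, nbBits} entries
+ * written with a single MEM_write64().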
+ */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
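+ *
+ * Illustrative example (not part of the imported source): with tableLog == 8,
+ * a symbol of weight w == 4 decodes with nbBits == tableLog + 1 - w == 5 and
+ * owns length == (1 << w) >> 1 == 8 consecutive entries, so it takes the
+ * `case 8` path below: one HUF_DEltX1_set4() and two MEM_write64() calls
+ * per symbol instead of eight scalar stores.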
+ */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE*
const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* 
HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+    HUF_DEltX2 DElt;
+    U32* rankVal = wksp;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits = (BYTE)(consumed);
+        DElt.length = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits = (BYTE)(nbBits + consumed);
+            DElt.length = 2;
+            do { DTable[i++] = DElt; } while (i < end);   /* since length >= 1 */
+
+            rankVal[weight] += length;
+    }   }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {  /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
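+/* Worked example for the table construction below (illustration only, not
+ * part of the imported source): HUF_readStats_wksp() recovers one weight per
+ * symbol; symbols are then bucketed by weight via rankStats/rankStart and
+ * sorted into sortedSymbol. With maxTableLog == 12, a symbol of weight w
+ * decodes with nbBits == (tableLog + 1) - w bits, and HUF_fillDTableX2()
+ * gives it 1 << (12 - nbBits) consecutive DTable slots, as in the X1 case,
+ * except that each slot may carry a two-symbol sequence.
+ */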
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = wksp->rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; works only because it's the last symbol.
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = 
HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t 
HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/* HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c new file mode 100644 index 0000000000000..dbbc7919de534 --- /dev/null +++ b/lib/zstd/decompress/zstd_ddict.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "zstd_decompress_internal.h" +#include "zstd_ddict.h" + + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if 
(ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. 
+ * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h new file mode 100644 index 0000000000000..8c1a79d666f89 --- /dev/null +++ b/lib/zstd/decompress/zstd_ddict.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>     /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
new file mode 100644
index 0000000000000..b4d81d84479ac
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -0,0 +1,2085 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
+ */
+#ifndef ZSTD_HEAPMODE
+# define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+* LEGACY_SUPPORT :
+* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+
+/*!
+ * MAXWINDOWSIZE_DEFAULT :
+ * maximum window size accepted by DStream __by default__.
+ * Frames requiring more memory will be rejected.
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ * NO_FORWARD_PROGRESS_MAX :
+ * maximum allowed nb of calls to ZSTD_decompressStream()
+ * without any forward progress
+ * (defined as: no byte read from input, and no byte flushed to output)
+ * before triggering an error.
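+ *
+ * Illustrative scenario (not part of the imported source): a caller that
+ * keeps invoking the streaming decoder with input->pos == input->size and a
+ * full, unflushed output buffer makes no progress; after 16 such calls the
+ * stream returns an error instead of spinning forever.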
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+# define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+#include "../common/zstd_internal.h"  /* blockProperties_t */
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
+
+
+
+
+/* ***********************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index between [0, hashSet->ddictPtrTableSize]
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = xxh64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx &= idxRangeMask;
+        idx++;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
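+ *
+ * Illustrative example (not part of the imported source): growing the table
+ * from the base size 64 to 128 changes the probe mask in
+ * ZSTD_DDictHashSet_getIndex() from (hash & 63) to (hash & 127), so every
+ * surviving entry is re-inserted through ZSTD_DDictHashSet_emplaceDDict()
+ * rather than copied to its old slot.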
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            idx++;
+            idx &= idxRangeMask;   /* Goes to start of table when we reach the end */
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    if (!ret)
+        return NULL;
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
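+ *
+ * Editor's sketch (uses only the public API shown later in this file;
+ * ddictA/ddictB are ZSTD_DDict* created beforehand) of how the set is
+ * populated once multiple-DDict mode is selected:
+ *
+ *     ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts,
+ *                            ZSTD_rmd_refMultipleDDicts);
+ *     ZSTD_DCtx_refDDict(dctx, ddictA);   // hashed into the set by dictID
+ *     ZSTD_DCtx_refDDict(dctx, ddictB);   // both stay referenced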
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with 
static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } + return 0; +} + +/* ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/* ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/* ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. 
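+ *          A positive (non-error) return is a request for more input, so a
+ *          caller can probe with whatever bytes it already has and grow on
+ *          demand. Editor's sketch (buf/have are caller state, refill() is
+ *          a hypothetical helper):
+ *
+ *              ZSTD_frameHeader zfh;
+ *              size_t need = ZSTD_getFrameHeader(&zfh, buf, have);
+ *              while (need > 0 && !ZSTD_isError(need)) {
+ *                  have = refill(buf, need);  // fetch at least `need` bytes
+ *                  need = ZSTD_getFrameHeader(&zfh, buf, have);
+ *              }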
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/* ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. 
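+ *          Editor's illustration (not part of the patch): on a 0 return the
+ *          caller reads the decoded fields directly, e.g.
+ *
+ *              ZSTD_frameHeader zfh;
+ *              if (ZSTD_getFrameHeader(&zfh, src, srcSize) == 0) {
+ *                  // zfh.windowSize       : history the decoder must keep
+ *                  // zfh.frameContentSize : may be ZSTD_CONTENTSIZE_UNKNOWN
+ *                  // zfh.dictID           : 0 when no dictID was written
+ *              }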
+ * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/* ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/* ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/* ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= 
ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/* ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. + * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
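/* editor's note: no declared content size => assume every block expands to blockSizeMax */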
zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/* ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/* ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/* ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->validateChecksum) + xxh64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + 
FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + ZSTD_FALLTHROUGH; + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/* + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. 
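+ *
+ * Editor's sketch of the bufferless protocol this hint feeds (assumes an
+ * initialized dctx and caller-managed BYTE* cursors ip/op/oend; not part
+ * of the patch):
+ *
+ *     size_t need;
+ *     ZSTD_decompressBegin(dctx);
+ *     while ((need = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
+ *         size_t const got = ZSTD_decompressContinue(dctx,
+ *                                op, (size_t)(oend - op), ip, need);
+ *         if (ZSTD_isError(got)) break;
+ *         ip += need;   // input consumed in exact `need`-sized bites
+ *         op += got;    // headers produce 0 bytes; blocks up to a block's worth
+ *     }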
+ * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + ZSTD_FALLTHROUGH; + case ZSTDds_getFrameHeaderSize: + ZSTD_FALLTHROUGH; + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + ZSTD_FALLTHROUGH; + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/* ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
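/* editor's note: the last block gets a distinct stage so frame-size and checksum checks run once it completes */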
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
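+             * Editor's note: only bt_raw reaches this point with
+             * dctx->expected still > 0 (compressed and RLE blocks zero it
+             * above), so a raw block fed in three 4 KiB slices counts
+             * 12288 -> 8192 -> 4096 -> 0 before the stage advances.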
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
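+ *         Editor's summary (layout of a conformant dictionary as this
+ *         loader walks it; nothing new to the patch):
+ *
+ *             [ 4 bytes  : ZSTD_MAGIC_DICTIONARY       ]
+ *             [ 4 bytes  : dictID                      ]
+ *             [ Huffman table, OF/ML/LL FSE tables     ]  <- parsed here
+ *             [ 12 bytes : 3 repcodes, one LE32 each   ]  <- validated below
+ *             [ remainder: raw dictionary content      ]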
+ * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += 
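        /* editor's note: step past the LL table header; the 12 bytes of repcodes are read next */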
litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
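+ * Editor's illustration (not part of the patch): the check mirrors the
+ * first 8 bytes of a conformant dictionary,
+ *
+ *     offset 0 : magic  == ZSTD_MAGIC_DICTIONARY
+ *     offset 4 : dictID   (read with MEM_readLE32(dict + 4))
+ *
+ * so any blob shorter than 8 bytes, or lacking the magic, reports 0.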
*/ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t 
ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = 
ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + 
return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + ZSTD_FALLTHROUGH; + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, 
(size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? 
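/* buffered mode: window + one block + wildcopy margin, capped at frameContentSize */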
ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + ZSTD_FALLTHROUGH; + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + ZSTD_FALLTHROUGH; + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. 
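+             * (a block that fits entirely in the caller's input is decoded directly
+             *  from src in the zdss_read stage above; reaching this path means the
+             *  input first had to be gathered into inBuff)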
*/ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. 
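+     * The next call must then present an identical ZSTD_outBuffer (same dst,
+     * size and pos); ZSTD_checkOutBuffer() rejects anything else.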
*/ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c new file mode 100644 index 0000000000000..2d101d9a842ec --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress_block.c @@ -0,0 +1,1540 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/compiler.h" /* prefetch */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/zstd_internal.h" +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
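+ * For example, building with -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT compiles
+ * in only the regular (non-prefetching) decoder, while
+ * -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG keeps only the prefetching one.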
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + ZSTD_FALLTHROUGH; + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + 
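/* 4X variant: the compressed literals hold 4 independent Huffman streams */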
dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
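+ * (selected when a sequences section uses the predefined distributions, i.e. set_basic.)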
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
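+         * e.g. a symbol with count 3 still gets one full 8-byte store of its
+         * spread value; pos then only advances by 3, so the next symbol's
+         * store simply overlaps the 5 surplus bytes.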
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, 
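/* matchLength table is described last: the sequences header orders LL, OF, ML */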
&dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. 
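+ * (e.g. op + sequenceLength can wrap around in 32-bit address space, and the
+ * literal or match copy may run past the end of the block's buffers.)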
By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy 
literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. 
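+ * With ZSTD_WINDOWLOG_MAX_32 == 30 and STREAM_ACCUMULATOR_MIN_32 == 25, this
+ * works out to 5 extra bits, matching the static assert below.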
+ */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. 
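+     * (up to 16 extra litLength bits plus the 9+9+8 bits consumed by the three
+     * FSE state updates stay under the 57 bits a 64-bit reload guarantees;
+     * see the static assert just below.)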
*/ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. 
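+     * (so a match offset may legitimately reach back into dictionary content.)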
+ */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+    void* dst, size_t maxDstSize,
+    const void* seqStart, size_t seqSize, int nbSeq,
+    const ZSTD_longOffset_e isLongOffset,
+    const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        size_t error = 0;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
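+         * (DSB is the CPU's decoded-uop cache; MITE is the legacy decode pipeline.)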
+         * If it is pretty even then you may be in an okay case.
+         *
+         * I've been able to reproduce this issue on the following CPUs:
+         * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *             Use Instruments->Counters to get DSB/MITE cycles.
+         *             I never got performance swings, but I was able to
+         *             go from the good case of mostly DSB to half of the
+         *             cycles served from MITE.
+         * - Coffeelake: Intel i9-9900k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         * - Haswell
+         * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+         * - Skylake
+         *
+         * If you are seeing performance stability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 4");
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            BIT_reloadDStream(&(seqState.DStream));
+            op += oneSeqSize;
+            /* gcc and clang both don't like early returns in this loop.
+             * Instead break and check for an error at the end of the loop.
+             */
+            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
+                error = oneSeqSize;
+                break;
+            }
+            if (UNLIKELY(!--nbSeq)) break;
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        if (ZSTD_isError(error)) return error;
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+               ZSTD_DCtx* dctx,
+               void* dst, size_t maxDstSize,
+               const void* seqStart, size_t seqSize, int nbSeq,
+               const ZSTD_longOffset_e isLongOffset,
+               const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*)
(dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * 
decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
+         */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+        int nbSeq;
+        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+        srcSize -= seqHSize;
+
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if ( !usePrefetchDecoder
+          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+            U32 const minShare = MEM_64bits() ?
7 : 20;  /* heuristic values, correspond to 2.73% and 7.81% */
+            usePrefetchDecoder = (shareLongOffsets >= minShare);
+        }
+#endif
+
+        dctx->ddictIsCold = 0;
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+        /* else */
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+    }
+}
+
+
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
new file mode 100644
index 0000000000000..e7f5f6689459f
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>    /* DCtx, and some public functions */
+#include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
+
+
+/* ===   Prototypes   === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
+ * Internal use only.
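+ * Within the decoder it is reached through ZSTD_buildSeqTable(), which passes
+ * dctx->workspace as the scratch area (see ZSTD_decodeSeqHeaders() in
+ * zstd_decompress_block.c).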
+ */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h new file mode 100644 index 0000000000000..4b9052f687558 --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress_internal.h @@ -0,0 +1,202 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/mem.h" /* BYTE, U16, U32 */ +#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable 
hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + struct xxh64_state xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h index 9ba367b441493..0fbec508f285e 100644 --- a/lib/zstd/decompress_sources.h +++ b/lib/zstd/decompress_sources.h @@ -16,8 +16,13 @@ * decompression. */ -#include "entropy_common.c" -#include "fse_decompress.c" -#include "huf_decompress.c" -#include "zstd_common.c" -#include "decompress.c" +#include "common/debug.c" +#include "common/entropy_common.c" +#include "common/error_private.c" +#include "common/fse_decompress.c" +#include "common/zstd_common.c" +#include "decompress/huf_decompress.c" +#include "decompress/zstd_ddict.c" +#include "decompress/zstd_decompress.c" +#include "decompress/zstd_decompress_block.c" +#include "zstd_decompress_module.c" diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c deleted file mode 100644 index 2b0a643c32c4b..0000000000000 --- a/lib/zstd/entropy_common.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Common functions of New Generation Entropy library - * Copyright (C) 2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************* -* Dependencies -***************************************/ -#include "error_private.h" /* ERR_*, ERROR */ -#include "fse.h" -#include "huf.h" -#include "mem.h" - -/*=== Version ===*/ -unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } - -/*=== Error Management ===*/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } - -unsigned HUF_isError(size_t code) { return ERR_isError(code); } - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize) -{ - const BYTE *const istart = (const BYTE *)headerBuffer; - const BYTE *const iend = istart + hbSize; - const BYTE *ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - if (hbSize < 4) - return ERROR(srcSize_wrong); - bitStream = ZSTD_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) - return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1 << nbBits) + 1; - threshold = 1 << nbBits; - nbBits++; - - while ((remaining > 1) & (charnum <= *maxSVPtr)) { - if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend - 5) { - ip += 2; - bitStream = ZSTD_readLE32(ip) >> bitCount; - } else { - bitStream >>= 16; - bitCount += 16; - } - } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) - return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) - normalizedCounter[charnum++] = 0; - if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { - ip += bitCount >> 3; - bitCount &= 7; - bitStream = ZSTD_readLE32(ip) >> bitCount; - } else { - bitStream >>= 2; - } - } - { - int const max = (2 * threshold - 1) - remaining; - int count; - - if ((bitStream & (threshold - 1)) < (U32)max) { - count = bitStream & (threshold - 1); - bitCount += nbBits - 1; - } else { - count = bitStream & (2 * threshold - 1); - if (count >= threshold) - count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= count < 0 ? 
-count : count; /* -1 means +1 */ - normalizedCounter[charnum++] = (short)count; - previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } - - if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { - ip += bitCount >> 3; - bitCount &= 7; - } else { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = ZSTD_readLE32(ip) >> (bitCount & 31); - } - } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ - if (remaining != 1) - return ERROR(corruption_detected); - if (bitCount > 32) - return ERROR(corruption_detected); - *maxSVPtr = charnum - 1; - - ip += (bitCount + 7) >> 3; - return ip - istart; -} - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableX?() . -*/ -size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 weightTotal; - const BYTE *ip = (const BYTE *)src; - size_t iSize; - size_t oSize; - - if (!srcSize) - return ERROR(srcSize_wrong); - iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ - - if (iSize >= 128) { /* special header */ - oSize = iSize - 127; - iSize = ((oSize + 1) / 2); - if (iSize + 1 > srcSize) - return ERROR(srcSize_wrong); - if (oSize >= hwSize) - return ERROR(corruption_detected); - ip += 1; - { - U32 n; - for (n = 0; n < oSize; n += 2) { - huffWeight[n] = ip[n / 2] >> 4; - huffWeight[n + 1] = ip[n / 2] & 15; - } - } - } else { /* header compressed with FSE (normal case) */ - if (iSize + 1 > srcSize) - return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) - return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); - weightTotal = 0; - { - U32 n; - for (n = 0; n < oSize; n++) { - if (huffWeight[n] >= HUF_TABLELOG_MAX) - return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } - } - if (weightTotal == 0) - return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ - { - U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) - return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { - U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) - return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } - } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) - return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ - - /* results */ - *nbSymbolsPtr = (U32)(oSize + 1); - return iSize + 1; -} diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h deleted file mode 100644 index 1a60b31f706cb..0000000000000 --- a/lib/zstd/error_private.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * 
Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-/* ****************************************
-* Dependencies
-******************************************/
-#include <linux/types.h> /* size_t */
-#include <linux/zstd.h> /* enum list */
-
-/* ****************************************
-* Compiler-specific
-******************************************/
-#define ERR_STATIC static __attribute__((unused))
-
-/*-****************************************
-* Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-/*-****************************************
-* Error codes handling
-******************************************/
-#define ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
-{
-	if (!ERR_isError(code))
-		return (ERR_enum)0;
-	return (ERR_enum)(0 - code);
-}
-
-#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h
deleted file mode 100644
index 7460ab04b1916..0000000000000
--- a/lib/zstd/fse.h
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * FSE : Finite State Entropy codec
- * Public Prototypes declaration
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef FSE_H
-#define FSE_H
-
-/*-*****************************************
-* Dependencies
-******************************************/
-#include <linux/types.h> /* size_t, ptrdiff_t */
-
-/*-*****************************************
-* FSE_PUBLIC_API : control library symbols visibility
-******************************************/
-#define FSE_PUBLIC_API
-
-/*------   Version   ------*/
-#define FSE_VERSION_MAJOR 0
-#define FSE_VERSION_MINOR 9
-#define FSE_VERSION_RELEASE 0
-
-#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
-#define FSE_QUOTE(str) #str
-#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
-#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
-
-#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE)
-FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
-
-/*-*****************************************
-* Tool functions
-******************************************/
-FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
-
-/* Error Management */
-FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-
-/*-*****************************************
-* FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-/*! FSE_optimalTableLog():
-   dynamically downsize 'tableLog' when conditions are met.
-   It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-   @return : recommended tableLog (necessarily <= 'maxTableLog') */
-FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
-   normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
-   'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-   @return : tableLog,
-             or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_NCountWriteBound():
-   Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
- Typically useful for allocation purpose. */ -FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. - @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). - -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 
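
Put together, the steps above chain roughly as follows. This is a minimal sketch,
not part of the original header: the helper name is hypothetical, the on-stack
buffers are far too large for real kernel code (a heap-allocated workspace would
be used instead), the RLE/single-symbol special cases are not handled, and
ERROR(GENERIC) from error_private.h stands in for precise error reporting.

static size_t fse_compress_sketch(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
{
	unsigned count[FSE_MAX_SYMBOL_VALUE + 1];
	short norm[FSE_MAX_SYMBOL_VALUE + 1];
	FSE_CTable ct[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
	/* scratch for FSE_buildCTable_wksp(): cumul[] plus the symbol spread table */
	unsigned wksp[FSE_MAX_SYMBOL_VALUE + 2 + (1 << (FSE_MAX_TABLELOG - 2))];
	unsigned char *const op = (unsigned char *)dst;
	unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
	unsigned tableLog;
	size_t nCountSize, cSize;

	/* 1. histogram the source (fast unsafe variant; inputs are plain bytes) */
	FSE_count_simple(count, &maxSymbolValue, src, srcSize);

	/* 2. pick a table size, then normalize counters so they sum to 2^tableLog */
	tableLog = FSE_optimalTableLog(0 /* default */, srcSize, maxSymbolValue);
	if (FSE_isError(FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue)))
		return ERROR(GENERIC);

	/* 3. store the normalized counters; the decoder rebuilds its DTable from them */
	nCountSize = FSE_writeNCount(op, dstCapacity, norm, maxSymbolValue, tableLog);
	if (FSE_isError(nCountSize))
		return nCountSize;

	/* 4. build the encoding table from the normalized counters */
	if (FSE_isError(FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, wksp, sizeof(wksp))))
		return ERROR(GENERIC);

	/* 5. entropy-code the payload; a result of 0 means "did not fit into dst" */
	cSize = FSE_compress_usingCTable(op + nCountSize, dstCapacity - nCountSize, src, srcSize, ct);
	if (FSE_isError(cSize) || cSize == 0)
		return ERROR(GENERIC);

	return nCountSize + cSize;
}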
- -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -*/ - -/* *** DECOMPRESSION *** */ - -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize); - -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ - -/*! FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt); - -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) - -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
(ex: dst buffer too small) -*/ - -/* *** Dependency *** */ -#include "bitstream.h" - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size >> 7)) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog)) - -/* ***************************************** -* FSE advanced API -*******************************************/ -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned - */ -size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace); - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` must be a table of minimum `1024` unsigned - */ -size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace); - -/*! FSE_count_simple - * Same as FSE_countFast(), but does not use any additional memory (not even on stack). - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -*/ -size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize); - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); -/**< same as FSE_optimalTableLog(), which used `minus==2` */ - -size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - -size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue); -/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. 
- FSE_endOfDState(&DState); -*/ - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct) -{ - const void *ptr = ct; - const U16 *u16ptr = (const U16 *)ptr; - const U32 tableLog = ZSTD_read16(ptr); - statePtr->value = (ptrdiff_t)1 << tableLog; - statePtr->stateTable = u16ptr + 2; - statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1)); - statePtr->stateLog = tableLog; -} - -/*! FSE_initCState2() : -* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -* uses the smallest state value possible, saving the cost of this symbol */ -ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; - const U16 *stateTable = (const U16 *)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } -} - -ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol) -{ - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; - const U16 *const stateTable = (const U16 *)(statePtr->stateTable); - U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr) -{ - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} - -/* ====== Decompression ====== */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct { - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt) -{ - const void *ptr = dt; - const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; -} - -ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} - -ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ 
- FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/*! FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; } - -/* ************************************************************** -* Tuning parameters -****************************************************************/ -/*!MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#ifndef FSE_MAX_MEMORY_USAGE -#define FSE_MAX_MEMORY_USAGE 14 -#endif -#ifndef FSE_DEFAULT_MEMORY_USAGE -#define FSE_DEFAULT_MEMORY_USAGE 13 -#endif - -/*!FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#ifndef FSE_MAX_SYMBOL_VALUE -#define FSE_MAX_SYMBOL_VALUE 255 -#endif - -/* ************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION -#define FSE_DECODE_TYPE FSE_decode_t - -/* *************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2) -#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG) -#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1) -#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2) -#define FSE_MIN_TABLELOG 5 - -#define FSE_TABLELOG_ABSOLUTE_MAX 15 -#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX -#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - -#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3) - -#endif /* FSE_H */ diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c deleted file mode 100644 index ef3d1741d5328..0000000000000 --- a/lib/zstd/fse_compress.c +++ /dev/null @@ -1,795 +0,0 @@ -/* - * FSE : Finite State Entropy encoder - * Copyright (C) 2013-2015, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-* Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-* Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/math64.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-* Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/* **************************************************************
-* Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
- * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-	U32 const tableSize = 1 << tableLog;
-	U32 const tableMask = tableSize - 1;
-	void *const ptr = ct;
-	U16 *const tableU16 = ((U16 *)ptr) + 2;
-	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
-	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-	U32 const step = FSE_TABLESTEP(tableSize);
-	U32 highThreshold = tableSize - 1;
-
-	U32 *cumul;
-	FSE_FUNCTION_TYPE *tableSymbol;
-	size_t spaceUsed32 = 0;
-
-	cumul = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
-	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* CTable header */
-	tableU16[-2] = (U16)tableLog;
-	tableU16[-1] = (U16)maxSymbolValue;
-
-	/* For explanations on how to distribute symbol values over the table :
-	 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-
-	/* symbol start positions */
-	{
-		U32 u;
-		cumul[0] = 0;
-		for (u = 1; u <= maxSymbolValue + 1; u++) {
-			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
-				cumul[u] = cumul[u - 1] + 1;
-				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
-			} else {
-				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
-			}
-		}
-		cumul[maxSymbolValue + 1] = tableSize + 1;
-	}
-
-	/* Spread symbols */
-	{
-		U32 position = 0;
-		U32 symbol;
-		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
-			int nbOccurences;
-			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
-				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-				position = (position + step) & tableMask;
-				while (position > highThreshold)
-					position = (position + step) & tableMask; /* Low proba area */
-			}
-		}
-
-		if (position != 0)
-			return ERROR(GENERIC); /* Must have gone through all positions */
-	}
-
-	/* Build table */
-	{
-		U32 u;
-		for (u = 0; u < tableSize; u++) {
-			FSE_FUNCTION_TYPE s = tableSymbol[u];	     /* note : static analyzer may not understand tableSymbol is properly initialized */
-			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
-		}
-	}
-
-	/* Build Symbol Transformation Table */
-	{
-		unsigned total = 0;
-		unsigned s;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			switch (normalizedCounter[s]) {
-			case 0: break;
-
-			case -1:
-			case 1:
-				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
-				symbolTT[s].deltaFindState = total - 1;
-				total++;
-				break;
-			default: {
-				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
-				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
-				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-				symbolTT[s].deltaFindState = total - normalizedCounter[s];
-				total += normalizedCounter[s];
-			}
-			}
-		}
-	}
-
-	return 0;
-}
-
-/*-**************************************************************
-* FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-{
-	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
-	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ?
use default */ -} - -static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) -{ - BYTE *const ostart = (BYTE *)header; - BYTE *out = ostart; - BYTE *const oend = ostart + headerBufferSize; - int nbBits; - const int tableSize = 1 << tableLog; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - bitStream = 0; - bitCount = 0; - /* Table Size */ - bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; - bitCount += 4; - - /* Init */ - remaining = tableSize + 1; /* +1 for extra accuracy */ - threshold = tableSize; - nbBits = tableLog + 1; - - while (remaining > 1) { /* stops at 1 */ - if (previous0) { - unsigned start = charnum; - while (!normalizedCounter[charnum]) - charnum++; - while (charnum >= start + 24) { - start += 24; - bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - } - while (charnum >= start + 3) { - start += 3; - bitStream += 3 << bitCount; - bitCount += 2; - } - bitStream += (charnum - start) << bitCount; - bitCount += 2; - if (bitCount > 16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } - } - { - int count = normalizedCounter[charnum++]; - int const max = (2 * threshold - 1) - remaining; - remaining -= count < 0 ? -count : count; - count++; /* +1 for extra accuracy */ - if (count >= threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; - bitCount += nbBits; - bitCount -= (count < max); - previous0 = (count == 1); - if (remaining < 1) - return ERROR(GENERIC); - while (remaining < threshold) - nbBits--, threshold >>= 1; - } - if (bitCount > 16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } - } - - /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += (bitCount + 7) / 8; - - if (charnum > maxSymbolValue + 1) - return ERROR(GENERIC); - - return (out - ostart); -} - -size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) - return ERROR(GENERIC); /* Unsupported */ - - if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); -} - -/*-************************************************************** -* Counting histogram -****************************************************************/ -/*! FSE_count_simple - This function counts byte values within `src`, and store the histogram into table `count`. - It doesn't use any additional memory. 
- But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. - For this reason, prefer using a table `count` with 256 elements. - @return : count of most numerous element -*/ -size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) -{ - const BYTE *ip = (const BYTE *)src; - const BYTE *const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max = 0; - - memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); - if (srcSize == 0) { - *maxSymbolValuePtr = 0; - return 0; - } - - while (ip < end) - count[*ip++]++; - - while (!count[maxSymbolValue]) - maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - - { - U32 s; - for (s = 0; s <= maxSymbolValue; s++) - if (count[s] > max) - max = count[s]; - } - - return (size_t)max; -} - -/* FSE_count_parallel_wksp() : - * Same as FSE_count_parallel(), but using an externally provided scratch buffer. - * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ -static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, - unsigned *const workSpace) -{ - const BYTE *ip = (const BYTE *)source; - const BYTE *const iend = ip + sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max = 0; - U32 *const Counting1 = workSpace; - U32 *const Counting2 = Counting1 + 256; - U32 *const Counting3 = Counting2 + 256; - U32 *const Counting4 = Counting3 + 256; - - memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) - maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { - U32 cached = ZSTD_read32(ip); - ip += 4; - while (ip < iend - 15) { - U32 c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - } - ip -= 4; - } - - /* finish last symbols */ - while (ip < iend) - Counting1[*ip++]++; - - if (checkMax) { /* verify stats will fit into destination table */ - U32 s; - for (s = 255; s > maxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) - return ERROR(maxSymbolValue_tooSmall); - } - } - - { - U32 s; - for (s = 0; s <= maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) - max = count[s]; - } - } - - while (!count[maxSymbolValue]) - maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. 
- * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) -{ - if (sourceSize < 1500) - return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); -} - -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) -{ - if (*maxSymbolValuePtr < 255) - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); - *maxSymbolValuePtr = 255; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); -} - -/*-************************************************************** -* FSE Compression Code -****************************************************************/ -/*! FSE_sizeof_CTable() : - FSE_CTable is a variable size structure which contains : - `U16 tableLog;` - `U16 maxSymbolValue;` - `U16 nextStateNumber[1 << tableLog];` // This size is variable - `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable -Allocation is manual (C standard does not support variable-size structures). -*/ -size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); - return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); -} - -/* provides the minimum logSize to safely represent a distribution */ -static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) -{ - U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - return minBits; -} - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) -{ - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - if (tableLog == 0) - tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) - tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) - tableLog = minBits; /* Need a minimum to safely represent all symbol values */ - if (tableLog < FSE_MIN_TABLELOG) - tableLog = FSE_MIN_TABLELOG; - if (tableLog > FSE_MAX_TABLELOG) - tableLog = FSE_MAX_TABLELOG; - return tableLog; -} - -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); -} - -/* Secondary normalization method. - To be used when primary method fails. 
*/ - -static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) -{ - short const NOT_YET_ASSIGNED = -2; - U32 s; - U32 distributed = 0; - U32 ToDistribute; - - /* Init */ - U32 const lowThreshold = (U32)(total >> tableLog); - U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); - - for (s = 0; s <= maxSymbolValue; s++) { - if (count[s] == 0) { - norm[s] = 0; - continue; - } - if (count[s] <= lowThreshold) { - norm[s] = -1; - distributed++; - total -= count[s]; - continue; - } - if (count[s] <= lowOne) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - - norm[s] = NOT_YET_ASSIGNED; - } - ToDistribute = (1 << tableLog) - distributed; - - if ((total / ToDistribute) > lowOne) { - /* risk of rounding to zero */ - lowOne = (U32)((total * 3) / (ToDistribute * 2)); - for (s = 0; s <= maxSymbolValue; s++) { - if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - } - ToDistribute = (1 << tableLog) - distributed; - } - - if (distributed == maxSymbolValue + 1) { - /* all values are pretty poor; - probably incompressible data (should have already been detected); - find max, then give all remaining points to max */ - U32 maxV = 0, maxC = 0; - for (s = 0; s <= maxSymbolValue; s++) - if (count[s] > maxC) - maxV = s, maxC = count[s]; - norm[maxV] += (short)ToDistribute; - return 0; - } - - if (total == 0) { - /* all of the symbols were low enough for the lowOne or lowThreshold */ - for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) - if (norm[s] > 0) - ToDistribute--, norm[s]++; - return 0; - } - - { - U64 const vStepLog = 62 - tableLog; - U64 const mid = (1ULL << (vStepLog - 1)) - 1; - U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ - U64 tmpTotal = mid; - for (s = 0; s <= maxSymbolValue; s++) { - if (norm[s] == NOT_YET_ASSIGNED) { - U64 const end = tmpTotal + (count[s] * rStep); - U32 const sStart = (U32)(tmpTotal >> vStepLog); - U32 const sEnd = (U32)(end >> vStepLog); - U32 const weight = sEnd - sStart; - if (weight < 1) - return ERROR(GENERIC); - norm[s] = (short)weight; - tmpTotal = end; - } - } - } - - return 0; -} - -size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) -{ - /* Sanity checks */ - if (tableLog == 0) - tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) - return ERROR(GENERIC); /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) - return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - - { - U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; - U64 const scale = 62 - tableLog; - U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! 
*/ - U64 const vStep = 1ULL << (scale - 20); - int stillToDistribute = 1 << tableLog; - unsigned s; - unsigned largest = 0; - short largestP = 0; - U32 lowThreshold = (U32)(total >> tableLog); - - for (s = 0; s <= maxSymbolValue; s++) { - if (count[s] == total) - return 0; /* rle special case */ - if (count[s] == 0) { - normalizedCounter[s] = 0; - continue; - } - if (count[s] <= lowThreshold) { - normalizedCounter[s] = -1; - stillToDistribute--; - } else { - short proba = (short)((count[s] * step) >> scale); - if (proba < 8) { - U64 restToBeat = vStep * rtbTable[proba]; - proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; - } - if (proba > largestP) - largestP = proba, largest = s; - normalizedCounter[s] = proba; - stillToDistribute -= proba; - } - } - if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { - /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); - if (FSE_isError(errorCode)) - return errorCode; - } else - normalizedCounter[largest] += (short)stillToDistribute; - } - - return tableLog; -} - -/* fake FSE_CTable, for raw (uncompressed) input */ -size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) -{ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - void *const ptr = ct; - U16 *const tableU16 = ((U16 *)ptr) + 2; - void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) - return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16)nbBits; - tableU16[-1] = (U16)maxSymbolValue; - - /* Build table */ - for (s = 0; s < tableSize; s++) - tableU16[s] = (U16)(tableSize + s); - - /* Build Symbol Transformation Table */ - { - const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); - for (s = 0; s <= maxSymbolValue; s++) { - symbolTT[s].deltaNbBits = deltaNbBits; - symbolTT[s].deltaFindState = s - 1; - } - } - - return 0; -} - -/* fake FSE_CTable, for rle input (always same symbol) */ -size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) -{ - void *ptr = ct; - U16 *tableU16 = ((U16 *)ptr) + 2; - void *FSCTptr = (U32 *)ptr + 2; - FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; - - /* header */ - tableU16[-2] = (U16)0; - tableU16[-1] = (U16)symbolValue; - - /* Build table */ - tableU16[0] = 0; - tableU16[1] = 0; /* just in case */ - - /* Build Symbol Transformation Table */ - symbolTT[symbolValue].deltaNbBits = 0; - symbolTT[symbolValue].deltaFindState = 0; - - return 0; -} - -static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) -{ - const BYTE *const istart = (const BYTE *)src; - const BYTE *const iend = istart + srcSize; - const BYTE *ip = iend; - - BIT_CStream_t bitC; - FSE_CState_t CState1, CState2; - - /* init */ - if (srcSize <= 2) - return 0; - { - size_t const initError = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(initError)) - return 0; /* not enough space available to write a bitstream */ - } - -#define FSE_FLUSHBITS(s) (fast ? 
BIT_flushBitsFast(s) : BIT_flushBits(s)) - - if (srcSize & 1) { - FSE_initCState2(&CState1, ct, *--ip); - FSE_initCState2(&CState2, ct, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } else { - FSE_initCState2(&CState2, ct, *--ip); - FSE_initCState2(&CState1, ct, *--ip); - } - - /* join to mod 4 */ - srcSize -= 2; - if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } - - /* 2 or 4 encoding per loop */ - while (ip > istart) { - - FSE_encodeSymbol(&bitC, &CState2, *--ip); - - if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ - FSE_FLUSHBITS(&bitC); - - FSE_encodeSymbol(&bitC, &CState1, *--ip); - - if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - } - - FSE_FLUSHBITS(&bitC); - } - - FSE_flushCState(&bitC, &CState2); - FSE_flushCState(&bitC, &CState1); - return BIT_closeCStream(&bitC); -} - -size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) -{ - unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); - - if (fast) - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); - else - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); -} - -size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c deleted file mode 100644 index 0b353530fb3f3..0000000000000 --- a/lib/zstd/fse_decompress.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * FSE : Finite State Entropy decoder - * Copyright (C) 2013-2015, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. 
This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#define FORCE_INLINE static __always_inline - -/* ************************************************************** -* Includes -****************************************************************/ -#include "bitstream.h" -#include "fse.h" -#include "zstd_internal.h" -#include -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) \ - { \ - enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -#error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -#error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X, Y) X##Y -#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) -#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) - -/* Function templates */ - -size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) -{ - void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ - FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr); - U16 *symbolNext = (U16 *)workspace; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize - 1; - - /* Sanity Checks */ - if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1)) - return ERROR(tableLog_tooLarge); - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) - return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); - - /* Init, lay down lowprob symbols */ - { - FSE_DTableHeader DTableH; - DTableH.tableLog = (U16)tableLog; - DTableH.fastMode = 1; - { - S16 const largeLimit = (S16)(1 << (tableLog - 1)); - U32 s; - for (s = 0; s < maxSV1; s++) { - if (normalizedCounter[s] == -1) { - tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; - symbolNext[s] = 1; - } else { - if (normalizedCounter[s] >= largeLimit) - DTableH.fastMode = 0; - symbolNext[s] = normalizedCounter[s]; - } - } - } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { - U32 const tableMask = tableSize - 1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s = 0; s < maxSV1; s++) { - int i; - for (i = 0; i < normalizedCounter[s]; i++) { - tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; - position = (position + step) & tableMask; - while (position > highThreshold) - position = (position + step) & tableMask; /* lowprob area */ - } - } - if (position != 0) - return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter 
is incorrect */ - } - - /* Build Decoding table */ - { - U32 u; - for (u = 0; u < tableSize; u++) { - FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); - U16 nextState = symbolNext[symbol]++; - tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState)); - tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize); - } - } - - return 0; -} - -/*-******************************************************* -* Decompression (Byte symbols) -*********************************************************/ -size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue) -{ - void *ptr = dt; - FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; - void *dPtr = dt + 1; - FSE_decode_t *const cell = (FSE_decode_t *)dPtr; - - DTableH->tableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - -size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits) -{ - void *ptr = dt; - FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; - void *dPtr = dt + 1; - FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask + 1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) - return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s = 0; s < maxSV1; s++) { - dinfo[s].newState = 0; - dinfo[s].symbol = (BYTE)s; - dinfo[s].nbBits = (BYTE)nbBits; - } - - return 0; -} - -FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt, - const unsigned fast) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - BYTE *const omax = op + maxDstSize; - BYTE *const olimit = omax - 3; - - BIT_DStream_t bitD; - FSE_DState_t state1; - FSE_DState_t state2; - - /* Init */ - CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); - - FSE_initDState(&state1, &bitD, dt); - FSE_initDState(&state2, &bitD, dt); - -#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) - - /* 4 symbols per loop */ - for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) { - op[0] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - { - if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { - op += 2; - break; - } - } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) { - if (op > (omax - 2)) - return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state1); - if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state2); - break; - } - - if (op > (omax - 2)) - return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state2); - if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state1); - break; - } - } - - return op - ostart; -} - -size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt) -{ - const void *ptr = dt; - const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ - if (fastMode) - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - -size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize) -{ - const BYTE *const istart = (const BYTE *)cSrc; - const BYTE *ip = istart; - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - size_t NCountLength; - - FSE_DTable *dt; - short *counting; - size_t spaceUsed32 = 0; - - FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32)); - - dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog); - counting = (short *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* normal FSE decoding mode */ - NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(NCountLength)) - return NCountLength; - // if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining - // case : NCountLength==cSrcSize */ - if (tableLog > maxLog) - return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; - - CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize)); - - return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */ -} diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h deleted file mode 100644 index 923218d12e289..0000000000000 --- a/lib/zstd/huf.h +++ /dev/null @@ -1,212 +0,0 @@ 
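A minimal caller-side sketch of the workspace contract of the
FSE_decompress_wksp() removed just above (MY_MAX_LOG and decode_block are
hypothetical; the workspace must cover the DTable plus the counting[]
array carved out by FSE_decompress_wksp() and the U16 symbolNext[] table
used by FSE_buildDTable_wksp()):

    #define MY_MAX_LOG 10 /* hypothetical caller-side tableLog bound */

    static size_t decode_block(void *dst, size_t dstCapacity,
                               const void *src, size_t srcSize)
    {
            U32 wksp[FSE_DTABLE_SIZE_U32(MY_MAX_LOG)
                     + ((FSE_MAX_SYMBOL_VALUE + 1) * sizeof(short)) / sizeof(U32)
                     + ((FSE_MAX_SYMBOL_VALUE + 1) * sizeof(U16)) / sizeof(U32)];

            return FSE_decompress_wksp(dst, dstCapacity, src, srcSize,
                                       MY_MAX_LOG, wksp, sizeof(wksp));
    }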
-/* - * Huffman coder, part of New Generation Entropy library - * header file - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ -#ifndef HUF_H_298734234 -#define HUF_H_298734234 - -/* *** Dependencies *** */ -#include /* size_t */ - -/* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ - -/* Error Management */ -unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ - -/* *** Advanced function *** */ - -/** HUF_compress4X_wksp() : -* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ -size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -/* *** Dependencies *** */ -#include "mem.h" /* U32 */ - -/* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ -#define HUF_SYMBOLVALUE_MAX 255 - -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -#error "HUF_TABLELOG_MAX is too large !" 
-#endif - -/* **************************************** -* Static allocation -******************************************/ -/* HUF buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of HUF's Compression Table */ -#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[maxSymbolValue + 1]; \ - void *name##hv = &(name##hb); \ - HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */ - -/* static allocation of HUF's DTable */ -typedef U32 HUF_DTable; -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)} -#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)} - -/* The workspace must have alignment at least 4 and be at least this large */ -#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10) -#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -/* The workspace must have alignment at least 4 and be at least this large */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10) -#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -/* **************************************** -* Advanced decompression functions -******************************************/ -size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< double-symbols decoder */ - -/* **************************************** -* HUF detailed API -******************************************/ -/*! -HUF_compress() does the following: -1. count symbol occurrence from source[] into table count[] using FSE_count() -2. (optional) refine tableLog using HUF_optimalTableLog() -3. build Huffman table from count using HUF_buildCTable() -4. save Huffman table to memory buffer using HUF_writeCTable_wksp() -5. encode the data stream using HUF_compress4X_usingCTable() - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and regenerate 'CTable' using external methods. 
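For illustration, the five steps above can be driven directly
(hypothetical sketch: src/srcSize, dst/dstSize and a workspace
wksp/wkspSize of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned are
assumed to exist; error checks elided):

    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    unsigned maxSym = HUF_SYMBOLVALUE_MAX;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    size_t hSize, cSize;
    HUF_CREATE_STATIC_CTABLE(tree, HUF_SYMBOLVALUE_MAX);

    FSE_count_wksp(count, &maxSym, src, srcSize, wksp);            // 1
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSym);       // 2
    huffLog = (unsigned)HUF_buildCTable_wksp(tree, count, maxSym,
                                             huffLog, wksp, wkspSize); // 3
    hSize = HUF_writeCTable_wksp(dst, dstSize, tree, maxSym,
                                 huffLog, wksp, wkspSize);         // 4
    cSize = HUF_compress4X_usingCTable((BYTE *)dst + hSize,
                                       dstSize - hSize,
                                       src, srcSize, tree);        // 5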
-*/ -/* FSE_count() : find it within "fse.h" */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize); -size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); - -typedef enum { - HUF_repeat_none, /**< Cannot use the previous table */ - HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, - 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ -} HUF_repeat; -/** HUF_compress4X_repeat() : -* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -* If it uses hufTable it does not modify hufTable or repeat. -* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -* If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, - int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. - */ -size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize); - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, - void *workspace, size_t workspaceSize); - -/** HUF_readCTable() : -* Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); - -/* -HUF_decompress() does the following: -1. select the decompression algorithm (X2, X4) based on pre-computed heuristics -2. build Huffman table from save, using HUF_readDTableXn() -3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable -*/ - -/** HUF_selectDecoder() : -* Tells which decoder is likely to decode faster, -* based on a set of pre-determined metrics. -* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . 
-* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize); - -size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); -size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); - -size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); - -/* single stream variants */ - -size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); -/** HUF_compress1X_repeat() : -* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -* If it uses hufTable it does not modify hufTable or repeat. -* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -* If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, - int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< single-symbol decoder */ -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< double-symbols decoder */ - -size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, - const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); - -#endif /* HUF_H_298734234 */ diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c deleted file mode 100644 index fd32838c185f2..0000000000000 --- a/lib/zstd/huf_compress.c +++ /dev/null @@ -1,773 +0,0 @@ -/* - * Huffman encoder, part of New Generation Entropy library - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Includes -****************************************************************/ -#include "bitstream.h" -#include "fse.h" /* header compression */ -#include "huf.h" -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_STATIC_ASSERT(c) \ - { \ - enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ -#define CHECK_V_F(e, f) \ - size_t const e = f; \ - if (ERR_isError(e)) \ - return f -#define CHECK_F(f) \ - { \ - CHECK_V_F(_var_err__, f); \ - } - -/* ************************************************************** -* Utils -****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -} - -/* ******************************************************* -* HUF : Huffman block compression -*********************************************************/ -/* HUF_compressWeights() : - * Same as FSE_compress(), but dedicated to huff0's weights compression. - * The use case needs much less stack memory. - * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
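   Weights span at most HUF_TABLELOG_MAX + 1 distinct values, so a small
   FSE table (tableLog <= MAX_FSE_TABLELOG_FOR_HUFF_HEADER, i.e. 6, defined
   below) suffices, which is what keeps the stack footprint small.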
- */ -#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - BYTE *const oend = ostart + dstSize; - - U32 maxSymbolValue = HUF_TABLELOG_MAX; - U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - - FSE_CTable *CTable; - U32 *count; - S16 *norm; - size_t spaceUsed32 = 0; - - HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32)); - - CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX); - count = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - norm = (S16 *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* init conditions */ - if (wtSize <= 1) - return 0; /* Not compressible */ - - /* Scan input and build symbol stats */ - { - CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize)); - if (maxCount == wtSize) - return 1; /* only a single symbol in src : rle */ - if (maxCount == 1) - return 0; /* each symbol present maximum once => not compressible */ - } - - tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue)); - - /* Write table description header */ - { - CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog)); - op += hSize; - } - - /* Compress */ - CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize)); - { - CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable)); - if (cSize == 0) - return 0; /* not enough space for compressed data */ - op += cSize; - } - - return op - ostart; -} - -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt within "huf.h" */ - -/*! HUF_writeCTable_wksp() : - `CTable` : Huffman tree to save, using huf representation. 
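   Weights are first FSE-compressed via HUF_compressWeights_wksp(); if the
   result does not shrink below maxSymbolValue/2 bytes, they are stored raw
   instead, as 4-bit nibbles behind a `128 + (maxSymbolValue - 1)` header
   byte.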
- @return : size of saved CTable */ -size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize) -{ - BYTE *op = (BYTE *)dst; - U32 n; - - BYTE *bitsToWeight; - BYTE *huffWeight; - size_t spaceUsed32 = 0; - - bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* check conditions */ - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); - - /* convert to weight */ - bitsToWeight[0] = 0; - for (n = 1; n < huffLog + 1; n++) - bitsToWeight[n] = (BYTE)(huffLog + 1 - n); - for (n = 0; n < maxSymbolValue; n++) - huffWeight[n] = bitsToWeight[CTable[n].nbBits]; - - /* attempt weights compression by FSE */ - { - CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize)); - if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */ - op[0] = (BYTE)hSize; - return hSize + 1; - } - } - - /* write raw values as 4-bits (max : 15) */ - if (maxSymbolValue > (256 - 128)) - return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ - if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize) - return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ - for (n = 0; n < maxSymbolValue; n += 2) - op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]); - return ((maxSymbolValue + 1) / 2) + 1; -} - -size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 *rankVal; - BYTE *huffWeight; - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t readSize; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* get symbol weights */ - readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (ERR_isError(readSize)) - return readSize; - - /* check result */ - if (tableLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - if (nbSymbols > maxSymbolValue + 1) - return ERROR(maxSymbolValue_tooSmall); - - /* Prepare base value per rank */ - { - U32 n, nextRankStart = 0; - for (n = 1; n <= tableLog; n++) { - U32 curr = nextRankStart; - nextRankStart += (rankVal[n] << (n - 1)); - rankVal[n] = curr; - } - } - - /* fill nbBits */ - { - U32 n; - for (n = 0; n < nbSymbols; n++) { - const U32 w = huffWeight[n]; - CTable[n].nbBits = (BYTE)(tableLog + 1 - w); - } - } - - /* fill val */ - { - U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */ - U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0}; - { - U32 n; - for (n = 0; n < nbSymbols; n++) - 
nbPerRank[CTable[n].nbBits]++; - } - /* determine stating value per rank */ - valPerRank[tableLog + 1] = 0; /* for w==0 */ - { - U16 min = 0; - U32 n; - for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */ - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } - } - /* assign value within rank, symbol order */ - { - U32 n; - for (n = 0; n <= maxSymbolValue; n++) - CTable[n].val = valPerRank[CTable[n].nbBits]++; - } - } - - return readSize; -} - -typedef struct nodeElt_s { - U32 count; - U16 parent; - BYTE byte; - BYTE nbBits; -} nodeElt; - -static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits) -{ - const U32 largestBits = huffNode[lastNonNull].nbBits; - if (largestBits <= maxNbBits) - return largestBits; /* early exit : no elt > maxNbBits */ - - /* there are several too large elements (at least >= 2) */ - { - int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); - U32 n = lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n--; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) - n--; /* n end at index of smallest symbol using < maxNbBits */ - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - - /* repay normalized cost */ - { - U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_TABLELOG_MAX + 2]; - int pos; - - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); - { - U32 currNbBits = maxNbBits; - for (pos = n; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currNbBits) - continue; - currNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits - currNbBits] = pos; - } - } - - while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; - for (; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 highPos = rankLast[nBitsToDecrease]; - U32 lowPos = rankLast[nBitsToDecrease - 1]; - if (highPos == noSymbol) - continue; - if (lowPos == noSymbol) - break; - { - U32 const highTotal = huffNode[highPos].count; - U32 const lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) - break; - } - } - /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) 
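   Each iteration lengthens by one bit the last code of rank
   nBitsToDecrease, which repays 1 << (nBitsToDecrease - 1) units of the
   excess tracked in totalCost.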
*/ - /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ - while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease++; - totalCost -= 1 << (nBitsToDecrease - 1); - if (rankLast[nBitsToDecrease - 1] == noSymbol) - rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits++; - if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } - } /* while (totalCost > 0) */ - - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 - (using maxNbBits) */ - while (huffNode[n].nbBits == maxNbBits) - n--; - huffNode[n + 1].nbBits--; - rankLast[1] = n + 1; - totalCost++; - continue; - } - huffNode[rankLast[1] + 1].nbBits--; - rankLast[1]++; - totalCost++; - } - } - } /* there are several too large elements (at least >= 2) */ - - return maxNbBits; -} - -typedef struct { - U32 base; - U32 curr; -} rankPos; - -static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue) -{ - rankPos rank[32]; - U32 n; - - memset(rank, 0, sizeof(rank)); - for (n = 0; n <= maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rank[r].base++; - } - for (n = 30; n > 0; n--) - rank[n - 1].base += rank[n].base; - for (n = 0; n < 32; n++) - rank[n].curr = rank[n].base; - for (n = 0; n <= maxSymbolValue; n++) { - U32 const c = count[n]; - U32 const r = BIT_highbit32(c + 1) + 1; - U32 pos = rank[r].curr++; - while ((pos > rank[r].base) && (c > huffNode[pos - 1].count)) - huffNode[pos] = huffNode[pos - 1], pos--; - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } -} - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. 
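   (it holds a huffNodeTable of 2 * HUF_SYMBOLVALUE_MAX + 2 nodeElt
   entries, i.e. 512 * 8 = 4096 bytes, which is exactly where the
   1024-unsigned minimum comes from)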
- */ -#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1) -typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1]; -size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize) -{ - nodeElt *const huffNode0 = (nodeElt *)workSpace; - nodeElt *const huffNode = huffNode0 + 1; - U32 n, nonNullRank; - int lowS, lowN; - U16 nodeNb = STARTNODE; - U32 nodeRoot; - - /* safety checks */ - if (wkspSize < sizeof(huffNodeTable)) - return ERROR(GENERIC); /* workSpace is not large enough */ - if (maxNbBits == 0) - maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(GENERIC); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue); - - /* init for parents */ - nonNullRank = maxSymbolValue; - while (huffNode[nonNullRank].count == 0) - nonNullRank--; - lowS = nonNullRank; - nodeRoot = nodeNb + lowS - 1; - lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count; - huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb; - nodeNb++; - lowS -= 2; - for (n = nodeNb; n <= nodeRoot; n++) - huffNode[n].count = (U32)(1U << 30); - huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */ - - /* create parents */ - while (nodeNb <= nodeRoot) { - U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = nodeNb; - nodeNb++; - } - - /* distribute weights (unlimited tree height) */ - huffNode[nodeRoot].nbBits = 0; - for (n = nodeRoot - 1; n >= STARTNODE; n--) - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; - for (n = 0; n <= nonNullRank; n++) - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; - - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { - U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0}; - if (maxNbBits > HUF_TABLELOG_MAX) - return ERROR(GENERIC); /* check fit into table */ - for (n = 0; n <= nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { - U16 min = 0; - for (n = maxNbBits; n > 0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } - } - for (n = 0; n <= maxSymbolValue; n++) - tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ - for (n = 0; n <= maxSymbolValue; n++) - tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ - } - - return maxNbBits; -} - -static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) -{ - size_t nbBits = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - nbBits += CTable[s].nbBits * count[s]; - } - return nbBits >> 3; -} - -static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) -{ - int bad = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); - } - return !bad; -} - -static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable) -{ - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -size_t 
HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } - -#define HUF_FLUSHBITS(s) BIT_flushBits(s) - -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \ - HUF_FLUSHBITS(stream) - -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \ - HUF_FLUSHBITS(stream) - -size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) -{ - const BYTE *ip = (const BYTE *)src; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - size_t n; - BIT_CStream_t bitC; - - /* init */ - if (dstSize < 8) - return 0; /* not enough space to compress */ - { - size_t const initErr = BIT_initCStream(&bitC, op, oend - op); - if (HUF_isError(initErr)) - return 0; - } - - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) { - case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); - fallthrough; - case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); - fallthrough; - case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); - fallthrough; - case 0: - default:; - } - - for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n - 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); -} - -size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) -{ - size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */ - const BYTE *ip = (const BYTE *)src; - const BYTE *const iend = ip + srcSize; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - - if (dstSize < 6 + 1 + 1 + 1 + 8) - return 0; /* minimum space to compress successfully */ - if (srcSize < 12) - return 0; /* no saving possible : too small input */ - op += 6; /* jumpTable */ - - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart + 2, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart + 4, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable)); - if (cSize == 0) - return 0; - op += cSize; - } - - return op - ostart; -} - -static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream, - const HUF_CElt *CTable) -{ - size_t const cSize = - singleStream ? 
HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); - if (HUF_isError(cSize)) { - return cSize; - } - if (cSize == 0) { - return 0; - } /* uncompressible */ - op += cSize; - /* check compressibility */ - if ((size_t)(op - ostart) >= srcSize - 1) { - return 0; - } - return op - ostart; -} - -/* `workSpace` must a table of at least 1024 unsigned */ -static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, - unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - - U32 *count; - size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1); - HUF_CElt *CTable; - size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1); - - /* checks & inits */ - if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) - return ERROR(GENERIC); - if (!srcSize) - return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ - if (!dstSize) - return 0; /* cannot fit within dst budget */ - if (srcSize > HUF_BLOCKSIZE_MAX) - return ERROR(srcSize_wrong); /* curr block size limit */ - if (huffLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - if (!maxSymbolValue) - maxSymbolValue = HUF_SYMBOLVALUE_MAX; - if (!huffLog) - huffLog = HUF_TABLELOG_DEFAULT; - - count = (U32 *)workSpace; - workSpace = (BYTE *)workSpace + countSize; - wkspSize -= countSize; - CTable = (HUF_CElt *)workSpace; - workSpace = (BYTE *)workSpace + CTableSize; - wkspSize -= CTableSize; - - /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - - /* Scan input and build symbol stats */ - { - CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace)); - if (largest == srcSize) { - *ostart = ((const BYTE *)src)[0]; - return 1; - } /* single symbol, rle */ - if (largest <= (srcSize >> 7) + 1) - return 0; /* Fast heuristic : not compressible enough */ - } - - /* Check validity of previous table */ - if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - - /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { - CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize)); - huffLog = (U32)maxBits; - /* Zero the unused symbols so we can check it for validity */ - memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt)); - } - - /* Write table description header */ - { - CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize)); - /* Check if using the previous table will be beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue); - size_t const newSize = 
HUF_estimateCompressedSize(CTable, count, maxSymbolValue); - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - } - /* Use the new table */ - if (hSize + 12ul >= srcSize) { - return 0; - } - op += hSize; - if (repeat) { - *repeat = HUF_repeat_none; - } - if (oldHufTable) { - memcpy(oldHufTable, CTable, CTableSize); - } /* Save the new table */ - } - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable); -} - -size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0); -} - -size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, - preferRepeat); -} - -size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0); -} - -size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, - preferRepeat); -} diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c deleted file mode 100644 index 6526482047dce..0000000000000 --- a/lib/zstd/huf_decompress.c +++ /dev/null @@ -1,960 +0,0 @@ -/* - * Huffman decoder, part of New Generation Entropy library - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#define FORCE_INLINE static __always_inline - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include "bitstream.h" /* BIT_* */ -#include "fse.h" /* header compression */ -#include "huf.h" -#include -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_STATIC_ASSERT(c) \ - { \ - enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ - -typedef struct { - BYTE maxTableLog; - BYTE tableType; - BYTE tableLog; - BYTE reserved; -} DTableDesc; - -static DTableDesc HUF_getDTableDesc(const HUF_DTable *table) -{ - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; -} - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ - -typedef struct { - BYTE byte; - BYTE nbBits; -} HUF_DEltX2; /* single-symbol decoding */ - -size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void *const dtPtr = DTable + 1; - HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr; - - U32 *rankVal; - BYTE *huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... 
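   : HUF_readStats_wksp() below fills every huffWeight[] entry that the
   subsequent loops read (n < nbSymbols), so the memset would only silence
   the analyzer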
*/ - - iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (HUF_isError(iSize)) - return iSize; - - /* Table header */ - { - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog + 1)) - return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); - } - - /* Calculate starting value for each rank */ - { - U32 n, nextRankStart = 0; - for (n = 1; n < tableLog + 1; n++) { - U32 const curr = nextRankStart; - nextRankStart += (rankVal[n] << (n - 1)); - rankVal[n] = curr; - } - } - - /* fill DTable */ - { - U32 n; - for (n = 0; n < nbSymbols; n++) { - U32 const w = huffWeight[n]; - U32 const length = (1 << w) >> 1; - U32 u; - HUF_DEltX2 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - for (u = rankVal[w]; u < rankVal[w] + length; u++) - dt[u] = D; - rankVal[w] += length; - } - } - - return iSize; -} - -static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; -} - -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (ZSTD_64bits()) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog) -{ - BYTE *const pStart = p; - - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } - - /* closer to the end */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - /* no more data to retrieve from bitstream, hence no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - return pEnd - pStart; -} - -static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - BYTE *op = (BYTE *)dst; - BYTE *const oend = op + dstSize; - const void *dtPtr = DTable + 1; - const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - { - size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) - return errorCode; - } - - HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); - - /* check */ - if (!BIT_endOfDStream(&bitD)) - return ERROR(corruption_detected); - - return dstSize; -} - -size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) - return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t 
cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -} - -static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - /* Check */ - if (cSrcSize < 10) - return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE *const istart = (const BYTE *)cSrc; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; - const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = ZSTD_readLE16(istart); - size_t const length2 = ZSTD_readLE16(istart + 2); - size_t const length3 = ZSTD_readLE16(istart + 4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE *const istart1 = istart + 6; /* jumpTable */ - const BYTE *const istart2 = istart1 + length1; - const BYTE *const istart3 = istart2 + length2; - const BYTE *const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize + 3) / 4; - BYTE *const opStart2 = ostart + segmentSize; - BYTE *const opStart3 = opStart2 + segmentSize; - BYTE *const opStart4 = opStart3 + segmentSize; - BYTE *op1 = ostart; - BYTE *op2 = opStart2; - BYTE *op3 = opStart3; - BYTE *op4 = opStart4; - U32 endSignal; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) - return ERROR(corruption_detected); /* overflow */ - { - size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) - return ERROR(corruption_detected); - if (op2 > opStart3) - return 
ERROR(corruption_detected); - if (op3 > opStart4) - return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) - return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; - } -} - -size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) - return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); -} - -/* *************************/ -/* double-symbols decoding */ -/* *************************/ -typedef struct { - U16 sequence; - BYTE nbBits; - BYTE length; -} HUF_DEltX4; /* double-symbols decoding */ - -typedef struct { - BYTE symbol; - BYTE weight; -} sortedSymbol_t; - -/* HUF_fillDTableX4Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight, - const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX4 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; - - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill skipped values */ - if (minWeight > 1) { - U32 i, skipSize = rankVal[minWeight]; - ZSTD_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } - - /* fill DTable */ - { - U32 s; - for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */ - const U32 symbol = sortedSymbols[s].symbol; - const U32 weight = sortedSymbols[s].weight; - const U32 nbBits = nbBitsBaseline - weight; - const U32 length = 1 << (sizeLog - nbBits); - const U32 start = rankVal[weight]; - U32 i = start; - const U32 end = start + length; - - ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); - DElt.nbBits = (BYTE)(nbBits + consumed); - DElt.length = 2; - do { - DTable[i++] = DElt; - } while (i < end); /* since length >= 1 */ - - rankVal[weight] += length; - } - } -} - -typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; - -static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart, - rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = 
nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s = 0; s < sortedListSize; s++) { - const U16 symbol = sortedList[s].symbol; - const U32 weight = sortedList[s].weight; - const U32 nbBits = nbBitsBaseline - weight; - const U32 start = rankVal[weight]; - const U32 length = 1 << (targetLog - nbBits); - - if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) - minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank, - sortedListSize - sortedRank, nbBitsBaseline, symbol); - } else { - HUF_DEltX4 DElt; - ZSTD_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { - U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) - DTable[u] = DElt; - } - } - rankVal[weight] += length; - } -} - -size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr; - U32 *rankStart; - - rankValCol_t *rankVal; - U32 *rankStats; - U32 *rankStart0; - sortedSymbol_t *sortedSymbol; - BYTE *weightList; - size_t spaceUsed32 = 0; - - HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0); - - rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); - - HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
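The two fill routines above (HUF_fillDTableX4Level2 and HUF_fillDTableX4) rely on the standard direct-lookup trick: a code consuming nbBits of a tableLog-bit index is replicated across 1 << (tableLog - nbBits) consecutive slots, so every index whose top nbBits match the code resolves to the same entry. A minimal single-symbol sketch of that replication, with hypothetical names:

#include <stdint.h>

/* Fill a direct-lookup decode table: an entry read with 'nbBits' bits
 * of a 'tableLog'-bit index must occupy all indices sharing its top
 * bits, i.e. a run of 1 << (tableLog - nbBits) slots. */
static void fill_entries(uint8_t *table, unsigned tableLog,
			 unsigned start, unsigned nbBits, uint8_t symbol)
{
	unsigned const length = 1u << (tableLog - nbBits);
	unsigned u;

	for (u = start; u < start + length; u++)
		table[u] = symbol;
}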
*/ - - iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (HUF_isError(iSize)) - return iSize; - - /* check result */ - if (tableLog > maxTableLog) - return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW] == 0; maxW--) { - } /* necessarily finds a solution before 0 */ - - /* Get start index of each weight */ - { - U32 w, nextRankStart = 0; - for (w = 1; w < maxW + 1; w++) { - U32 curr = nextRankStart; - nextRankStart += rankStats[w]; - rankStart[w] = curr; - } - rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ - sizeOfSort = nextRankStart; - } - - /* sort symbols by weight */ - { - U32 s; - for (s = 0; s < nbSymbols; s++) { - U32 const w = weightList[s]; - U32 const r = rankStart[w]++; - sortedSymbol[r].symbol = (BYTE)s; - sortedSymbol[r].weight = (BYTE)w; - } - rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ - } - - /* Build rankVal */ - { - U32 *const rankVal0 = rankVal[0]; - { - int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */ - U32 nextRankVal = 0; - U32 w; - for (w = 1; w < maxW + 1; w++) { - U32 curr = nextRankVal; - nextRankVal += rankStats[w] << (w + rescale); - rankVal0[w] = curr; - } - } - { - U32 const minBits = tableLog + 1 - maxW; - U32 consumed; - for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { - U32 *const rankValPtr = rankVal[consumed]; - U32 w; - for (w = 1; w < maxW + 1; w++) { - rankValPtr[w] = rankVal0[w] >> consumed; - } - } - } - } - - HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1); - - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} - -static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt + val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt + val, 1); - if (dt[val].length == 1) - BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8)) - /* ugly hack; works only because it's the last symbol. 
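The "sort symbols by weight" pass just above is a counting sort: prefix sums turn per-weight counts into bucket start offsets, then one scatter pass drops each symbol into its weight's bucket. A standalone sketch under two stated assumptions: every live symbol has weight >= 1, and rankStart[] holds raw per-weight counts on entry:

#include <stdint.h>

/* Counting sort of symbols by weight; maxW is the largest weight used. */
static void sort_by_weight(const uint8_t *weights, unsigned nbSymbols,
			   unsigned maxW, uint8_t *sorted, uint32_t *rankStart)
{
	unsigned w, s;
	uint32_t next = 0;

	for (w = 1; w <= maxW; w++) {	/* counts -> bucket start offsets */
		uint32_t const curr = next;
		next += rankStart[w];
		rankStart[w] = curr;
	}
	for (s = 0; s < nbSymbols; s++)	/* stable scatter into buckets */
		sorted[rankStart[weights[s]]++] = (uint8_t)s;
}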
Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8); - } - } - return 1; -} - -#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ - if (ZSTD_64bits()) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog) -{ - BYTE *const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) { - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_1(p, bitDPtr); - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - } - - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2)) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - - while (p <= pEnd - 2) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); - - return p - pStart; -} - -static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - BIT_DStream_t bitD; - - /* Init */ - { - size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* decode */ - { - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); - } - - /* check */ - if (!BIT_endOfDStream(&bitD)) - return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; -} - -size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) - return ERROR(GENERIC); - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -} - -static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - if (cSrcSize < 10) - return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE *const istart = (const BYTE *)cSrc; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; - const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; - - /* Init */ - BIT_DStream_t 
bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = ZSTD_readLE16(istart); - size_t const length2 = ZSTD_readLE16(istart + 2); - size_t const length3 = ZSTD_readLE16(istart + 4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE *const istart1 = istart + 6; /* jumpTable */ - const BYTE *const istart2 = istart1 + length1; - const BYTE *const istart3 = istart2 + length2; - const BYTE *const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize + 3) / 4; - BYTE *const opStart2 = ostart + segmentSize; - BYTE *const opStart3 = opStart2 + segmentSize; - BYTE *const opStart4 = opStart3 + segmentSize; - BYTE *op1 = ostart; - BYTE *op2 = opStart2; - BYTE *op3 = opStart3; - BYTE *op4 = opStart4; - U32 endSignal; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) - return ERROR(corruption_detected); /* overflow */ - { - size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) { - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_1(op1, &bitD1); - HUF_DECODE_SYMBOLX4_1(op2, &bitD2); - HUF_DECODE_SYMBOLX4_1(op3, &bitD3); - HUF_DECODE_SYMBOLX4_1(op4, &bitD4); - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_0(op1, &bitD1); - HUF_DECODE_SYMBOLX4_0(op2, &bitD2); - HUF_DECODE_SYMBOLX4_0(op3, &bitD3); - HUF_DECODE_SYMBOLX4_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) - return ERROR(corruption_detected); - if (op2 > opStart3) - return ERROR(corruption_detected); - if (op3 > opStart4) - return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { - U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) - return ERROR(corruption_detected); - } - - /* decoded size */ - return dstSize; - } -} - -size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) - return ERROR(GENERIC); - return 
HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* ********************************/
-/* Generic decompression selector */
-/* ********************************/
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-typedef struct {
-	U32 tableTime;
-	U32 decode256Time;
-} algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
-	/* single, double, quad */
-	{{0, 0}, {1, 1}, {2, 2}},		 /* Q==0 : impossible */
-	{{0, 0}, {1, 1}, {2, 2}},		 /* Q==1 : impossible */
-	{{38, 130}, {1313, 74}, {2151, 38}},	 /* Q == 2 : 12-18% */
-	{{448, 128}, {1353, 74}, {2238, 41}},	 /* Q == 3 : 18-25% */
-	{{556, 128}, {1353, 74}, {2238, 47}},	 /* Q == 4 : 25-32% */
-	{{714, 128}, {1418, 74}, {2436, 53}},	 /* Q == 5 : 32-38% */
-	{{883, 128}, {1437, 74}, {2464, 61}},	 /* Q == 6 : 38-44% */
-	{{897, 128}, {1515, 75}, {2622, 68}},	 /* Q == 7 : 44-50% */
-	{{926, 128}, {1613, 75}, {2730, 75}},	 /* Q == 8 : 50-56% */
-	{{947, 128}, {1729, 77}, {3359, 77}},	 /* Q == 9 : 56-62% */
-	{{1107, 128}, {2083, 81}, {4006, 84}},	 /* Q ==10 : 62-69% */
-	{{1177, 128}, {2379, 87}, {4785, 88}},	 /* Q ==11 : 69-75% */
-	{{1242, 128}, {2415, 93}, {5155, 84}},	 /* Q ==12 : 75-81% */
-	{{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
-	{{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
-	{{722, 128}, {1891, 145}, {1936, 146}},	 /* Q ==15 : 93-99% */
-};
-
-/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
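To make the metric concrete before the selector's body below, worked through on illustrative sizes: for dstSize = 65536 and cSrcSize = 32768, Q = 32768 * 16 / 65536 = 8 and D256 = 65536 >> 8 = 256. Row Q == 8 of algoTime gives DTime0 = 926 + 128 * 256 = 33694 for the single-symbol decoder and DTime1 = 1613 + 75 * 256 = 20813 for the double-symbol decoder; the cache penalty adds 20813 >> 3 = 2601, for a total of 23414. Since 23414 < 33694, the selector returns 1 and the double-symbol (X4) path is chosen.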
-* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize) -{ - /* decoder timing evaluation */ - U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ - - return DTime1 < DTime0; -} - -typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize); - -size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) - return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { - memcpy(dst, cSrc, dstSize); - return dstSize; - } /* not compressed */ - if (cSrcSize == 1) { - memset(dst, *(const BYTE *)cSrc, dstSize); - return dstSize; - } /* RLE */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} - -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) - return ERROR(corruption_detected); /* invalid */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) - return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { - memcpy(dst, cSrc, dstSize); - return dstSize; - } /* not compressed */ - if (cSrcSize == 1) { - memset(dst, *(const BYTE *)cSrc, dstSize); - return dstSize; - } /* RLE */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h deleted file mode 100644 index 93d7a2c377fe3..0000000000000 --- a/lib/zstd/mem.h +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. 
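HUF_decompress4X_DCtx_wksp, HUF_decompress4X_hufOnly_wksp and HUF_decompress1X_DCtx_wksp above share one validation prologue before any Huffman work: an empty destination is an error, cSrcSize > dstSize is corruption, equal sizes mean the payload was stored raw, and a one-byte source is RLE. A condensed sketch of that shared triage; it collapses the distinct ERROR(...) codes of the real functions into a single 0 return for brevity:

#include <stddef.h>
#include <string.h>

/* Returns bytes produced by a fast path, or 0 meaning "error or no
 * fast path" (the real code distinguishes these via ERROR codes). */
static size_t huf_triage(void *dst, size_t dstSize,
			 const void *cSrc, size_t cSrcSize)
{
	if (dstSize == 0 || cSrcSize > dstSize)
		return 0;			/* error cases upstream */
	if (cSrcSize == dstSize) {		/* stored raw, not compressed */
		memcpy(dst, cSrc, dstSize);
		return dstSize;
	}
	if (cSrcSize == 1) {			/* RLE: one byte repeated */
		memset(dst, *(const unsigned char *)cSrc, dstSize);
		return dstSize;
	}
	return 0;				/* fall through to a decoder */
}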
- * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -/*-**************************************** -* Dependencies -******************************************/ -#include -#include /* memcpy */ -#include /* size_t, ptrdiff_t */ - -/*-**************************************** -* Compiler specifics -******************************************/ -#define ZSTD_STATIC static inline - -/*-************************************************************** -* Basic Types -*****************************************************************/ -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef int16_t S16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef int64_t S64; -typedef ptrdiff_t iPtrDiff; -typedef uintptr_t uPtrDiff; - -/*-************************************************************** -* Memory I/O -*****************************************************************/ -ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; } -ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; } - -#if defined(__LITTLE_ENDIAN) -#define ZSTD_LITTLE_ENDIAN 1 -#else -#define ZSTD_LITTLE_ENDIAN 0 -#endif - -ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; } - -ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); } - -ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); } - -ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); } - -ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); } - -ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } - -ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } - -ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); } - -/*=== Little endian r/w ===*/ - -ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); } - -ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); } - -ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val) -{ - ZSTD_writeLE16(memPtr, (U16)val); - ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); -} - -ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); } - -ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); } - -ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr) -{ - if (ZSTD_32bits()) - return (size_t)ZSTD_readLE32(memPtr); - else - return (size_t)ZSTD_readLE64(memPtr); -} - -ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val) -{ - if (ZSTD_32bits()) - ZSTD_writeLE32(memPtr, (U32)val); - else - ZSTD_writeLE64(memPtr, (U64)val); -} - -/*=== Big endian r/w ===*/ - -ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return 
get_unaligned_be32(memPtr); } - -ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); } - -ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); } - -ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); } - -ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr) -{ - if (ZSTD_32bits()) - return (size_t)ZSTD_readBE32(memPtr); - else - return (size_t)ZSTD_readBE64(memPtr); -} - -ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val) -{ - if (ZSTD_32bits()) - ZSTD_writeBE32(memPtr, (U32)val); - else - ZSTD_writeBE64(memPtr, (U64)val); -} - -/* function safe only for comparisons */ -ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length) -{ - switch (length) { - default: - case 4: return ZSTD_read32(memPtr); - case 3: - if (ZSTD_isLittleEndian()) - return ZSTD_read32(memPtr) << 8; - else - return ZSTD_read32(memPtr) >> 8; - } -} - -#endif /* MEM_H_MODULE */ diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c deleted file mode 100644 index a282624ee1553..0000000000000 --- a/lib/zstd/zstd_common.c +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/*-************************************* -* Dependencies -***************************************/ -#include "error_private.h" -#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ -#include - -/*=************************************************************** -* Custom allocator -****************************************************************/ - -#define stack_push(stack, size) \ - ({ \ - void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \ - (stack)->ptr = (char *)ptr + (size); \ - (stack)->ptr <= (stack)->end ? 
ptr : NULL; \
-	})
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace};
-	ZSTD_stack *stack = (ZSTD_stack *)workspace;
-	/* Verify preconditions */
-	if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) {
-		ZSTD_customMem error = {NULL, NULL, NULL};
-		return error;
-	}
-	/* Initialize the stack */
-	stack->ptr = workspace;
-	stack->end = (char *)workspace + workspaceSize;
-	stack_push(stack, sizeof(ZSTD_stack));
-	return stackMem;
-}
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	*size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr);
-	return stack_push(stack, *size);
-}
-
-void *ZSTD_stackAlloc(void *opaque, size_t size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	return stack_push(stack, size);
-}
-void ZSTD_stackFree(void *opaque, void *address)
-{
-	(void)opaque;
-	(void)address;
-}
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); }
-
-void ZSTD_free(void *ptr, ZSTD_customMem customMem)
-{
-	if (ptr != NULL)
-		customMem.customFree(customMem.opaque, ptr);
-}
diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
new file mode 100644
index 0000000000000..65548a4bb9341
--- /dev/null
+++ b/lib/zstd/zstd_compress_module.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
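The deleted allocator above is a bump ("stack") allocator over one flat workspace: ZSTD_initStack plants a ZSTD_stack header at the front, stack_push aligns and advances the cursor (returning NULL on exhaustion), and ZSTD_stackFree is deliberately a no-op because the whole workspace is released at once. A minimal usage sketch against those declarations; the buffer size is illustrative and initialization is assumed to succeed:

/* Bump-allocate from a caller-owned workspace; individual frees are
 * no-ops, dropping the workspace reclaims everything at once. */
static void stack_alloc_demo(void)
{
	static char wksp[4096] __attribute__((aligned(sizeof(size_t))));
	ZSTD_customMem mem = ZSTD_initStack(wksp, sizeof(wksp));
	void *a = ZSTD_malloc(128, mem);	/* right after the header */
	void *b = ZSTD_malloc(256, mem);	/* immediately after 'a' */

	ZSTD_free(a, mem);			/* intentionally does nothing */
	(void)b;
}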
+ */ + +#include +#include +#include +#include + +#include "common/zstd_deps.h" +#include "common/zstd_internal.h" + +#define ZSTD_FORWARD_IF_ERR(ret) \ + do { \ + size_t const __ret = (ret); \ + if (ZSTD_isError(__ret)) \ + return __ret; \ + } while (0) + +static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters, + unsigned long long pledged_src_size) +{ + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset( + cctx, ZSTD_reset_session_and_parameters)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize( + cctx, pledged_src_size)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_windowLog, parameters->cParams.windowLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_hashLog, parameters->cParams.hashLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_chainLog, parameters->cParams.chainLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_searchLog, parameters->cParams.searchLog)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_minMatch, parameters->cParams.minMatch)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_targetLength, parameters->cParams.targetLength)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_strategy, parameters->cParams.strategy)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag)); + ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter( + cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag)); + return 0; +} + +int zstd_min_clevel(void) +{ + return ZSTD_minCLevel(); +} +EXPORT_SYMBOL(zstd_min_clevel); + +int zstd_max_clevel(void) +{ + return ZSTD_maxCLevel(); +} +EXPORT_SYMBOL(zstd_max_clevel); + +size_t zstd_compress_bound(size_t src_size) +{ + return ZSTD_compressBound(src_size); +} +EXPORT_SYMBOL(zstd_compress_bound); + +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size) +{ + return ZSTD_getParams(level, estimated_src_size, 0); +} +EXPORT_SYMBOL(zstd_get_params); + +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCCtxSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cctx_workspace_bound); + +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticCCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_cctx); + +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters) +{ + ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size)); + return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_compress_cctx); + +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCStreamSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cstream_workspace_bound); + +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size) +{ + zstd_cstream *cstream; + + if (workspace == NULL) + return NULL; + + cstream = ZSTD_initStaticCStream(workspace, workspace_size); + if (cstream == NULL) + return NULL; + + /* 0 means unknown in linux zstd API but means 0 in new zstd API */ + if (pledged_src_size == 0) + pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; + + if (ZSTD_isError(zstd_cctx_init(cstream, 
parameters, pledged_src_size))) + return NULL; + + return cstream; +} +EXPORT_SYMBOL(zstd_init_cstream); + +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size) +{ + return ZSTD_resetCStream(cstream, pledged_src_size); +} +EXPORT_SYMBOL(zstd_reset_cstream); + +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_compressStream(cstream, output, input); +} +EXPORT_SYMBOL(zstd_compress_stream); + +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_flushStream(cstream, output); +} +EXPORT_SYMBOL(zstd_flush_stream); + +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_endStream(cstream, output); +} +EXPORT_SYMBOL(zstd_end_stream); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c new file mode 100644 index 0000000000000..f4ed952ed4852 --- /dev/null +++ b/lib/zstd/zstd_decompress_module.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#include "common/zstd_deps.h" + +/* Common symbols. zstd_compress must depend on zstd_decompress. */ + +unsigned int zstd_is_error(size_t code) +{ + return ZSTD_isError(code); +} +EXPORT_SYMBOL(zstd_is_error); + +zstd_error_code zstd_get_error_code(size_t code) +{ + return ZSTD_getErrorCode(code); +} +EXPORT_SYMBOL(zstd_get_error_code); + +const char *zstd_get_error_name(size_t code) +{ + return ZSTD_getErrorName(code); +} +EXPORT_SYMBOL(zstd_get_error_name); + +/* Decompression symbols. 
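The compression wrappers just added give kernel code a simple one-shot path: derive parameters from a level, size the workspace from those parameters, place a static cctx in it, then compress. A usage sketch built only from the functions exported above, assuming the usual kernel allocation helpers; level 3 and the error handling style are illustrative:

/* One-shot compression with the new kernel zstd API. Returns the
 * compressed size, or 0 on any failure. */
static size_t zstd_oneshot_demo(void *dst, size_t dst_capacity,
				const void *src, size_t src_size)
{
	zstd_parameters params = zstd_get_params(3, src_size);
	size_t const bound = zstd_cctx_workspace_bound(&params.cParams);
	void *wksp = kvmalloc(bound, GFP_KERNEL);
	zstd_cctx *cctx;
	size_t ret;

	if (!wksp)
		return 0;
	cctx = zstd_init_cctx(wksp, bound);
	if (!cctx) {
		kvfree(wksp);
		return 0;
	}
	ret = zstd_compress_cctx(cctx, dst, dst_capacity, src, src_size, &params);
	if (zstd_is_error(ret))		/* zstd_get_error_name(ret) names the cause */
		ret = 0;
	kvfree(wksp);
	return ret;
}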
*/ + +size_t zstd_dctx_workspace_bound(void) +{ + return ZSTD_estimateDCtxSize(); +} +EXPORT_SYMBOL(zstd_dctx_workspace_bound); + +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticDCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dctx); + +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size) +{ + return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_decompress_dctx); + +size_t zstd_dstream_workspace_bound(size_t max_window_size) +{ + return ZSTD_estimateDStreamSize(max_window_size); +} +EXPORT_SYMBOL(zstd_dstream_workspace_bound); + +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + (void)max_window_size; + return ZSTD_initStaticDStream(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dstream); + +size_t zstd_reset_dstream(zstd_dstream *dstream) +{ + return ZSTD_resetDStream(dstream); +} +EXPORT_SYMBOL(zstd_reset_dstream); + +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_decompressStream(dstream, output, input); +} +EXPORT_SYMBOL(zstd_decompress_stream); + +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) +{ + return ZSTD_findFrameCompressedSize(src, src_size); +} +EXPORT_SYMBOL(zstd_find_frame_compressed_size); + +size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, + size_t src_size) +{ + return ZSTD_getFrameHeader(header, src, src_size); +} +EXPORT_SYMBOL(zstd_get_frame_header); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Decompressor"); diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h deleted file mode 100644 index dac753397f868..0000000000000 --- a/lib/zstd/zstd_internal.h +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -#define FORCE_INLINE static __always_inline -#define FORCE_NOINLINE static noinline - -/*-************************************* -* Dependencies -***************************************/ -#include "error_private.h" -#include "mem.h" -#include -#include -#include -#include - -/*-************************************* -* shared macros -***************************************/ -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) -#define CHECK_F(f) \ - { \ - size_t const errcod = f; \ - if (ERR_isError(errcod)) \ - return errcod; \ - } /* check and Forward error code */ -#define CHECK_E(f, e) \ - { \ - size_t const errcod = f; \ - if (ERR_isError(errcod)) \ - return ERROR(e); \ - } /* check and send Error code */ -#define ZSTD_STATIC_ASSERT(c) \ - { \ - enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \ - } - -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_NUM (1 << 12) -#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ - -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1) -#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) -static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8}; - -#define KB *(1 << 10) -#define MB *(1 << 20) -#define GB *(1U << 30) - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8}; -static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4}; - -#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ -static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define HufLog 12 -typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 -#define EQUAL_READ32 4 - -#define Litbits 8 -#define MaxLit ((1 << Litbits) - 1) -#define MaxML 52 -#define MaxLL 35 -#define MaxOff 28 -#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ -#define MLFSELog 9 -#define LLFSELog 9 -#define OffFSELog 8 - -static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1}; -#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; - -static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; - -static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; - -/*-******************************************* -* Shared functions to include for inlining -*********************************************/ -ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { - /* - * zstd relies heavily on gcc 
being able to analyze and inline this - * memcpy() call, since it is called in a tight loop. Preboot mode - * is compiled in freestanding mode, which stops gcc from analyzing - * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a - * regular memcpy(). - */ - __builtin_memcpy(dst, src, 8); -} -/*! ZSTD_wildcopy() : -* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ -#define WILDCOPY_OVERLENGTH 8 -ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) -{ - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; -#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 - /* - * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. - * Avoid the bad case where the loop only runs once by handling the - * special case separately. This doesn't trigger the bug because it - * doesn't involve pointer/integer overflow. - */ - if (length <= 8) - return ZSTD_copy8(dst, src); -#endif - do { - ZSTD_copy8(op, ip); - op += 8; - ip += 8; - } while (op < oend); -} - -/*-******************************************* -* Private interfaces -*********************************************/ -typedef struct ZSTD_stats_s ZSTD_stats_t; - -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; - -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; - -typedef struct { - seqDef *sequencesStart; - seqDef *sequences; - BYTE *litStart; - BYTE *lit; - BYTE *llCode; - BYTE *mlCode; - BYTE *ofCode; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; - /* opt */ - ZSTD_optimal_t *priceTable; - ZSTD_match_t *matchTable; - U32 *matchLengthFreq; - U32 *litLengthFreq; - U32 *litFreq; - U32 *offCodeFreq; - U32 matchLengthSum; - U32 matchSum; - U32 litLengthSum; - U32 litSum; - U32 offCodeSum; - U32 log2matchLengthSum; - U32 log2matchSum; - U32 log2litLengthSum; - U32 log2litSum; - U32 log2offCodeSum; - U32 factor; - U32 staticPrices; - U32 cachedPrice; - U32 cachedLitLength; - const BYTE *cachedLiterals; -} seqStore_t; - -const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx); -void ZSTD_seqToCodes(const seqStore_t *seqStorePtr); -int ZSTD_isSkipFrame(ZSTD_DCtx *dctx); - -/*= Custom memory allocation functions */ -typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size); -typedef void (*ZSTD_freeFunction)(void *opaque, void *address); -typedef struct { - ZSTD_allocFunction customAlloc; - ZSTD_freeFunction customFree; - void *opaque; -} ZSTD_customMem; - -void *ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void *ptr, ZSTD_customMem customMem); - -/*====== stack allocation ======*/ - -typedef struct { - void *ptr; - const void *end; -} ZSTD_stack; - -#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t)) -#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t)) - -ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize); - -void *ZSTD_stackAllocAll(void *opaque, size_t *size); -void *ZSTD_stackAlloc(void *opaque, size_t size); -void ZSTD_stackFree(void *opaque, void *address); - -/*====== common function ======*/ - -ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); } - -/* hidden functions */ - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. 
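ZSTD_wildcopy above is why several buffers in this library carry slack: it copies in unconditional 8-byte chunks, so per its own comment it may write up to 7 bytes past dst + length (8 when length == 0). Callers therefore keep WILDCOPY_OVERLENGTH bytes of margin at the end of the destination. A sketch of the caller-side check, with hypothetical names:

#include <stddef.h>
#include <stdint.h>

#define WILDCOPY_OVERLENGTH 8

/* A wildcopy of 'length' bytes at 'op' is safe only while the 8-byte
 * chunked loop cannot run past the real end of the output buffer. */
static int wildcopy_fits(const uint8_t *op, const uint8_t *buf_end,
			 size_t length)
{
	return (size_t)(buf_end - op) >= length + WILDCOPY_OVERLENGTH;
}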
- * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx); - -size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx); -size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx); -size_t ZSTD_freeCDict(ZSTD_CDict *cdict); -size_t ZSTD_freeDDict(ZSTD_DDict *cdict); -size_t ZSTD_freeCStream(ZSTD_CStream *zcs); -size_t ZSTD_freeDStream(ZSTD_DStream *zds); - -#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h deleted file mode 100644 index 55e1b4cba8088..0000000000000 --- a/lib/zstd/zstd_opt.h +++ /dev/null @@ -1,1014 +0,0 @@ -/** - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/* Note : this file is intended to be included within zstd_compress.c */ - -#ifndef ZSTD_OPT_H_91842398743 -#define ZSTD_OPT_H_91842398743 - -#define ZSTD_LITFREQ_ADD 2 -#define ZSTD_FREQ_DIV 4 -#define ZSTD_MAX_PRICE (1 << 30) - -/*-************************************* -* Price functions for optimal parser -***************************************/ -FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr) -{ - ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1); - ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1); - ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1); - ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1); - ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum)); -} - -ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize) -{ - unsigned u; - - ssPtr->cachedLiterals = NULL; - ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; - ssPtr->staticPrices = 0; - - if (ssPtr->litLengthSum == 0) { - if (srcSize <= 1024) - ssPtr->staticPrices = 1; - - for (u = 0; u <= MaxLit; u++) - ssPtr->litFreq[u] = 0; - for (u = 0; u < srcSize; u++) - ssPtr->litFreq[src[u]]++; - - ssPtr->litSum = 0; - ssPtr->litLengthSum = MaxLL + 1; - ssPtr->matchLengthSum = MaxML + 1; - ssPtr->offCodeSum = (MaxOff + 1); - ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits); - - for (u = 0; u <= MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u = 0; u <= MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u = 0; u <= MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u = 0; u <= MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; - } else { - ssPtr->matchLengthSum = 0; - ssPtr->litLengthSum = 0; - ssPtr->offCodeSum = 0; - ssPtr->matchSum = 0; - ssPtr->litSum = 0; - - for (u = 0; u <= MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1)); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u = 0; u <= MaxLL; u++) { - ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1)); - ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; - } - for (u = 0; u <= MaxML; u++) { - 
ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; - ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); - } - ssPtr->matchSum *= ZSTD_LITFREQ_ADD; - for (u = 0; u <= MaxOff; u++) { - ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; - } - } - - ZSTD_setLog2Prices(ssPtr); -} - -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals) -{ - U32 price, u; - - if (ssPtr->staticPrices) - return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6); - - if (litLength == 0) - return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1); - - /* literals */ - if (ssPtr->cachedLiterals == literals) { - U32 const additional = litLength - ssPtr->cachedLitLength; - const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; - price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; - for (u = 0; u < additional; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1); - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } else { - price = litLength * ssPtr->log2litSum; - for (u = 0; u < litLength; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1); - - if (litLength >= 12) { - ssPtr->cachedLiterals = literals; - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } - } - - /* literal Length */ - { - const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1); - } - - return price; -} - -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra) -{ - /* offset */ - U32 price; - BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); - - if (seqStorePtr->staticPrices) - return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode; - - price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1); - if (!ultra && offCode >= 20) - price += (offCode - 19) * 2; - - /* match Length */ - { - const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1); - } - - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; -} - -ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD; - for (u = 0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - - /* literal Length */ - { - const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - seqStorePtr->litLengthFreq[llCode]++; - seqStorePtr->litLengthSum++; - } - - /* match offset */ - { - BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); - seqStorePtr->offCodeSum++; - seqStorePtr->offCodeFreq[offCode]++; - } - - /* match Length */ - { - const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength > 127) ? 
(BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - seqStorePtr->matchLengthFreq[mlCode]++; - seqStorePtr->matchLengthSum++; - } - - ZSTD_setLog2Prices(seqStorePtr); -} - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { \ - opt[last_pos + 1].price = ZSTD_MAX_PRICE; \ - last_pos++; \ - } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - } - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip) -{ - U32 *const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE *const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); - - while (idx < target) { - hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx; - idx++; - } - - return hashTable3[hash3]; -} - -/*-************************************* -* Binary Tree search -***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - const BYTE *const base = zc->base; - const U32 curr = (U32)(ip - base); - const U32 hashLog = zc->params.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const hashTable = zc->hashTable; - U32 matchIndex = hashTable[h]; - U32 *const bt = zc->chainTable; - const U32 btLog = zc->params.cParams.chainLog - 1; - const U32 btMask = (1U << btLog) - 1; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const U32 btLow = btMask >= curr ? 0 : curr - btMask; - const U32 windowLow = zc->lowLimit; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = bt + 2 * (curr & btMask) + 1; - U32 matchEndIdx = curr + 8; - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - - const U32 minMatch = (mls == 3) ? 
3 : 4; - size_t bestLength = minMatchLen - 1; - - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip); - if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) { - const BYTE *match; - size_t currMl = 0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) - currMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex3; - if (ZSTD_readMINMATCH(match, MINMATCH) == - ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currMl > bestLength) { - bestLength = currMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3; - matches[mnum].len = (U32)currMl; - mnum++; - if (currMl > ZSTD_OPT_NUM) - goto update; - if (ip + currMl == iLimit) - goto update; /* best possible, and avoid read overflow*/ - } - } - } - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE *match; - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) { - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1; - } - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex; - matches[mnum].len = (U32)matchLength; - mnum++; - if (matchLength > ZSTD_OPT_NUM) - break; - if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - -update: - zc->nextToUpdate = (matchEndIdx > curr + 8) ? 
matchEndIdx - 8 : curr + 1; - return mnum; -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches, - const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - -static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - switch (matchLengthSearch) { - case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default: - case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7: - case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - switch (matchLengthSearch) { - case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default: - case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7: - case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/*-******************************* -* Optimal parser -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const BYTE *const prefixStart = base + ctx->dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 
3 : 4; - - ZSTD_optimal_t *opt = seqStorePtr->priceTable; - ZSTD_match_t *matches = seqStorePtr->matchTable; - const BYTE *inr; - U32 offset, rep[ZSTD_REP_NUM]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); - ip += (ip == prefixStart); - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - rep[i] = ctx->rep[i]; - } - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - litlen = (U32)(ip - anchor); - - /* check repCode */ - { - U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); - for (i = (ip == anchor); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) && - (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { - mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch; - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - best_off = i - (ip == anchor); - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); - - if (!last_pos && !match_num) { - ip++; - continue; - } - - if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - /* set prices using matches at position = 0 */ - best_mlen = (last_pos) ? last_pos : minMatch; - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? 
matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } - } - - if (last_pos < minMatch) { - ip++; - continue; - } - - /* initialize opt[0] */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - opt[0].rep[i] = rep[i]; - } - opt[0].mlen = 1; - opt[0].litlen = litlen; - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur - 1].mlen == 1) { - litlen = opt[cur - 1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) - break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur - mlen].rep[1]; - opt[cur].rep[1] = opt[cur - mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; - opt[cur].rep[0] = - ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { - U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */ - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? 
(opt[cur].rep[0] - 1) : opt[cur].rep[i]; - if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) && - (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) - best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, - best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, - matches[u].off - 1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } - } - } - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) - break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur = 0; cur < last_pos;) { - mlen = opt[cur].mlen; - if (mlen == 1) { - ip++; - cur++; - continue; - } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset == ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); - if (offset != 1) - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - if (litLength == 0) - offset--; - } - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - anchor = ip = ip + mlen; - } - } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - ctx->repToConfirm[i] = rep[i]; - } - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t *opt = seqStorePtr->priceTable; - ZSTD_match_t *matches = seqStorePtr->matchTable; - const BYTE *inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - rep[i] = ctx->rep[i]; - } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 curr = (U32)(ip - base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { - U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); - for (i = (ip == anchor); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - const U32 repIndex = (U32)(curr - repCur); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if ((repCur > 0 && repCur <= (S32)curr) && - (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? 
dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip == anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { - ip++; - continue; - } - - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - opt[0].rep[i] = rep[i]; - } - opt[0].mlen = 1; - - if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } - } - - if (last_pos < minMatch) { - ip++; - continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur - 1].mlen == 1) { - litlen = opt[cur - 1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) - break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur - mlen].rep[1]; - opt[cur].rep[1] = opt[cur - mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; - opt[cur].rep[0] = - ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { - U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; - const U32 repIndex = (U32)(curr + cur - repCur); - const BYTE *const repBase = repIndex < dictLimit ? 
dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if ((repCur > 0 && repCur <= (S32)(curr + cur)) && - (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) - best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, - best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, - matches[u].off - 1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } - } - } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) - break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur = 0; cur < last_pos;) { - mlen = opt[cur].mlen; - if (mlen == 1) { - ip++; - cur++; - continue; - } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset == ZSTD_REP_MOVE_OPT) ? 
(rep[0] - 1) : (rep[offset]); - if (offset != 1) - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength == 0) - offset--; - } - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - anchor = ip = ip + mlen; - } - } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - ctx->repToConfirm[i] = rep[i]; - } - - /* Last Literals */ - { - size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -#endif /* ZSTD_OPT_H_91842398743 */ From 464413496acb619a380364a26912f02c8b709d29 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 26 Apr 2021 16:34:03 -0700 Subject: [PATCH 0193/1202] MAINTAINERS: Add maintainer entry for zstd Adds a maintainer entry for zstd listing myself as the maintainer for all zstd code, pointing to the upstream issues tracker for bugs, and listing my linux repo as the tree. Signed-off-by: Nick Terrell --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index abdcbcfef73d0..33b8894ce20d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20783,6 +20783,18 @@ F: Documentation/vm/zsmalloc.rst F: include/linux/zsmalloc.h F: mm/zsmalloc.c +ZSTD +M: Nick Terrell +S: Maintained +B: https://github.com/facebook/zstd/issues +T: git git://github.com/terrelln/linux.git +F: include/linux/zstd* +F: lib/zstd/ +F: lib/decompress_unzstd.c +F: crypto/zstd.c +N: zstd +K: zstd + ZSWAP COMPRESSED SWAP CACHING M: Seth Jennings M: Dan Streetman From 89910d26b66bacb8869a6b9a30d07793385d2bc7 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Mon, 20 Sep 2021 15:24:52 +0200 Subject: [PATCH 0194/1202] KVM: s390: pv: avoid stalls when making pages secure Improve make_secure_pte to avoid stalls when the system is heavily overcommitted. This was especially problematic in kvm_s390_pv_unpack, because of the loop over all pages that needed unpacking. Due to the locks being held, it was not possible to simply replace uv_call with uv_call_sched. A more complex approach was needed, in which uv_call is replaced with __uv_call, which does not loop. When the UVC needs to be executed again, -EAGAIN is returned, and the caller (or its caller) will try again. When -EAGAIN is returned, the path is the same as when the page is in writeback (and the writeback check is also performed, which is harmless). 
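To make the resulting calling convention concrete, a sleeping caller can now
retry the operation itself. The helper below is only a sketch of that pattern
(the function name and the retry loop are illustrative, not part of this
patch; gmap_make_secure() is the real entry point):

static int make_secure_with_retry(struct gmap *gmap, unsigned long gaddr,
				  void *uvcb)
{
	int rc;

	do {
		/* May now return -EAGAIN instead of looping internally. */
		rc = gmap_make_secure(gmap, gaddr, uvcb);
		if (rc == -EAGAIN)
			cond_resched();	/* back off, avoid stall notifications */
	} while (rc == -EAGAIN);

	return rc;
}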
Fixes: 214d9bbcd3a672 ("s390/mm: provide memory management functions for protected KVM guests")
Signed-off-by: Claudio Imbrenda 
Reviewed-by: Janosch Frank 
Reviewed-by: Christian Borntraeger 
Link: https://lore.kernel.org/r/20210920132502.36111-5-imbrenda@linux.ibm.com
Signed-off-by: Christian Borntraeger 
---
 arch/s390/kernel/uv.c     | 29 +++++++++++++++++++++++------
 arch/s390/kvm/intercept.c |  5 +++++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index f95ccbd396925..09b80d371409b 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -165,7 +165,7 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
 {
 	pte_t entry = READ_ONCE(*ptep);
 	struct page *page;
-	int expected, rc = 0;
+	int expected, cc = 0;
 
 	if (!pte_present(entry))
 		return -ENXIO;
@@ -181,12 +181,25 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
 	if (!page_ref_freeze(page, expected))
 		return -EBUSY;
 	set_bit(PG_arch_1, &page->flags);
-	rc = uv_call(0, (u64)uvcb);
+	/*
+	 * If the UVC does not succeed or fail immediately, we don't want to
+	 * loop for long, or we might get stall notifications.
+	 * On the other hand, this is a complex scenario and we are holding a lot of
+	 * locks, so we can't easily sleep and reschedule. We try only once,
+	 * and if the UVC returned busy or partial completion, we return
+	 * -EAGAIN and we let the callers deal with it.
+	 */
+	cc = __uv_call(0, (u64)uvcb);
 	page_ref_unfreeze(page, expected);
-	/* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
-	if (rc)
-		rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
-	return rc;
+	/*
+	 * Return -ENXIO if the page was not mapped, -EINVAL for other errors.
+	 * If busy or partially completed, return -EAGAIN.
+	 */
+	if (cc == UVC_CC_OK)
+		return 0;
+	else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL)
+		return -EAGAIN;
+	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
 }
 
 /*
@@ -239,6 +252,10 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
 	mmap_read_unlock(gmap->mm);
 
 	if (rc == -EAGAIN) {
+		/*
+		 * If we are here because the UVC returned busy or partial
+		 * completion, this is just a useless check, but it is safe.
+		 */
 		wait_on_page_writeback(page);
 	} else if (rc == -EBUSY) {
 		/*
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 2bd8f854f1b41..d07ff646d8440 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -518,6 +518,11 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 	 */
	if (rc == -EINVAL)
		return 0;
+	/*
+	 * If we got -EAGAIN here, we simply return it. It will eventually
+	 * get propagated all the way to userspace, which should then try
+	 * again.
+	 */
	return rc;
 }
 

From bc3e77b75d985ea21dca40c5c274b2d401efd0e0 Mon Sep 17 00:00:00 2001
From: Eric Farman 
Date: Fri, 8 Oct 2021 22:31:07 +0200
Subject: [PATCH 0195/1202] KVM: s390: Simplify SIGP Set Arch handling

The Principles of Operations describe the various reasons that each
individual SIGP order might be rejected, and the status bits that are
set for each condition.

For example, for the Set Architecture order, it states:

  "If it is not true that all other CPUs in the configuration are in
  the stopped or check-stop state, ... bit 54 (incorrect state) ...
  is set to one."

However, it also states:

  "... if the CZAM facility is installed, ...
  bit 55 (invalid parameter) ... is set to one."
Since the Configuration-z/Architecture-Architectural Mode (CZAM) facility is unconditionally presented, there is no need to examine each VCPU to determine if it is started/stopped. It can simply be rejected outright with the Invalid Parameter bit. Fixes: b697e435aeee ("KVM: s390: Support Configuration z/Architecture Mode") Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: Claudio Imbrenda Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20211008203112.1979843-2-farman@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sigp.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a8..cf4de80bd5410 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, u64 *status_reg) { - unsigned int i; - struct kvm_vcpu *v; - bool all_stopped = true; - - kvm_for_each_vcpu(i, v, vcpu->kvm) { - if (v == vcpu) - continue; - if (!is_vcpu_stopped(v)) - all_stopped = false; - } - *status_reg &= 0xffffffff00000000UL; /* Reject set arch order, with czam we're always in z/Arch mode. */ - *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : - SIGP_STATUS_INCORRECT_STATE); + *status_reg |= SIGP_STATUS_INVALID_PARAMETER; return SIGP_CC_STATUS_STORED; } From 2ef7843375dca1fd0247adda34238ab584fc063d Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 8 Oct 2021 22:31:12 +0200 Subject: [PATCH 0196/1202] KVM: s390: Add a routine for setting userspace CPU state This capability exists, but we don't record anything when userspace enables it. Let's refactor that code so that a note can be made in the debug logs that it was enabled. 
Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: Claudio Imbrenda Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20211008203112.1979843-7-farman@linux.ibm.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 6 +++--- arch/s390/kvm/kvm-s390.h | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6a6dd5e1daf63..5f52e7eec02f9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2487,8 +2487,8 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_S390_PV_COMMAND: { struct kvm_pv_cmd args; - /* protvirt means user sigp */ - kvm->arch.user_cpu_state_ctrl = 1; + /* protvirt means user cpu state */ + kvm_s390_set_user_cpu_state_ctrl(kvm); r = 0; if (!is_prot_virt_host()) { r = -EINVAL; @@ -3801,7 +3801,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); /* user space knows about this interface - let it control the state */ - vcpu->kvm->arch.user_cpu_state_ctrl = 1; + kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); switch (mp_state->mp_state) { case KVM_MP_STATE_STOPPED: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 52bc8fbaa60ac..c07a050d757d3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -208,6 +208,15 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) return kvm->arch.user_cpu_state_ctrl != 0; } +static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm) +{ + if (kvm->arch.user_cpu_state_ctrl) + return; + + VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control"); + kvm->arch.user_cpu_state_ctrl = 1; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); From 8c4251a8bed099796a7296eb4513d0bf07ab46c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Oct 2021 15:06:30 +0200 Subject: [PATCH 0197/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 80 ++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 arch/arm/arm-soc-for-next-contents.txt diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt new file mode 100644 index 0000000000000..dd838cb70ecc8 --- /dev/null +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -0,0 +1,80 @@ +arm/soc + omap/soc + git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/soc-signed + at91/soc + git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux tags/at91-soc-5.16 + +arm/dt + zynq/dt + https://github.com/Xilinx/linux-xlnx tags/zynqmp-dt-for-v5.16 + rockchip/dt64 + git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts64-1 + rockchip/dt32 + git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts32-1 + renesas/dt + git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-arm-dt-for-v5.16-tag1 + renesas/dt-bindings + git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-dt-bindings-for-v5.16-tag1 + zynq/dt64 + https://github.com/Xilinx/linux-xlnx tags/zynqmp-dt-for-v5.16-v2 + amlogic/dt + git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux tags/amlogic-arm64-dt-for-v5.16 + patch + dt-bindings: arm: Add bindings for Unisoc's UMS512 + arm64: dts: Add support for Unisoc's UMS512 + tegra/dt-bindings + 
git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-dt-bindings + tegra/dt + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-arm64-dt + mediatek/dts32 + git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux tags/v5.15-next-dts32 + omap/dt + git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/dt-signed + at91/dt + git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux tags/at91-dt-5.16 + patch + Revert "arm64: dts: Add support for Unisoc's UMS512" + tegra/dt32 + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-arm-dt + k3/dts + git://git.kernel.org/pub/scm/linux/kernel/git/nmenon/linux tags/ti-k3-dt-for-v5.16 + +arm/drivers + renesas/drivers + git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-drivers-for-v5.16-tag1 + amlogic/drivers + git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux tags/amlogic-drivers-for-v5.16 + tegra/firmware + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-firmware + tegra/soc + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-soc + tegra/cpuidle + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-cpuidle + tegra/memory + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl tags/memory-controller-drv-tegra-5.16 + mediatek/memory + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl tags/memory-controller-drv-mtk-5.16 + drivers/memory + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl tags/memory-controller-drv-5.16 + mediatek/soc + git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux tags/v5.15-next-soc + omap/ti-sysc + git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/ti-sysc-signed + +arm/defconfigs + defconfig/multiv7 + git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc tags/multiv7-defconfig-5.16 + renesas/defconfig + git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-arm-defconfig-for-v5.16-tag1 + patch + arm64: defconfig: drop obsolete ARCH_* configs + tegra/defconfig + git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-arm64-defconfig + +arm/newsoc + +arm/late + +arm/fixes + firmware/ffa + git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux tags/ffa-fixes-5.15 From 33633b20e0da301f9009cc9aa00282acbc282a1f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 06:57:07 -0400 Subject: [PATCH 0198/1202] x86/sgx/virt: Extract sgx_vepc_remove_page() For bare-metal SGX on real hardware, the hardware provides guaranteed SGX state at reboot. For instance, all pages start out uninitialized. The vepc driver provides a similar guarantee today for freshly-opened vepc instances, but guests such as Windows expect all pages to be in uninitialized state on startup, including after every guest reboot. One way to do this is to simply close and reopen the /dev/sgx_vepc file descriptor and re-mmap the virtual EPC. However, this is problematic because it prevents sandboxing the userspace (for example forbidding open() after the guest starts; this is doable with heavy use of SCM_RIGHTS file descriptor passing). In order to implement this, an ioctl will be needed that performs EREMOVE on all pages mapped by a /dev/sgx_vepc file descriptor: other possibilities, such as closing and reopening the device, are racy. 
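To make the intended userspace flow concrete, here is a sketch of how a
sandboxed VMM could reset the guest EPC with such an ioctl. It is only an
illustration: SGX_IOC_VEPC_REMOVE_ALL and the two-round usage are introduced
and documented by the follow-up patch, and the helper name and error handling
below are assumptions, not part of this series.

#include <sys/ioctl.h>
#include <asm/sgx.h>	/* SGX_IOC_VEPC_REMOVE_ALL, added by the next patch */

/* Reset every vEPC region without reopening the file descriptors.
 * A positive ioctl() return value is the number of pages that could
 * not be removed yet (SECS pages with children), so a second round
 * is needed once all regions have had their child pages removed. */
static int vepc_reset_all(int *vepc_fds, int nfds)
{
	long remaining;
	int i, round;

	for (round = 0; round < 2; round++) {
		remaining = 0;
		for (i = 0; i < nfds; i++) {
			long ret = ioctl(vepc_fds[i], SGX_IOC_VEPC_REMOVE_ALL);

			if (ret < 0)
				return -1;	/* e.g. EBUSY: vcpus still in the enclave */
			remaining += ret;
		}
		if (remaining == 0)
			return 0;	/* all pages are uninitialized again */
	}
	return -1;	/* unexpected leftovers after the SECS round */
}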
Start the implementation by creating a separate function with just the
__eremove() wrapper.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Borislav Petkov 
Reviewed-by: Jarkko Sakkinen 
Link: https://lkml.kernel.org/r/20211012105708.2070480-2-pbonzini@redhat.com
---
 arch/x86/kernel/cpu/sgx/virt.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 64511c4a52001..59cdf3f742acf 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -111,10 +111,8 @@ static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
+static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page)
 {
-	int ret;
-
 	/*
 	 * Take a previously guest-owned EPC page and return it to the
 	 * general EPC page pool.
@@ -124,7 +122,12 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 	 * case that a guest properly EREMOVE'd this page, a superfluous
 	 * EREMOVE is harmless.
 	 */
-	ret = __eremove(sgx_get_epc_virt_addr(epc_page));
+	return __eremove(sgx_get_epc_virt_addr(epc_page));
+}
+
+static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
+{
+	int ret = sgx_vepc_remove_page(epc_page);
 	if (ret) {
 		/*
 		 * Only SGX_CHILD_PRESENT is expected, which is because of
@@ -144,7 +147,6 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 	}
 
 	sgx_free_epc_page(epc_page);
-
 	return 0;
 }
 

From 71eba1c0939e3b1ad1b71fe0171de30e265437e3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini 
Date: Tue, 12 Oct 2021 06:57:08 -0400
Subject: [PATCH 0199/1202] x86/sgx/virt: Implement SGX_IOC_VEPC_REMOVE ioctl

For bare-metal SGX on real hardware, the hardware provides guaranteed
SGX state at reboot. For instance, all pages start out uninitialized.
The vepc driver provides a similar guarantee today for freshly-opened
vepc instances, but guests such as Windows expect all pages to be in
uninitialized state on startup, including after every guest reboot.

Some userspace implementations of virtual SGX would rather avoid having
to close and reopen the /dev/sgx_vepc file descriptor and re-mmap the
virtual EPC. For example, they could sandbox themselves after the guest
starts and forbid further calls to open(), in order to mitigate exploits
from untrusted guests.

Therefore, add an ioctl that does this with EREMOVE. Userspace can
invoke the ioctl to bring its vEPC pages back to uninitialized state.

There is a possibility that some pages fail to be removed if they are
SECS pages, and the child and SECS pages could be in separate vEPC
regions. Therefore, the ioctl returns the number of EREMOVE failures,
telling userspace to try the ioctl again after it's done with all vEPC
regions. A more verbose description of the correct usage and the
possible error conditions is documented in sgx.rst.

[ bp: Minor massaging.
] Signed-off-by: Paolo Bonzini Signed-off-by: Borislav Petkov Reviewed-by: Jarkko Sakkinen Link: https://lkml.kernel.org/r/20211012105708.2070480-3-pbonzini@redhat.com --- Documentation/x86/sgx.rst | 26 +++++++++++++++ arch/x86/include/asm/sgx.h | 3 ++ arch/x86/include/uapi/asm/sgx.h | 2 ++ arch/x86/kernel/cpu/sgx/virt.c | 57 +++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+) diff --git a/Documentation/x86/sgx.rst b/Documentation/x86/sgx.rst index dd0ac96ff9efb..7bc9c3b297d64 100644 --- a/Documentation/x86/sgx.rst +++ b/Documentation/x86/sgx.rst @@ -250,3 +250,29 @@ user wants to deploy SGX applications both on the host and in guests on the same machine, the user should reserve enough EPC (by taking out total virtual EPC size of all SGX VMs from the physical EPC size) for host SGX applications so they can run with acceptable performance. + +Architectural behavior is to restore all EPC pages to an uninitialized +state also after a guest reboot. Because this state can be reached only +through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc`` +provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction +on all pages in the virtual EPC. + +``EREMOVE`` can fail for two reasons, which Linux relays to userspace +in a different manner: + +1. Page removal will always fail when any thread is running in the + enclave to which the page belongs. In this case the ioctl will + return ``EBUSY`` independent of whether it has successfully removed + some pages; userspace can avoid these failures by preventing execution + of any vcpu which maps the virtual EPC. + +2) Page removal will also fail for SGX "SECS" metadata pages which still + have child pages. Child pages can be removed by executing + ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors + mapped into the guest. This means that the ioctl() must be called + twice: an initial set of calls to remove child pages and a subsequent + set of calls to remove SECS pages. The second set of calls is only + required for those mappings that returned a nonzero value from the + first call. It indicates a bug in the kernel or the userspace client + if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has + a return code other than 0. diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 05f3e21f01a74..2e5d8c97655e8 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -50,6 +50,8 @@ enum sgx_encls_function { * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not * been completed yet. * %SGX_CHILD_PRESENT SECS has child pages present in the EPC. + * %SGX_ENCLAVE_ACT One or more logical processors are executing + * inside the enclave. * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's * public key does not match IA32_SGXLEPUBKEYHASH. * %SGX_UNMASKED_EVENT: An unmasked event, e.g. 
INTR, was received
@@ -57,6 +59,7 @@ enum sgx_return_code {
 	SGX_NOT_TRACKED			= 11,
 	SGX_CHILD_PRESENT		= 13,
+	SGX_ENCLAVE_ACT			= 14,
 	SGX_INVALID_EINITTOKEN		= 16,
 	SGX_UNMASKED_EVENT		= 128,
 };
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 9690d6899ad98..f4b81587e90bb 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -27,6 +27,8 @@ enum sgx_page_flags {
 	_IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init)
 #define SGX_IOC_ENCLAVE_PROVISION \
 	_IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision)
+#define SGX_IOC_VEPC_REMOVE_ALL \
+	_IO(SGX_MAGIC, 0x04)
 
 /**
  * struct sgx_enclave_create - parameter structure for the
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 59cdf3f742acf..4465253a770a1 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -150,6 +150,46 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 	return 0;
 }
 
+static long sgx_vepc_remove_all(struct sgx_vepc *vepc)
+{
+	struct sgx_epc_page *entry;
+	unsigned long index;
+	long failures = 0;
+
+	xa_for_each(&vepc->page_array, index, entry) {
+		int ret = sgx_vepc_remove_page(entry);
+		switch (ret) {
+		case 0:
+			break;
+
+		case SGX_CHILD_PRESENT:
+			failures++;
+			break;
+
+		case SGX_ENCLAVE_ACT:
+			/*
+			 * Unlike in sgx_vepc_free_page, userspace could be calling
+			 * the ioctl while logical processors are running in the
+			 * enclave; do not warn.
+			 */
+			return -EBUSY;
+
+		default:
+			WARN_ONCE(1, EREMOVE_ERROR_MESSAGE, ret, ret);
+			failures++;
+			break;
+		}
+		cond_resched();
+	}
+
+	/*
+	 * Return the number of pages that failed to be removed, so
+	 * userspace knows that there are still SECS pages lying
+	 * around.
+	 */
+	return failures;
+}
+
 static int sgx_vepc_release(struct inode *inode, struct file *file)
 {
 	struct sgx_vepc *vepc = file->private_data;
@@ -235,9 +275,26 @@ static int sgx_vepc_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static long sgx_vepc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct sgx_vepc *vepc = file->private_data;
+
+	switch (cmd) {
+	case SGX_IOC_VEPC_REMOVE_ALL:
+		if (arg)
+			return -EINVAL;
+		return sgx_vepc_remove_all(vepc);
+
+	default:
+		return -ENOTTY;
+	}
+}
+
 static const struct file_operations sgx_vepc_fops = {
 	.owner		= THIS_MODULE,
 	.open		= sgx_vepc_open,
+	.unlocked_ioctl	= sgx_vepc_ioctl,
+	.compat_ioctl	= sgx_vepc_ioctl,
 	.release	= sgx_vepc_release,
 	.mmap		= sgx_vepc_mmap,
 };

From 32a8aabcfecc09660e253e41f0cc300df75ef062 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla 
Date: Tue, 7 Sep 2021 10:55:16 +0100
Subject: [PATCH 0200/1202] nvmem: core: rework nvmem cell instance creation

In the existing design, an instance of an nvmem cell is directly a
reference to an entry in the list of nvmem cells that are added to the
provider. However, this design has some limitations when consumers want
to assign a name or connection id to the nvmem cell instance, e.g. via
"nvmem-cell-names" or the id in nvmem_cell_get(id).

Having a name associated with the nvmem cell consumer instance will
help provider drivers in performing post processing of nvmem cell data,
if required, before the data is seen by the consumers. This is pretty
normal with some vendors storing nvmem cells like mac-address in
vendor-specific data layouts that are not directly usable by the
consumer drivers.

With this patch the nvmem cell will be created dynamically during
nvmem_cell_get and destroyed in nvmem_cell_put, allowing consumers to
associate a name with the nvmem cell consumer instance.
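For context, the consumer side this is aimed at looks like the following
minimal sketch (read_mac() and the "mac-address" cell name are illustrative;
the nvmem_cell_* calls are the existing consumer API). With this change, the
id string passed to nvmem_cell_get() is recorded on the per-consumer cell
instance:

#include <linux/err.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include <linux/string.h>

/* The "mac-address" id passed here is now kept on the cell instance,
 * so a provider could later key post processing off it. */
static int read_mac(struct device *dev, u8 *mac, size_t maclen)
{
	struct nvmem_cell *cell;
	size_t len;
	void *buf;

	cell = nvmem_cell_get(dev, "mac-address");
	if (IS_ERR(cell))
		return PTR_ERR(cell);

	buf = nvmem_cell_read(cell, &len);	/* returns a kmalloc'd copy */
	nvmem_cell_put(cell);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	if (len == maclen)
		memcpy(mac, buf, maclen);
	kfree(buf);
	return len == maclen ? 0 : -EINVAL;
}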
With this patch a new struct nvmem_cell_entry replaces struct nvmem_cell for storing nvmem cell information within the core. Signed-off-by: Srinivas Kandagatla --- drivers/nvmem/core.c | 165 +++++++++++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 60 deletions(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 8976da38b375a..277ca5b47ade7 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -45,8 +45,7 @@ struct nvmem_device { #define to_nvmem_device(d) container_of(d, struct nvmem_device, dev) #define FLAG_COMPAT BIT(0) - -struct nvmem_cell { +struct nvmem_cell_entry { const char *name; int offset; int bytes; @@ -57,6 +56,11 @@ struct nvmem_cell { struct list_head node; }; +struct nvmem_cell { + struct nvmem_cell_entry *entry; + const char *id; +}; + static DEFINE_MUTEX(nvmem_mutex); static DEFINE_IDA(nvmem_ida); @@ -422,7 +426,7 @@ static struct bus_type nvmem_bus_type = { .name = "nvmem", }; -static void nvmem_cell_drop(struct nvmem_cell *cell) +static void nvmem_cell_entry_drop(struct nvmem_cell_entry *cell) { blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_REMOVE, cell); mutex_lock(&nvmem_mutex); @@ -435,13 +439,13 @@ static void nvmem_cell_drop(struct nvmem_cell *cell) static void nvmem_device_remove_all_cells(const struct nvmem_device *nvmem) { - struct nvmem_cell *cell, *p; + struct nvmem_cell_entry *cell, *p; list_for_each_entry_safe(cell, p, &nvmem->cells, node) - nvmem_cell_drop(cell); + nvmem_cell_entry_drop(cell); } -static void nvmem_cell_add(struct nvmem_cell *cell) +static void nvmem_cell_entry_add(struct nvmem_cell_entry *cell) { mutex_lock(&nvmem_mutex); list_add_tail(&cell->node, &cell->nvmem->cells); @@ -449,9 +453,9 @@ static void nvmem_cell_add(struct nvmem_cell *cell) blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_ADD, cell); } -static int nvmem_cell_info_to_nvmem_cell_nodup(struct nvmem_device *nvmem, - const struct nvmem_cell_info *info, - struct nvmem_cell *cell) +static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info, + struct nvmem_cell_entry *cell) { cell->nvmem = nvmem; cell->offset = info->offset; @@ -475,13 +479,13 @@ static int nvmem_cell_info_to_nvmem_cell_nodup(struct nvmem_device *nvmem, return 0; } -static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem, - const struct nvmem_cell_info *info, - struct nvmem_cell *cell) +static int nvmem_cell_info_to_nvmem_cell_entry(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info, + struct nvmem_cell_entry *cell) { int err; - err = nvmem_cell_info_to_nvmem_cell_nodup(nvmem, info, cell); + err = nvmem_cell_info_to_nvmem_cell_entry_nodup(nvmem, info, cell); if (err) return err; @@ -505,7 +509,7 @@ static int nvmem_add_cells(struct nvmem_device *nvmem, const struct nvmem_cell_info *info, int ncells) { - struct nvmem_cell **cells; + struct nvmem_cell_entry **cells; int i, rval; cells = kcalloc(ncells, sizeof(*cells), GFP_KERNEL); @@ -519,13 +523,13 @@ static int nvmem_add_cells(struct nvmem_device *nvmem, goto err; } - rval = nvmem_cell_info_to_nvmem_cell(nvmem, &info[i], cells[i]); + rval = nvmem_cell_info_to_nvmem_cell_entry(nvmem, &info[i], cells[i]); if (rval) { kfree(cells[i]); goto err; } - nvmem_cell_add(cells[i]); + nvmem_cell_entry_add(cells[i]); } /* remove tmp array */ @@ -534,7 +538,7 @@ static int nvmem_add_cells(struct nvmem_device *nvmem, return 0; err: while (i--) - nvmem_cell_drop(cells[i]); + nvmem_cell_entry_drop(cells[i]); kfree(cells); @@ -571,7 
+575,7 @@ static int nvmem_add_cells_from_table(struct nvmem_device *nvmem) { const struct nvmem_cell_info *info; struct nvmem_cell_table *table; - struct nvmem_cell *cell; + struct nvmem_cell_entry *cell; int rval = 0, i; mutex_lock(&nvmem_cell_mutex); @@ -586,15 +590,13 @@ static int nvmem_add_cells_from_table(struct nvmem_device *nvmem) goto out; } - rval = nvmem_cell_info_to_nvmem_cell(nvmem, - info, - cell); + rval = nvmem_cell_info_to_nvmem_cell_entry(nvmem, info, cell); if (rval) { kfree(cell); goto out; } - nvmem_cell_add(cell); + nvmem_cell_entry_add(cell); } } } @@ -604,10 +606,10 @@ static int nvmem_add_cells_from_table(struct nvmem_device *nvmem) return rval; } -static struct nvmem_cell * -nvmem_find_cell_by_name(struct nvmem_device *nvmem, const char *cell_id) +static struct nvmem_cell_entry * +nvmem_find_cell_entry_by_name(struct nvmem_device *nvmem, const char *cell_id) { - struct nvmem_cell *iter, *cell = NULL; + struct nvmem_cell_entry *iter, *cell = NULL; mutex_lock(&nvmem_mutex); list_for_each_entry(iter, &nvmem->cells, node) { @@ -678,7 +680,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) { struct device_node *parent, *child; struct device *dev = &nvmem->dev; - struct nvmem_cell *cell; + struct nvmem_cell_entry *cell; const __be32 *addr; int len; @@ -727,7 +729,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) } cell->np = of_node_get(child); - nvmem_cell_add(cell); + nvmem_cell_entry_add(cell); } return 0; @@ -1142,9 +1144,33 @@ struct nvmem_device *devm_nvmem_device_get(struct device *dev, const char *id) } EXPORT_SYMBOL_GPL(devm_nvmem_device_get); +static struct nvmem_cell *nvmem_create_cell(struct nvmem_cell_entry *entry, const char *id) +{ + struct nvmem_cell *cell; + const char *name = NULL; + + cell = kzalloc(sizeof(*cell), GFP_KERNEL); + if (!cell) + return ERR_PTR(-ENOMEM); + + if (id) { + name = kstrdup_const(id, GFP_KERNEL); + if (!name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + } + + cell->id = name; + cell->entry = entry; + + return cell; +} + static struct nvmem_cell * nvmem_cell_get_from_lookup(struct device *dev, const char *con_id) { + struct nvmem_cell_entry *cell_entry; struct nvmem_cell *cell = ERR_PTR(-ENOENT); struct nvmem_cell_lookup *lookup; struct nvmem_device *nvmem; @@ -1169,11 +1195,15 @@ nvmem_cell_get_from_lookup(struct device *dev, const char *con_id) break; } - cell = nvmem_find_cell_by_name(nvmem, - lookup->cell_name); - if (!cell) { + cell_entry = nvmem_find_cell_entry_by_name(nvmem, + lookup->cell_name); + if (!cell_entry) { __nvmem_device_put(nvmem); cell = ERR_PTR(-ENOENT); + } else { + cell = nvmem_create_cell(cell_entry, con_id); + if (IS_ERR(cell)) + __nvmem_device_put(nvmem); } break; } @@ -1184,10 +1214,10 @@ nvmem_cell_get_from_lookup(struct device *dev, const char *con_id) } #if IS_ENABLED(CONFIG_OF) -static struct nvmem_cell * -nvmem_find_cell_by_node(struct nvmem_device *nvmem, struct device_node *np) +static struct nvmem_cell_entry * +nvmem_find_cell_entry_by_node(struct nvmem_device *nvmem, struct device_node *np) { - struct nvmem_cell *iter, *cell = NULL; + struct nvmem_cell_entry *iter, *cell = NULL; mutex_lock(&nvmem_mutex); list_for_each_entry(iter, &nvmem->cells, node) { @@ -1217,6 +1247,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id) { struct device_node *cell_np, *nvmem_np; struct nvmem_device *nvmem; + struct nvmem_cell_entry *cell_entry; struct nvmem_cell *cell; int index = 0; @@ -1237,12 +1268,16 @@ struct nvmem_cell 
*of_nvmem_cell_get(struct device_node *np, const char *id) if (IS_ERR(nvmem)) return ERR_CAST(nvmem); - cell = nvmem_find_cell_by_node(nvmem, cell_np); - if (!cell) { + cell_entry = nvmem_find_cell_entry_by_node(nvmem, cell_np); + if (!cell_entry) { __nvmem_device_put(nvmem); return ERR_PTR(-ENOENT); } + cell = nvmem_create_cell(cell_entry, id); + if (IS_ERR(cell)) + __nvmem_device_put(nvmem); + return cell; } EXPORT_SYMBOL_GPL(of_nvmem_cell_get); @@ -1348,13 +1383,17 @@ EXPORT_SYMBOL(devm_nvmem_cell_put); */ void nvmem_cell_put(struct nvmem_cell *cell) { - struct nvmem_device *nvmem = cell->nvmem; + struct nvmem_device *nvmem = cell->entry->nvmem; + + if (cell->id) + kfree_const(cell->id); + kfree(cell); __nvmem_device_put(nvmem); } EXPORT_SYMBOL_GPL(nvmem_cell_put); -static void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, void *buf) +static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; int i, extra, bit_offset = cell->bit_offset; @@ -1388,8 +1427,8 @@ static void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, void *buf) } static int __nvmem_cell_read(struct nvmem_device *nvmem, - struct nvmem_cell *cell, - void *buf, size_t *len) + struct nvmem_cell_entry *cell, + void *buf, size_t *len, const char *id) { int rc; @@ -1420,18 +1459,18 @@ static int __nvmem_cell_read(struct nvmem_device *nvmem, */ void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) { - struct nvmem_device *nvmem = cell->nvmem; + struct nvmem_device *nvmem = cell->entry->nvmem; u8 *buf; int rc; if (!nvmem) return ERR_PTR(-EINVAL); - buf = kzalloc(cell->bytes, GFP_KERNEL); + buf = kzalloc(cell->entry->bytes, GFP_KERNEL); if (!buf) return ERR_PTR(-ENOMEM); - rc = __nvmem_cell_read(nvmem, cell, buf, len); + rc = __nvmem_cell_read(nvmem, cell->entry, buf, len, cell->id); if (rc) { kfree(buf); return ERR_PTR(rc); @@ -1441,7 +1480,7 @@ void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) } EXPORT_SYMBOL_GPL(nvmem_cell_read); -static void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell, +static void *nvmem_cell_prepare_write_buffer(struct nvmem_cell_entry *cell, u8 *_buf, int len) { struct nvmem_device *nvmem = cell->nvmem; @@ -1494,16 +1533,7 @@ static void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell, return ERR_PTR(rc); } -/** - * nvmem_cell_write() - Write to a given nvmem cell - * - * @cell: nvmem cell to be written. - * @buf: Buffer to be written. - * @len: length of buffer to be written to nvmem cell. - * - * Return: length of bytes written or negative on failure. - */ -int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len) +static int __nvmem_cell_entry_write(struct nvmem_cell_entry *cell, void *buf, size_t len) { struct nvmem_device *nvmem = cell->nvmem; int rc; @@ -1529,6 +1559,21 @@ int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len) return len; } + +/** + * nvmem_cell_write() - Write to a given nvmem cell + * + * @cell: nvmem cell to be written. + * @buf: Buffer to be written. + * @len: length of buffer to be written to nvmem cell. + * + * Return: length of bytes written or negative on failure. 
+ */ +int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len) +{ + return __nvmem_cell_entry_write(cell->entry, buf, len); +} + EXPORT_SYMBOL_GPL(nvmem_cell_write); static int nvmem_cell_read_common(struct device *dev, const char *cell_id, @@ -1631,7 +1676,7 @@ static const void *nvmem_cell_read_variable_common(struct device *dev, if (IS_ERR(cell)) return cell; - nbits = cell->nbits; + nbits = cell->entry->nbits; buf = nvmem_cell_read(cell, len); nvmem_cell_put(cell); if (IS_ERR(buf)) @@ -1727,18 +1772,18 @@ EXPORT_SYMBOL_GPL(nvmem_cell_read_variable_le_u64); ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem, struct nvmem_cell_info *info, void *buf) { - struct nvmem_cell cell; + struct nvmem_cell_entry cell; int rc; ssize_t len; if (!nvmem) return -EINVAL; - rc = nvmem_cell_info_to_nvmem_cell_nodup(nvmem, info, &cell); + rc = nvmem_cell_info_to_nvmem_cell_entry_nodup(nvmem, info, &cell); if (rc) return rc; - rc = __nvmem_cell_read(nvmem, &cell, buf, &len); + rc = __nvmem_cell_read(nvmem, &cell, buf, &len, NULL); if (rc) return rc; @@ -1758,17 +1803,17 @@ EXPORT_SYMBOL_GPL(nvmem_device_cell_read); int nvmem_device_cell_write(struct nvmem_device *nvmem, struct nvmem_cell_info *info, void *buf) { - struct nvmem_cell cell; + struct nvmem_cell_entry cell; int rc; if (!nvmem) return -EINVAL; - rc = nvmem_cell_info_to_nvmem_cell_nodup(nvmem, info, &cell); + rc = nvmem_cell_info_to_nvmem_cell_entry_nodup(nvmem, info, &cell); if (rc) return rc; - return nvmem_cell_write(&cell, buf, cell.bytes); + return __nvmem_cell_entry_write(&cell, buf, cell.bytes); } EXPORT_SYMBOL_GPL(nvmem_device_cell_write); From 3d20421782765fc226c96ad78a40bbbb2162d5a9 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 28 Sep 2021 15:11:09 +0100 Subject: [PATCH 0201/1202] nvmem: core: add nvmem cell post processing callback Some NVMEM providers have certain nvmem cells encoded, which requires post processing before actually using it. For example mac-address is stored in either in ascii or delimited or reverse-order. Having a post-process callback hook to provider drivers would enable them to do this vendor specific post processing before nvmem consumers see it. 
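For illustration, a provider-side hook matching the new nvmem_cell_post_process_t
signature could look like the sketch below. The names are hypothetical and it is
not part of this patch; the byte-reversal case mirrors what the i.MX OCOTP driver
does in a later patch in this series.

    /*
     * Illustrative sketch of a provider post-process hook.  Assumes,
     * for the sake of example, that the provider stores its
     * "mac-address" cell in reverse byte order.
     */
    static int example_cell_post_process(void *priv, const char *id,
                                         unsigned int offset, void *buf,
                                         size_t bytes)
    {
            u8 *data = buf;
            size_t i;

            if (!id || strcmp(id, "mac-address"))
                    return 0;       /* leave other cells untouched */

            for (i = 0; i < bytes / 2; i++)
                    swap(data[i], data[bytes - i - 1]);

            return 0;
    }

The provider would then register the hook through its nvmem_config, e.g.
config->cell_post_process = example_cell_post_process;.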
Signed-off-by: Srinivas Kandagatla --- drivers/nvmem/core.c | 9 +++++++++ include/linux/nvmem-provider.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 277ca5b47ade7..e765d3d0542e5 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -38,6 +38,7 @@ struct nvmem_device { unsigned int nkeepout; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; + nvmem_cell_post_process_t cell_post_process; struct gpio_desc *wp_gpio; void *priv; }; @@ -796,6 +797,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config) nvmem->type = config->type; nvmem->reg_read = config->reg_read; nvmem->reg_write = config->reg_write; + nvmem->cell_post_process = config->cell_post_process; nvmem->keepout = config->keepout; nvmem->nkeepout = config->nkeepout; if (config->of_node) @@ -1441,6 +1443,13 @@ static int __nvmem_cell_read(struct nvmem_device *nvmem, if (cell->bit_offset || cell->nbits) nvmem_shift_read_buffer_in_place(cell, buf); + if (nvmem->cell_post_process) { + rc = nvmem->cell_post_process(nvmem->priv, id, + cell->offset, buf, cell->bytes); + if (rc) + return rc; + } + if (len) *len = cell->bytes; diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 104505e9028f7..98efb7b5660d9 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -19,6 +19,9 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, void *val, size_t bytes); typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); +/* used for vendor specific post processing of cell data */ +typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, unsigned int offset, + void *buf, size_t bytes); enum nvmem_type { NVMEM_TYPE_UNKNOWN = 0, @@ -62,6 +65,7 @@ struct nvmem_keepout { * @no_of_node: Device should not use the parent's of_node even if it's !NULL. * @reg_read: Callback to read data. * @reg_write: Callback to write data. + * @cell_post_process: Callback for vendor specific post processing of cell data * @size: Device size. * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. @@ -92,6 +96,7 @@ struct nvmem_config { bool no_of_node; nvmem_reg_read_t reg_read; nvmem_reg_write_t reg_write; + nvmem_cell_post_process_t cell_post_process; int size; int word_size; int stride; From 413333fd6a88d023229cc121a5c65d48c4e9c7d6 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 23 Sep 2021 19:01:07 +0800 Subject: [PATCH 0202/1202] nvmem: imx-ocotp: add support for post processing Add .cell_post_process callback for imx-ocotp to deal with MAC address, since MAC address need to be reversed byte for some i.MX SoCs. 
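From the consumer side nothing changes; a hedged usage sketch (error handling
shortened) of reading such a cell, where the "mac-address" id is what ends up
being passed down to the post-process hook:

    struct nvmem_cell *cell;
    u8 *mac;
    size_t len;

    cell = nvmem_cell_get(dev, "mac-address");
    if (IS_ERR(cell))
            return PTR_ERR(cell);
    mac = nvmem_cell_read(cell, &len);  /* bytes arrive already fixed up */
    nvmem_cell_put(cell);
    if (IS_ERR(mac))
            return PTR_ERR(mac);
    /* ... use mac[0..len-1] ... */
    kfree(mac);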
Signed-off-by: Srinivas Kandagatla Signed-off-by: Joakim Zhang --- drivers/nvmem/imx-ocotp.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 08f41328cc711..14284e866f26e 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -97,6 +97,7 @@ struct ocotp_params { unsigned int bank_address_words; void (*set_timing)(struct ocotp_priv *priv); struct ocotp_ctrl_reg ctrl; + bool reverse_mac_address; }; static int imx_ocotp_wait_for_busy(struct ocotp_priv *priv, u32 flags) @@ -221,6 +222,25 @@ static int imx_ocotp_read(void *context, unsigned int offset, return ret; } +static int imx_ocotp_cell_pp(void *context, const char *id, unsigned int offset, + void *data, size_t bytes) +{ + struct ocotp_priv *priv = context; + + /* Deal with some post processing of nvmem cell data */ + if (id && !strcmp(id, "mac-address")) { + if (priv->params->reverse_mac_address) { + u8 *buf = data; + int i; + + for (i = 0; i < bytes/2; i++) + swap(buf[i], buf[bytes - i - 1]); + } + } + + return 0; +} + static void imx_ocotp_set_imx6_timing(struct ocotp_priv *priv) { unsigned long clk_rate; @@ -468,6 +488,7 @@ static struct nvmem_config imx_ocotp_nvmem_config = { .stride = 1, .reg_read = imx_ocotp_read, .reg_write = imx_ocotp_write, + .cell_post_process = imx_ocotp_cell_pp, }; static const struct ocotp_params imx6q_params = { @@ -530,6 +551,7 @@ static const struct ocotp_params imx8mq_params = { .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, + .reverse_mac_address = true, }; static const struct ocotp_params imx8mm_params = { @@ -537,6 +559,7 @@ static const struct ocotp_params imx8mm_params = { .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, + .reverse_mac_address = true, }; static const struct ocotp_params imx8mn_params = { @@ -544,6 +567,7 @@ static const struct ocotp_params imx8mn_params = { .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, + .reverse_mac_address = true, }; static const struct ocotp_params imx8mp_params = { @@ -551,6 +575,7 @@ static const struct ocotp_params imx8mp_params = { .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_8MP, + .reverse_mac_address = true, }; static const struct of_device_id imx_ocotp_dt_ids[] = { From 04e78a787b74b2a8ebece8e59ec0a54230fcf6ae Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 8 Oct 2021 12:06:18 +1300 Subject: [PATCH 0203/1202] arm/arm64: dts: Enable 2.5G Ethernet port on CN9130-CRB Enable the 2.5G Ethernet port by setting the status to "okay" and the phy-mode to "2500base-x" on the cn9130-crb boards. Tested on a CN9130-CRB-A. 
Signed-off-by: Chris Packham Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/cn9130-crb.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi b/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi index 505ae69289f6d..e7918f3256461 100644 --- a/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9130-crb.dtsi @@ -214,8 +214,8 @@ }; &cp0_eth2 { - /* This port uses "2500base-t" phy-mode */ - status = "disabled"; + status = "okay"; + phy-mode = "2500base-x"; phy = <&nbaset_phy0>; phys = <&cp0_comphy5 2>; }; From c1d7fa96e74bf38cc0ba9deb8d83f704e2de0f10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Sat, 4 Sep 2021 11:59:28 +0200 Subject: [PATCH 0204/1202] dm: introduce audit event module for device mapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to send auditing events to user space, we introduce a generic dm-audit module. It provides helper functions to emit audit events through the kernel audit subsystem. We claim the AUDIT_DM_CTRL type=1336 and AUDIT_DM_EVENT type=1337 out of the audit event messages range in the corresponding userspace api in 'include/uapi/linux/audit.h' for those events. AUDIT_DM_CTRL is used to provide information about creation and destruction of device mapper targets which are triggered by user space admin control actions. AUDIT_DM_EVENT is used to provide information about actual errors during operation of the mapped device, showing e.g. integrity violations in audit log. Following commits to device mapper targets actually will make use of this to emit those events in relevant cases. The audit logs look like this if executing the following simple test: # dd if=/dev/zero of=test.img bs=1M count=1024 # losetup -f test.img # integritysetup -vD format --integrity sha256 -t 32 /dev/loop0 # integritysetup open -D /dev/loop0 --integrity sha256 integritytest # integritysetup status integritytest # integritysetup close integritytest # integritysetup open -D /dev/loop0 --integrity sha256 integritytest # integritysetup status integritytest # dd if=/dev/urandom of=/dev/loop0 bs=512 count=1 seek=100000 # dd if=/dev/mapper/integritytest of=/dev/null ------------------------- audit.log from auditd type=UNKNOWN[1336] msg=audit(1630425039.363:184): module=integrity op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425039.471:185): module=integrity op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425039.611:186): module=integrity op=ctr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425054.475:187): module=integrity op=dtr ppid=3807 pid=3819 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425073.171:191): module=integrity op=ctr ppid=3807 pid=3883 auid=1000 uid=0 gid=0 euid=0 
suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425087.239:192): module=integrity op=dtr ppid=3807 pid=3902 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1336] msg=audit(1630425093.755:193): module=integrity op=ctr ppid=3807 pid=3906 auid=1000 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts2 ses=3 comm="integritysetup" exe="/sbin/integritysetup" subj==unconfined dev=254:3 error_msg='success' res=1 type=UNKNOWN[1337] msg=audit(1630425112.119:194): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:195): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:196): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:197): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:198): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:199): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:200): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:201): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:202): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 type=UNKNOWN[1337] msg=audit(1630425112.119:203): module=integrity op=integrity-checksum dev=254:3 sector=77480 res=0 Signed-off-by: Michael Weiß Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 9 ++++ drivers/md/Makefile | 4 ++ drivers/md/dm-audit.c | 84 ++++++++++++++++++++++++++++++++++++++ drivers/md/dm-audit.h | 66 ++++++++++++++++++++++++++++++ include/uapi/linux/audit.h | 2 + 5 files changed, 165 insertions(+) create mode 100644 drivers/md/dm-audit.c create mode 100644 drivers/md/dm-audit.h diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index f45fb372e51be..1bc4f48e97652 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -642,4 +642,13 @@ config DM_ZONED If unsure, say N. +config DM_AUDIT + bool "DM audit events" + depends on AUDIT + help + Generate audit events for device-mapper. + + Enables audit logging of several security relevant events in the + particular device-mapper targets, especially the integrity target. + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 816945eeed7f3..0454b0885b013 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -107,3 +107,7 @@ endif ifeq ($(CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG),y) dm-verity-objs += dm-verity-verify-sig.o endif + +ifeq ($(CONFIG_DM_AUDIT),y) +dm-mod-objs += dm-audit.o +endif diff --git a/drivers/md/dm-audit.c b/drivers/md/dm-audit.c new file mode 100644 index 0000000000000..3049dfe67e50b --- /dev/null +++ b/drivers/md/dm-audit.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Creating audit records for mapped devices. + * + * Copyright (C) 2021 Fraunhofer AISEC. All rights reserved. 
+ * + * Authors: Michael Weiß + */ + +#include +#include +#include +#include +#include + +#include "dm-audit.h" +#include "dm-core.h" + +static struct audit_buffer *dm_audit_log_start(int audit_type, + const char *dm_msg_prefix, + const char *op) +{ + struct audit_buffer *ab; + + if (audit_enabled == AUDIT_OFF) + return NULL; + + ab = audit_log_start(audit_context(), GFP_KERNEL, audit_type); + if (unlikely(!ab)) + return NULL; + + audit_log_format(ab, "module=%s op=%s", dm_msg_prefix, op); + return ab; +} + +void dm_audit_log_ti(int audit_type, const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result) +{ + struct audit_buffer *ab = NULL; + struct mapped_device *md = dm_table_get_md(ti->table); + int dev_major = dm_disk(md)->major; + int dev_minor = dm_disk(md)->first_minor; + + switch (audit_type) { + case AUDIT_DM_CTRL: + ab = dm_audit_log_start(audit_type, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + audit_log_task_info(ab); + audit_log_format(ab, " dev=%d:%d error_msg='%s'", dev_major, + dev_minor, !result ? ti->error : "success"); + break; + case AUDIT_DM_EVENT: + ab = dm_audit_log_start(audit_type, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + audit_log_format(ab, " dev=%d:%d sector=?", dev_major, + dev_minor); + break; + default: /* unintended use */ + return; + } + + audit_log_format(ab, " res=%d", result); + audit_log_end(ab); +} +EXPORT_SYMBOL_GPL(dm_audit_log_ti); + +void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, int result) +{ + struct audit_buffer *ab; + int dev_major = MAJOR(bio->bi_bdev->bd_dev); + int dev_minor = MINOR(bio->bi_bdev->bd_dev); + + ab = dm_audit_log_start(AUDIT_DM_EVENT, dm_msg_prefix, op); + if (unlikely(!ab)) + return; + + audit_log_format(ab, " dev=%d:%d sector=%llu res=%d", + dev_major, dev_minor, sector, result); + audit_log_end(ab); +} +EXPORT_SYMBOL_GPL(dm_audit_log_bio); diff --git a/drivers/md/dm-audit.h b/drivers/md/dm-audit.h new file mode 100644 index 0000000000000..2385f2b659bef --- /dev/null +++ b/drivers/md/dm-audit.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Creating audit records for mapped devices. + * + * Copyright (C) 2021 Fraunhofer AISEC. All rights reserved. + * + * Authors: Michael Weiß + */ + +#ifndef DM_AUDIT_H +#define DM_AUDIT_H + +#include +#include + +#ifdef CONFIG_DM_AUDIT +void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, int result); + +/* + * dm_audit_log_ti() is not intended to be used directly in dm modules, + * the wrapper functions below should be called by dm modules instead. 
+ */ +void dm_audit_log_ti(int audit_type, const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result); + +static inline void dm_audit_log_ctr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_CTRL, dm_msg_prefix, "ctr", ti, result); +} + +static inline void dm_audit_log_dtr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_CTRL, dm_msg_prefix, "dtr", ti, result); +} + +static inline void dm_audit_log_target(const char *dm_msg_prefix, const char *op, + struct dm_target *ti, int result) +{ + dm_audit_log_ti(AUDIT_DM_EVENT, dm_msg_prefix, op, ti, result); +} +#else +static inline void dm_audit_log_bio(const char *dm_msg_prefix, const char *op, + struct bio *bio, sector_t sector, + int result) +{ +} +static inline void dm_audit_log_target(const char *dm_msg_prefix, + const char *op, struct dm_target *ti, + int result) +{ +} +static inline void dm_audit_log_ctr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ +} + +static inline void dm_audit_log_dtr(const char *dm_msg_prefix, + struct dm_target *ti, int result) +{ +} +#endif + +#endif diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index daa481729e9ba..6650ab6def2a4 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -118,6 +118,8 @@ #define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */ #define AUDIT_BPF 1334 /* BPF subsystem */ #define AUDIT_EVENT_LISTENER 1335 /* Task joined multicast read socket */ +#define AUDIT_DM_CTRL 1336 /* Device Mapper target control */ +#define AUDIT_DM_EVENT 1337 /* Device Mapper events */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ From 8503243b4a81e79b99db050a2410703afbc7ff43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Sat, 4 Sep 2021 11:59:29 +0200 Subject: [PATCH 0205/1202] dm integrity: log audit events for dm-integrity target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dm-integrity signals integrity violations by returning I/O errors to user space. To identify integrity violations by a controlling instance, the kernel audit subsystem can be used to emit audit events to user space. We use the new dm-audit submodule allowing to emit audit events on relevant I/O errors. The construction and destruction of integrity device mappings are also relevant for auditing a system. Thus, those events are also logged as audit events. 
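The shape of the instrumentation is small; a sketch (hypothetical target and
helper, not taken from this patch) of how a constructor reports both outcomes
through the dm-audit wrappers:

    static int example_ctr(struct dm_target *ti, unsigned int argc,
                           char **argv)
    {
            int r;

            r = example_parse_args(ti, argc, argv); /* hypothetical helper */
            if (r) {
                    ti->error = "invalid arguments";
                    dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); /* res=0: failed */
                    return r;
            }

            dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1);         /* res=1: success */
            return 0;
    }

On failure, dm_audit_log_ti() picks up ti->error for the error_msg field, as
can be seen in the audit.log lines quoted above.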
Signed-off-by: Michael Weiß Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + drivers/md/dm-integrity.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 1bc4f48e97652..b5ea378e66cb1 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -610,6 +610,7 @@ config DM_INTEGRITY select CRYPTO select CRYPTO_SKCIPHER select ASYNC_XOR + select DM_AUDIT if AUDIT help This device-mapper target emulates a block device that has additional per-sector tags that can be used for storing diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index dc03b70f6e65c..dd78baf4c7af4 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -23,6 +23,8 @@ #include #include +#include "dm-audit.h" + #define DM_MSG_PREFIX "integrity" #define DEFAULT_INTERLEAVE_SECTORS 32768 @@ -539,6 +541,7 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) } if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) { dm_integrity_io_error(ic, "superblock mac", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); return -EILSEQ; } } @@ -876,8 +879,10 @@ static void rw_section_mac(struct dm_integrity_c *ic, unsigned section, bool wr) if (likely(wr)) memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR); else { - if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) + if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) { dm_integrity_io_error(ic, "journal mac", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0); + } } } } @@ -1782,10 +1787,15 @@ static void integrity_metadata(struct work_struct *w) if (unlikely(r)) { if (r > 0) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", bio_devname(bio, b), - (sector - ((r + ic->tag_size - 1) / ic->tag_size))); + sector_t s; + + s = sector - ((r + ic->tag_size - 1) / ic->tag_size); + DMERR_LIMIT("%s: Checksum failed at sector 0x%llx", + bio_devname(bio, b), s); r = -EILSEQ; atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, s, 0); } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -1991,6 +2001,8 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) { DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx", logical_sector); + dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum", + bio, logical_sector, 0); } } #endif @@ -2534,8 +2546,10 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start, integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block), (char *)access_journal_data(ic, i, l), test_tag); - if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) + if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) { dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ); + dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0); + } } journal_entry_set_unused(je2); @@ -4514,9 +4528,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ic->discard) ti->num_discard_bios = 1; + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; bad: + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); dm_integrity_dtr(ti); return r; } @@ -4590,6 +4606,7 @@ static void 
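As background, the authentication failure dm-crypt reacts to comes straight
from the crypto API: crypto_aead_decrypt() returns -EBADMSG when the
authentication tag does not verify. A reduced sketch of the reporting added
here (names follow the patch, surrounding context trimmed):

    /* inside the aead completion/error path */
    if (r == -EBADMSG) {
            sector_t s = le64_to_cpu(*sector);

            DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu",
                        bio_devname(ctx->bio_in, b), s);
            dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead",
                             ctx->bio_in, s, 0);    /* res=0: violation */
    }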
dm_integrity_dtr(struct dm_target *ti) free_alg(&ic->journal_mac_alg); kfree(ic); + dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); } static struct target_type integrity_target = { From 3d46a66322d690496821f6a7cc4613fd03483ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Wei=C3=9F?= Date: Sat, 4 Sep 2021 11:59:30 +0200 Subject: [PATCH 0206/1202] dm crypt: log aead integrity violations to audit subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since dm-crypt target can be stacked on dm-integrity targets to provide authenticated encryption, integrity violations are recognized here during aead computation. We use the dm-audit submodule to signal those events to user space, too. The construction and destruction of crypt device mappings are also logged as audit events. Signed-off-by: Michael Weiß Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 916b7da16de25..5c56d2b66d520 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -41,6 +41,8 @@ #include +#include "dm-audit.h" + #define DM_MSG_PREFIX "crypt" /* @@ -1362,8 +1364,12 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b), - (unsigned long long)le64_to_cpu(*sector)); + sector_t s = le64_to_cpu(*sector); + + DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", + bio_devname(ctx->bio_in, b), s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -2173,8 +2179,12 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error == -EBADMSG) { char b[BDEVNAME_SIZE]; - DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", bio_devname(ctx->bio_in, b), - (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); + sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); + + DMERR_LIMIT("%s: INTEGRITY AEAD ERROR, sector %llu", + bio_devname(ctx->bio_in, b), s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -2734,6 +2744,8 @@ static void crypt_dtr(struct dm_target *ti) dm_crypt_clients_n--; crypt_calculate_pages_per_client(); spin_unlock(&dm_crypt_clients_lock); + + dm_audit_log_dtr(DM_MSG_PREFIX, ti, 1); } static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) @@ -3362,9 +3374,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_bios = 1; ti->limit_swap_bios = true; + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 1); return 0; bad: + dm_audit_log_ctr(DM_MSG_PREFIX, ti, 0); crypt_dtr(ti); return ret; } From dffca4d565b326b2238167c425fd9c0fdb3fb6d5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 10 Oct 2021 16:29:28 +0200 Subject: [PATCH 0207/1202] dm: Remove redundant flush_workqueue() calls destroy_workqueue() already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant flush_workqueue() calls. 
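The reason the flush is redundant is visible in the workqueue code itself;
paraphrased (not verbatim from kernel/workqueue.c):

    void destroy_workqueue(struct workqueue_struct *wq)
    {
            /* first waits until no work on @wq is pending or executing */
            drain_workqueue(wq);
            /* ... then sanity-checks and tears the workqueue down ... */
    }

so a preceding flush_workqueue(wq) adds nothing.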
This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Christophe JAILLET Signed-off-by: Mike Snitzer --- drivers/md/bcache/writeback.c | 4 +--- drivers/md/dm-bufio.c | 1 - drivers/md/dm-zoned-target.c | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 8120da278161e..dbb6cb8069d9e 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -790,10 +790,8 @@ static int bch_writeback_thread(void *arg) } } - if (dc->writeback_write_wq) { - flush_workqueue(dc->writeback_write_wq); + if (dc->writeback_write_wq) destroy_workqueue(dc->writeback_write_wq); - } cached_dev_put(dc); wait_for_kthread_stop(); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 50f3e673729c3..fc8f8e9f9e39f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -2082,7 +2082,6 @@ static void __exit dm_bufio_exit(void) int bug = 0; cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); - flush_workqueue(dm_bufio_wq); destroy_workqueue(dm_bufio_wq); if (dm_bufio_client_count) { diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index ae1bc48c0043d..dfc822295c255 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -967,7 +967,6 @@ static void dmz_dtr(struct dm_target *ti) struct dmz_target *dmz = ti->private; int i; - flush_workqueue(dmz->chunk_wq); destroy_workqueue(dmz->chunk_wq); for (i = 0; i < dmz->nr_ddevs; i++) From 9bed5200e04d2a3dcd3a420d249e947eac7ec7c1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 13 Sep 2021 16:45:28 -0500 Subject: [PATCH 0208/1202] Makefile: Enable -Wimplicit-fallthrough for Clang With the recent fixes for fallthrough warnings, it is now possible to enable -Wimplicit-fallthrough for Clang. Special thanks to Nathan Chancellor who fixed the Clang bug[1][2] that put the application of this patch to a halt, temporarily. This bugfix only appears in Clang 14.0.0, so older versions still contain the bug and -Wimplicit-fallthrough won't be enabled for them, for now. This concludes a long journey and now we are finally getting rid of the unintentional fallthrough bug-class in the kernel, entirely. :) [1] https://github.com/llvm/llvm-project/commit/9ed4a94d6451046a51ef393cd62f00710820a7e8 [2] https://bugs.llvm.org/show_bug.cgi?id=51094 Link: https://github.com/KSPP/linux/issues/115 Link: https://github.com/ClangBuiltLinux/linux/issues/236 [The conditional "ifneq" was idea of Nathan] Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index 5e7c1d8544412..eb44b3b0c5bce 100644 --- a/Makefile +++ b/Makefile @@ -796,6 +796,14 @@ KBUILD_CFLAGS += -Wno-gnu # source of a reference will be _MergedGlobals and not on of the whitelisted names. # See modpost pattern 2 KBUILD_CFLAGS += -mno-global-merge +# Warn about unmarked fall-throughs in switch statement as long as +# -Wunreachable-code-fallthrough is present. Clang prior to 14 +# warned on unreachable fallthroughs with -Wimplicit-fallthrough, +# which is unacceptable due to IS_ENABLED(). 
+# https://bugs.llvm.org/show_bug.cgi?id=51094 +ifneq ($(call cc-option,-Wunreachable-code-fallthrough),) +KBUILD_CFLAGS += -Wimplicit-fallthrough +endif else # Warn about unmarked fall-throughs in switch statement. From 52264b162a51eadb0adcb55297cf91905c6ede98 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Tue, 12 Oct 2021 13:19:01 -0400 Subject: [PATCH 0209/1202] Revert "ext4: enforce buffer head state assertion in ext4_da_map_blocks" This reverts commit 948ca5f30e1df0c11eb5b0f410b9ceb97fa77ad9. Two crash reports from users running variations on 5.15-rc4 kernels suggest that it is premature to enforce the state assertion in the original commit. Both crashes were triggered by BUG calls in that code, indicating that under some rare circumstance the buffer head state did not match a delayed allocated block at the time the block was written out. No reproducer is available. Resolving this problem will require more time than remains in the current release cycle, so reverting the original patch for the time being is necessary to avoid any instability it may cause. Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20211012171901.5352-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0f06305167d5a..9097fccdc6889 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1711,16 +1711,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, } /* - * the buffer head associated with a delayed and not unwritten - * block found in the extent status cache must contain an - * invalid block number and have its BH_New and BH_Delay bits - * set, reflecting the state assigned when the block was - * initially delayed allocated + * Delayed extent could be allocated by fallocate. + * So we need to check it. */ - if (ext4_es_is_delonly(&es)) { - BUG_ON(bh->b_blocknr != invalid_block); - BUG_ON(!buffer_new(bh)); - BUG_ON(!buffer_delay(bh)); + if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); return 0; } From 6861a4e1817c4ac9a0f4f60443956af016a47829 Mon Sep 17 00:00:00 2001 From: Shaoying Xu Date: Thu, 2 Sep 2021 16:44:12 +0000 Subject: [PATCH 0210/1202] ext4: fix lazy initialization next schedule time computation in more granular unit Ext4 file system has default lazy inode table initialization setup once it is mounted. However, it has issue on computing the next schedule time that makes the timeout same amount in jiffies but different real time in secs if with various HZ values. Therefore, fix by measuring the current time in a more granular unit nanoseconds and make the next schedule time independent of the HZ value. 
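Concretely, the fix swaps a jiffies delta, whose resolution is one clock tick
(1/HZ seconds) and therefore varies with the HZ configuration, for a
nanosecond delta that is converted to jiffies only once. A reduced sketch of
the resulting logic (names follow the patch):

    u64 start_time = ktime_get_real_ns();
    ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1);
    if (elr->lr_timeout == 0)
            elr->lr_timeout = nsecs_to_jiffies(
                    (ktime_get_real_ns() - start_time) *
                    EXT4_SB(elr->lr_super)->s_li_wait_mult);
    elr->lr_next_sched = jiffies + elr->lr_timeout;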
Fixes: bfff68738f1c ("ext4: add support for lazy inode table initialization") Signed-off-by: Shaoying Xu Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210902164412.9994-2-shaoyi@amazon.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 88d5d274a8684..8a67e5f3f5763 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3263,9 +3263,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct super_block *sb = elr->lr_super; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t group = elr->lr_next_group; - unsigned long timeout = 0; unsigned int prefetch_ios = 0; int ret = 0; + u64 start_time; if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { elr->lr_next_group = ext4_mb_prefetch(sb, group, @@ -3302,14 +3302,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - timeout = jiffies; + start_time = ktime_get_real_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { - timeout = (jiffies - timeout) * - EXT4_SB(elr->lr_super)->s_li_wait_mult; - elr->lr_timeout = timeout; + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + EXT4_SB(elr->lr_super)->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; From 7e1cc8086ef868ee2655cfcd312dfaa3a3cac9b0 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:46 +0800 Subject: [PATCH 0211/1202] ext4: correct the left/middle/right debug message for binsearch The debuginfo for binsearch want to show the left/middle/right extent while the process search for the goal block. However we show this info after we change right or left. Link: https://lore.kernel.org/r/20210903062748.4118886-2-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e02571f2f828..c59426f1e1d2a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -714,13 +714,14 @@ ext4_ext_binsearch_idx(struct inode *inode, r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); + if (block < le32_to_cpu(m->ei_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block), - r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; @@ -782,13 +783,14 @@ ext4_ext_binsearch(struct inode *inode, while (l <= r) { m = l + (r - l) / 2; + ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, + le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); + if (block < le32_to_cpu(m->ee_block)) r = m - 1; else l = m + 1; - ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l, - le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block), - r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; From 2f961f147aad5b9e724e689bb7a61089510f745a Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:47 +0800 Subject: [PATCH 0212/1202] ext4: ensure enough credits in ext4_ext_shift_path_extents Like ext4_ext_rm_leaf, we can ensure that there are enough credits before every call that will consume credits. 
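The pattern, reduced to a sketch (constants as in the patch): before touching
a path buffer, make sure the handle still holds enough credits, restarting
the transaction with a full allowance if it does not:

    credits = 3;                            /* leaf + sb + inode */
    restart_credits = ext4_writepage_trans_blocks(inode);
    err = ext4_datasem_ensure_credits(handle, inode, credits,
                                      restart_credits, 0);
    if (err < 0)
            return err;
    err = ext4_ext_get_access(handle, inode, path + depth);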
As part of this fix we fold the functionality of ext4_access_path() into ext4_ext_shift_path_extents(). This change is needed as a preparation for the next bugfix patch. Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-3-yangerkun@huawei.com Signed-off-by: yangerkun Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 49 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c59426f1e1d2a..6b080f61342a2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4979,36 +4979,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } -/* - * ext4_access_path: - * Function to access the path buffer for marking it dirty. - * It also checks if there are sufficient credits left in the journal handle - * to update path. - */ -static int -ext4_access_path(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int credits, err; - - if (!ext4_handle_valid(handle)) - return 0; - - /* - * Check if need to extend journal credits - * 3 for leaf, sb, and inode plus 2 (bmap and group - * descriptor) for each block group; assume two block - * groups - */ - credits = ext4_writepage_trans_blocks(inode); - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); - if (err < 0) - return err; - - err = ext4_ext_get_access(handle, inode, path); - return err; -} - /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext @@ -5023,6 +4993,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; + int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { @@ -5032,13 +5003,23 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + /* leaf + sb + inode */ + credits = 3; + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { + update = true; + /* extent tree + sb + inode */ + credits = depth + 2; + } - err = ext4_access_path(handle, inode, path + depth); + restart_credits = ext4_writepage_trans_blocks(inode); + err = ext4_datasem_ensure_credits(handle, inode, credits, + restart_credits, 0); if (err) goto out; - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = true; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5069,7 +5050,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, } /* Update index too */ - err = ext4_access_path(handle, inode, path + depth); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; From bd02347da7e24a7519e9281a30158a66e3def639 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Fri, 3 Sep 2021 14:27:48 +0800 Subject: [PATCH 0213/1202] ext4: refresh the ext4_ext_path struct after dropping i_data_sem. After we drop i_data sem, we need to reload the ext4_ext_path structure since the extent tree can change once i_data_sem is released. This addresses the BUG: [52117.465187] ------------[ cut here ]------------ [52117.465686] kernel BUG at fs/ext4/extents.c:1756! ... [52117.478306] Call Trace: [52117.478565] ext4_ext_shift_extents+0x3ee/0x710 [52117.479020] ext4_fallocate+0x139c/0x1b40 [52117.479405] ? 
__do_sys_newfstat+0x6b/0x80 [52117.479805] vfs_fallocate+0x151/0x4b0 [52117.480177] ksys_fallocate+0x4a/0xa0 [52117.480533] __x64_sys_fallocate+0x22/0x30 [52117.480930] do_syscall_64+0x35/0x80 [52117.481277] entry_SYSCALL_64_after_hwframe+0x44/0xae [52117.481769] RIP: 0033:0x7fa062f855ca Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210903062748.4118886-4-yangerkun@huawei.com Signed-off-by: yangerkun Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6b080f61342a2..15c68bc80d21b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5014,8 +5014,11 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, restart_credits = ext4_writepage_trans_blocks(inode); err = ext4_datasem_ensure_credits(handle, inode, credits, restart_credits, 0); - if (err) + if (err) { + if (err > 0) + err = -EAGAIN; goto out; + } err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -5089,6 +5092,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; + ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, @@ -5142,11 +5146,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; + if (tmp != EXT_MAX_BLOCKS) + *iterator = tmp; + /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator @@ -5175,6 +5183,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } + tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + @@ -5193,6 +5202,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); + /* iterator can be NULL which means we should break */ + if (ret == -EAGAIN) + goto again; if (ret) break; } From 6333c4e6167b01a27a6d13bd7bbeb9451d4067c1 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Mon, 19 Jul 2021 13:59:14 +0800 Subject: [PATCH 0214/1202] ext4: convert from atomic_t to refcount_t on ext4_io_end->count refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations. 
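The conversion is mechanical; a minimal sketch of the mapping, where
release_io_end() stands in for the real release path:

    #include <linux/refcount.h>

    refcount_t count;

    refcount_set(&count, 1);                /* was atomic_set()          */
    refcount_inc(&count);                   /* was atomic_inc()          */
    if (refcount_dec_and_test(&count))      /* was atomic_dec_and_test() */
            release_io_end();               /* hypothetical release hook */

Unlike atomic_t, refcount_t saturates rather than wrapping and warns on
suspicious transitions such as incrementing from zero.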
Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1626674355-55795-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++- fs/ext4/page-io.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3825195539d74..404dd50856e5d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -17,6 +17,7 @@ #ifndef _EXT4_H #define _EXT4_H +#include #include #include #include @@ -241,7 +242,7 @@ typedef struct ext4_io_end { struct bio *bio; /* Linked list of completed * bios covering the extent */ unsigned int flag; /* unwritten or not */ - atomic_t count; /* reference counter */ + refcount_t count; /* reference counter */ struct list_head list_vec; /* list of ext4_io_end_vec */ } ext4_io_end_t; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index f038d578d8d8f..9cb2617149918 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -279,14 +279,14 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) io_end->inode = inode; INIT_LIST_HEAD(&io_end->list); INIT_LIST_HEAD(&io_end->list_vec); - atomic_set(&io_end->count, 1); + refcount_set(&io_end->count, 1); } return io_end; } void ext4_put_io_end_defer(ext4_io_end_t *io_end) { - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || list_empty(&io_end->list_vec)) { ext4_release_io_end(io_end); @@ -300,7 +300,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) { int err = 0; - if (atomic_dec_and_test(&io_end->count)) { + if (refcount_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { err = ext4_convert_unwritten_io_end_vec(io_end->handle, io_end); @@ -314,7 +314,7 @@ int ext4_put_io_end(ext4_io_end_t *io_end) ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) { - atomic_inc(&io_end->count); + refcount_inc(&io_end->count); return io_end; } From efbcc1015b07e3e8bafa97394b743812c180a9dd Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:48 +0800 Subject: [PATCH 0215/1202] ext4: check for out-of-order index extents in ext4_valid_extent_entries() After commit 5946d089379a ("ext4: check for overlapping extents in ext4_valid_extent_entries()"), we can check out the overlapping extent entry in leaf extent blocks. But the out-of-order extent entry in index extent blocks could also trigger bad things if the filesystem is inconsistent. So this patch add a check to figure out the out-of-order index extents and return error. 
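For readers skimming the diff below, the new index-block check reduces to the
same strictly-increasing walk already done for leaf entries (sketch):

    prev = 0;
    while (entries) {
            lblock = le32_to_cpu(ext_idx->ei_block);
            if ((lblock <= prev) && prev) {         /* out of order */
                    *pblk = ext4_idx_pblock(ext_idx);
                    return 0;                       /* tree is invalid */
            }
            ext_idx++;
            entries--;
            prev = lblock;
    }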
Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-2-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 15c68bc80d21b..c0ad07adf9a65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -357,6 +357,9 @@ static int ext4_valid_extent_entries(struct inode *inode, ext4_fsblk_t *pblk, int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1; @@ -365,31 +368,35 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { *pblk = ext4_ext_pblock(ext); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; From a992bc717652fb15b435884c587ae5249415239c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:49 +0800 Subject: [PATCH 0216/1202] ext4: check for inconsistent extents between index and leaf block Now that we can check out overlapping extents in leaf block and out-of-order index extents in index block. But the .ee_block in the first extent of one leaf block should equal to the .ei_block in it's parent index extent entry. This patch add a check to verify such inconsistent between the index and leaf block. Signed-off-by: Zhang Yi Link: https://lore.kernel.org/r/20210908120850.4012324-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 59 +++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c0ad07adf9a65..f6a902de7f41a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -354,7 +354,8 @@ static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - ext4_fsblk_t *pblk, int depth) + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; ext4_lblk_t lblock = 0; @@ -368,6 +369,14 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; @@ -384,6 +393,14 @@ static int ext4_valid_extent_entries(struct inode *inode, } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. 
+ */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; @@ -404,7 +421,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -430,7 +447,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -460,7 +477,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -493,16 +510,18 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + ext4_fsblk_t pblk; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; + pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -515,8 +534,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; set_buffer_verified(bh); @@ -534,8 +553,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -585,8 +604,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -893,8 +911,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1503,7 +1520,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1570,20 +1586,17 @@ static int ext4_ext_search_right(struct inode *inode, * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < 
path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -2962,9 +2975,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); From 916ff8d5ea0e24fd43f113b6b5326d5ea8f68310 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 8 Sep 2021 20:08:50 +0800 Subject: [PATCH 0217/1202] ext4: prevent partial update of the extent blocks In the most error path of current extents updating operations are not roll back partial updates properly when some bad things happens(.e.g in ext4_ext_insert_extent()). So we may get an inconsistent extents tree if journal has been aborted due to IO error, which may probability lead to BUGON later when we accessing these extent entries in errors=continue mode. This patch drop extent buffer's verify flag before updatng the contents in ext4_ext_get_access(), and reset it after updating in __ext4_ext_dirty(). After this patch we could force to check the extent buffer if extents tree updating was break off, make sure the extents are consistent. Signed-off-by: Zhang Yi Reviewed-by: Theodore Ts'o Link: https://lore.kernel.org/r/20210908120850.4012324-4-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f6a902de7f41a..09f56e04f4b24 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -136,15 +136,25 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, inode->i_sb, - path->p_bh, EXT4_JTR_NONE); + err = ext4_journal_get_write_access(handle, inode->i_sb, + path->p_bh, EXT4_JTR_NONE); + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. 
+ */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -165,6 +175,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); From 6737810be216822898bbe9c5ade835d7d30a8989 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Oct 2021 23:33:14 +0200 Subject: [PATCH 0218/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt index dd838cb70ecc8..0ad4afdced9bf 100644 --- a/arch/arm/arm-soc-for-next-contents.txt +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -60,6 +60,10 @@ arm/drivers git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux tags/v5.15-next-soc omap/ti-sysc git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/ti-sysc-signed + qcom/drivers + git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux tags/qcom-drivers-for-5.16 + broadcom/drivers + https://github.com/Broadcom/stblinux tags/arm-soc/for-5.16/drivers arm/defconfigs defconfig/multiv7 @@ -78,3 +82,9 @@ arm/late arm/fixes firmware/ffa git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux tags/ffa-fixes-5.15 + (bc22b6208f416ba702c17a93c9af6474f19e8212) + https://github.com/Broadcom/stblinux tags/arm-soc/for-5.15/devicetree + (7f565d0ead264329749c0da488de9c8dfa2f18ce) + git://git.linaro.org/people/jens.wiklander/linux-tee tags/optee-fix2-for-v5.15 + patch + iommu/arm: fix ARM_SMMU_QCOM compilation From cddc976f185e341b776f695a72931426716dfac2 Mon Sep 17 00:00:00 2001 From: Tinghan Shen Date: Fri, 24 Sep 2021 11:39:35 +0800 Subject: [PATCH 0219/1202] arm64: dts: mt8183: change rpmsg property name The rpmsg property name is changed to the "mediatek," prefix to sync with the vendor name defined in vendor-prefixes.yaml. Signed-off-by: Tinghan Shen Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210924033935.2127-7-tinghan.shen@mediatek.com Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index b42d81d26d721..a1a8fc7ba0072 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -813,7 +813,7 @@ cros_ec { compatible = "google,cros-ec-rpmsg"; - mtk,rpmsg-name = "cros-ec-rpmsg"; + mediatek,rpmsg-name = "cros-ec-rpmsg"; }; }; From 9ee7924360cb709683afb9d52b74f4cba2f22eb7 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 14 Oct 2021 11:52:39 +0000 Subject: [PATCH 0220/1202] cifs: To match file servers, make sure the server hostname matches We generally rely on a number of factors to differentiate between servers, for example the IP address and port. For certain server types (like Azure), it is important to make sure that the server hostname matches too, even if both hostnames currently resolve to the same IP address. 
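To illustrate why address comparison alone is not enough, here is a minimal standalone sketch; the struct and helper below are made up for illustration and are not the actual cifs code:

	/* Minimal sketch with made-up types (not the actual cifs code):
	 * two hostnames that resolve to the same IP must not share a
	 * connection.
	 */
	#include <stdio.h>
	#include <string.h>
	#include <strings.h>

	struct srv {
		const char *hostname;
		const char *ip;
	};

	static int match_server(const struct srv *srv, const char *want_host,
				const char *want_ip)
	{
		if (strcmp(srv->ip, want_ip) != 0)
			return 0;
		/* Without this case-insensitive check, a mount of server2
		 * would wrongly reuse the existing session for server1. */
		return strcasecmp(srv->hostname, want_host) == 0;
	}

	int main(void)
	{
		struct srv s = { "server1.example.com", "52.0.0.1" };

		/* Same IP, different hostname: must not match (prints 0). */
		printf("%d\n", match_server(&s, "server2.example.com", "52.0.0.1"));
		return 0;
	}

The actual patch below adds the equivalent strcasecmp() check to match_server().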
Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 19 +++++++++++-------- fs/cifs/fs_context.c | 8 ++++++++ fs/cifs/fs_context.h | 1 + 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c3b94c1e45913..e6e261dfd1079 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -794,7 +794,6 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) */ } - kfree(server->hostname); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); @@ -1235,6 +1234,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; + if (strcasecmp(server->hostname, ctx->server_hostname)) + return 0; + if (!match_address(server, addr, (struct sockaddr *)&ctx->srcaddr)) return 0; @@ -1336,6 +1338,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; + kfree(server->hostname); task = xchg(&server->tsk, NULL); if (task) @@ -1361,14 +1364,15 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) goto out_err; } + tcp_ses->hostname = kstrdup(ctx->server_hostname, GFP_KERNEL); + if (!tcp_ses->hostname) { + rc = -ENOMEM; + goto out_err; + } + tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); - tcp_ses->hostname = extract_hostname(ctx->UNC); - if (IS_ERR(tcp_ses->hostname)) { - rc = PTR_ERR(tcp_ses->hostname); - goto out_err_crypto_release; - } tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId); tcp_ses->noblockcnt = ctx->rootfs; @@ -1497,8 +1501,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) out_err: if (tcp_ses) { - if (!IS_ERR(tcp_ses->hostname)) - kfree(tcp_ses->hostname); + kfree(tcp_ses->hostname); if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); kfree(tcp_ses); diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 3109def8e1998..4130908af8d77 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -318,6 +318,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(mount_options); DUP_CTX_STR(username); DUP_CTX_STR(password); + DUP_CTX_STR(server_hostname); DUP_CTX_STR(UNC); DUP_CTX_STR(source); DUP_CTX_STR(domainname); @@ -456,6 +457,11 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!pos) return -EINVAL; + /* record the server hostname */ + ctx->server_hostname = kstrndup(devname + 2, pos - devname - 2, GFP_KERNEL); + if (!ctx->server_hostname) + return -ENOMEM; + /* skip past delimiter */ ++pos; @@ -1496,6 +1502,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->username = NULL; kfree_sensitive(ctx->password); ctx->password = NULL; + kfree(ctx->server_hostname); + ctx->server_hostname = NULL; kfree(ctx->UNC); ctx->UNC = NULL; kfree(ctx->source); diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index a42ba71d7a81f..29601a4eb4116 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -166,6 +166,7 @@ struct smb3_fs_context { char *password; char *domainname; char *source; + char *server_hostname; char *UNC; char *nodename; char *iocharset; /* local code page for mapping to and from Unicode */ From 08e9f52e2dcea8028c06cb3abbb098a0f3d9cc97 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 30 Jul 2021 06:43:09 +0000 Subject: [PATCH 0221/1202] cifs: for compound requests, use open handle if 
possible For smb2_compound_op, it is possible to pass a ref to an already open file. We should be passing it whenever possible. i.e. if a matching handle is already kept open. If we don't do that, we will end up breaking leases for files kept open on the same client. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2inode.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 8297703492eea..beec42ff2bf9b 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -46,6 +46,10 @@ struct cop_vars { struct smb2_file_link_info link_info; }; +/* + * note: If cfile is passed, the reference to it is dropped here. + * So make sure that you do not reuse cfile after return from this func. + */ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, @@ -536,10 +540,11 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, create_options |= OPEN_REPARSE_POINT; /* Failed on a symbolic link - query a reparse point info */ + cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, - smb2_data, SMB2_OP_QUERY_INFO, NULL); + smb2_data, SMB2_OP_QUERY_INFO, cfile); } if (rc) goto out; @@ -587,10 +592,11 @@ smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, create_options |= OPEN_REPARSE_POINT; /* Failed on a symbolic link - query a reparse point info */ + cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, - smb2_data, SMB2_OP_POSIX_QUERY_INFO, NULL); + smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile); } if (rc) goto out; @@ -608,10 +614,13 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { + struct cifsFileInfo *cfile; + + cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR, - NULL); + cfile); } void @@ -642,18 +651,24 @@ int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { + struct cifsFileInfo *cfile; + + cifs_get_writable_path(tcon, name, FIND_WR_WITH_DELETE, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_NOT_FILE, ACL_NO_MODE, - NULL, SMB2_OP_RMDIR, NULL); + NULL, SMB2_OP_RMDIR, cfile); } int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { + struct cifsFileInfo *cfile; + + cifs_get_writable_path(tcon, name, FIND_WR_WITH_DELETE, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT, - ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL); + ACL_NO_MODE, NULL, SMB2_OP_DELETE, cfile); } static int @@ -696,9 +711,12 @@ smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb) { + struct cifsFileInfo *cfile; + + cifs_get_writable_path(tcon, from_name, FIND_WR_ANY, &cfile); return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK, - NULL); + cfile); } int @@ -707,10 +725,12 @@ 
smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, bool set_alloc) { __le64 eof = cpu_to_le64(size); + struct cifsFileInfo *cfile; + cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); return smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE, - &eof, SMB2_OP_SET_EOF, NULL); + &eof, SMB2_OP_SET_EOF, cfile); } int @@ -719,6 +739,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; int rc; if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && @@ -729,10 +751,12 @@ smb2_set_file_info(struct inode *inode, const char *full_path, tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); - rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path, + cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_WRITE_ATTRIBUTES, FILE_OPEN, - 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL); + 0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile); cifs_put_tlink(tlink); return rc; } From cd6c3650f6a25ae885c46a5afddeb96efe363cf5 Mon Sep 17 00:00:00 2001 From: Oscar Carter Date: Sat, 30 May 2020 11:08:39 +0200 Subject: [PATCH 0222/1202] firewire: Remove function callback casts In the 1394 OHCI specification, the Isochronous Receive DMA context has several modes. One of these modes is 'BufferFill', and the Linux FireWire stack uses it to receive isochronous packets for multiple isochronous channels as FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL. The mode is not used by any in-kernel driver, but it is available to userspace. The character device driver in firewire-core casts the function callback for this mode, since its callback type differs from that of the other modes. Such a cast hinders Control Flow Integrity builds because it triggers a -Wcast-function-type warning. This commit removes the cast. A static helper function is newly added to initialize an isochronous context for this mode. The helper function assigns the mode-specific callback after calling the existing kernel API. Note that the isochronous channel number, the speed, and the header size are not required for this mode. The character device driver uses the helper function for this mode instead of calling the existing kernel API directly. The same goal could be achieved (in the ioctl_create_iso_context function) without this helper function as follows: - Call the fw_iso_context_create function, passing NULL as the callback parameter. - Then set the context->callback.sc or context->callback.mc variable based on the a->type value. However, using the helper function created in this patch makes the code clearer and more declarative, and avoids calling a function meant for one purpose to achieve another. Co-developed-by: Takashi Sakamoto Signed-off-by: Takashi Sakamoto Co-developed-by: Stefan Richter Signed-off-by: Stefan Richter Signed-off-by: Oscar Carter Reviewed-by: Takashi Sakamoto Tested-by: Takashi Sakamoto Signed-off-by: Gustavo A. R. 
Silva --- drivers/firewire/core-cdev.c | 32 ++++++++++++++++++++++++++------ include/linux/firewire.h | 11 +++++++---- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index fb6c651214f32..9f89c17730b12 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -953,11 +954,25 @@ static enum dma_data_direction iso_dma_direction(struct fw_iso_context *context) return DMA_FROM_DEVICE; } +static struct fw_iso_context *fw_iso_mc_context_create(struct fw_card *card, + fw_iso_mc_callback_t callback, + void *callback_data) +{ + struct fw_iso_context *ctx; + + ctx = fw_iso_context_create(card, FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL, + 0, 0, 0, NULL, callback_data); + if (!IS_ERR(ctx)) + ctx->callback.mc = callback; + + return ctx; +} + static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) { struct fw_cdev_create_iso_context *a = &arg->create_iso_context; struct fw_iso_context *context; - fw_iso_callback_t cb; + union fw_iso_callback cb; int ret; BUILD_BUG_ON(FW_CDEV_ISO_CONTEXT_TRANSMIT != FW_ISO_CONTEXT_TRANSMIT || @@ -970,7 +985,7 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) if (a->speed > SCODE_3200 || a->channel > 63) return -EINVAL; - cb = iso_callback; + cb.sc = iso_callback; break; case FW_ISO_CONTEXT_RECEIVE: @@ -978,19 +993,24 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg) a->channel > 63) return -EINVAL; - cb = iso_callback; + cb.sc = iso_callback; break; case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL: - cb = (fw_iso_callback_t)iso_mc_callback; + cb.mc = iso_mc_callback; break; default: return -EINVAL; } - context = fw_iso_context_create(client->device->card, a->type, - a->channel, a->speed, a->header_size, cb, client); + if (a->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) + context = fw_iso_mc_context_create(client->device->card, cb.mc, + client); + else + context = fw_iso_context_create(client->device->card, a->type, + a->channel, a->speed, + a->header_size, cb.sc, client); if (IS_ERR(context)) return PTR_ERR(context); if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW) diff --git a/include/linux/firewire.h b/include/linux/firewire.h index aec8f30ab200d..07967a450eaad 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -436,6 +436,12 @@ typedef void (*fw_iso_callback_t)(struct fw_iso_context *context, void *header, void *data); typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context, dma_addr_t completed, void *data); + +union fw_iso_callback { + fw_iso_callback_t sc; + fw_iso_mc_callback_t mc; +}; + struct fw_iso_context { struct fw_card *card; int type; @@ -443,10 +449,7 @@ struct fw_iso_context { int speed; bool drop_overflow_headers; size_t header_size; - union { - fw_iso_callback_t sc; - fw_iso_mc_callback_t mc; - } callback; + union fw_iso_callback callback; void *callback_data; }; From 1bdc308cabc872d3f6998ba8af9b1e3bc88cea64 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 13 Oct 2021 13:33:16 -0500 Subject: [PATCH 0223/1202] ftrace: Fix -Wcast-function-type warnings on powerpc64 In order to make sure new function cast mismatches are not introduced in the kernel (to avoid tripping CFI checking), the kernel should be globally built with -Wcast-function-type. 
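For context, a minimal standalone sketch of the kind of mismatch this warning catches; this is not kernel code, and the names merely echo the ftrace ones:

	/* Sketch only: casting between function pointer types with
	 * different signatures is exactly what -Wcast-function-type
	 * flags, and what an indirect-call CFI check would reject.
	 */
	typedef void (*two_arg_fn)(unsigned long ip, unsigned long parent_ip);
	typedef void (*four_arg_fn)(unsigned long ip, unsigned long parent_ip,
				    void *op, void *regs);

	static void no_ops(unsigned long ip, unsigned long parent_ip) { }

	/* warning: cast between incompatible function types */
	static four_arg_fn list_func = (four_arg_fn)no_ops;

Compiling this snippet with gcc -Wcast-function-type reproduces the shape of the diagnostics quoted below.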
So, fix the following -Wcast-function-type warnings on powerpc64 (ppc64_defconfig): kernel/trace/ftrace.c: In function 'ftrace_ops_get_list_func': kernel/trace/ftrace.c:128:31: warning: cast between incompatible function types from 'void (*)(long unsigned int, long unsigned int)' to 'void (*)(long unsigned int, long unsigned int, struct ftrace_ops *, struct ftrace_regs *)' [-Wcast-function-type] 128 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | ^ kernel/trace/ftrace.c:174:10: note: in expansion of macro 'ftrace_ops_list_func' 174 | return ftrace_ops_list_func; | ^~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c: In function 'update_ftrace_function': kernel/trace/ftrace.c:128:31: warning: cast between incompatible function types from 'void (*)(long unsigned int, long unsigned int)' to 'void (*)(long unsigned int, long unsigned int, struct ftrace_ops *, struct ftrace_regs *)' [-Wcast-function-type] 128 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | ^ kernel/trace/ftrace.c:207:10: note: in expansion of macro 'ftrace_ops_list_func' 207 | func = ftrace_ops_list_func; | ^~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c:128:31: warning: cast between incompatible function types from 'void (*)(long unsigned int, long unsigned int)' to 'void (*)(long unsigned int, long unsigned int, struct ftrace_ops *, struct ftrace_regs *)' [-Wcast-function-type] 128 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | ^ kernel/trace/ftrace.c:220:14: note: in expansion of macro 'ftrace_ops_list_func' 220 | if (func == ftrace_ops_list_func) { | ^~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c: In function 'ftrace_modify_all_code': kernel/trace/ftrace.c:128:31: warning: cast between incompatible function types from 'void (*)(long unsigned int, long unsigned int)' to 'void (*)(long unsigned int, long unsigned int, struct ftrace_ops *, struct ftrace_regs *)' [-Wcast-function-type] 128 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | ^ kernel/trace/ftrace.c:2698:35: note: in expansion of macro 'ftrace_ops_list_func' 2698 | err = ftrace_update_ftrace_func(ftrace_ops_list_func); | ^~~~~~~~~~~~~~~~~~~~ kernel/trace/ftrace.c:128:31: warning: cast between incompatible function types from 'void (*)(long unsigned int, long unsigned int)' to 'void (*)(long unsigned int, long unsigned int, struct ftrace_ops *, struct ftrace_regs *)' [-Wcast-function-type] 128 | #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) | ^ kernel/trace/ftrace.c:2708:41: note: in expansion of macro 'ftrace_ops_list_func' 2708 | if (update && ftrace_trace_function != ftrace_ops_list_func) { Link: https://github.com/KSPP/linux/issues/20 Link: https://lore.kernel.org/lkml/20210930095300.73be1555@canb.auug.org.au/ Link: https://lore.kernel.org/lkml/20211012224445.4f4be290@oasis.local.home/ Reported-by: Stephen Rothwell Signed-off-by: Steven Rostedt Signed-off-by: Gustavo A. R. Silva --- include/asm-generic/vmlinux.lds.h | 7 ++++++- kernel/trace/ftrace.c | 23 ++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f2984af2b85bd..b86f52683b6f6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -164,13 +164,18 @@ * Need to also make ftrace_stub_graph point to ftrace_stub * so that the same stub location may have different protocols * and not mess up with C verifiers. 
+ * + * ftrace_ops_list_func will be defined as arch_ftrace_ops_list_func + * as some archs will have a different prototype for that function + * but ftrace_ops_list_func() will have a single prototype. */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; \ - ftrace_stub_graph = ftrace_stub; + ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; #else # ifdef CONFIG_FUNCTION_TRACER # define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; # else # define MCOUNT_REC() # endif diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7efbc8aaf7f64..8acfc315dd361 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -119,14 +119,9 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; struct ftrace_ops global_ops; -#if ARCH_SUPPORTS_FTRACE_OPS -static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs); -#else -/* See comment below, where ftrace_ops_list_func is defined */ -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); -#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) -#endif +/* Defined by vmlinux.lds.h; see the comment above arch_ftrace_ops_list_func for details */ +void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); static inline void ftrace_ops_init(struct ftrace_ops *ops) { @@ -7026,21 +7021,23 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still * set the ARCH_SUPPORTS_FTRACE_OPS. + * + * In vmlinux.lds.h, ftrace_ops_list_func() is defined to be + * arch_ftrace_ops_list_func. */ #if ARCH_SUPPORTS_FTRACE_OPS -static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } -NOKPROBE_SYMBOL(ftrace_ops_list_func); #else -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } -NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif +NOKPROBE_SYMBOL(arch_ftrace_ops_list_func); /* * If there's only one function registered but it does not support From c45ede6c27819bb038af313b475fdf50ce755dd6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Thu, 14 Oct 2021 15:05:19 -0500 Subject: [PATCH 0224/1202] ftrace: Fix -Wmissing-prototypes errors Fix the following -Wmissing-prototypes errors: kernel/trace/ftrace.c:297:5: error: no previous prototype for function '__register_ftrace_function' [-Werror,-Wmissing-prototypes] int __register_ftrace_function(struct ftrace_ops *ops) ^ kernel/trace/ftrace.c:297:1: note: declare 'static' if the function is not intended to be used outside of this translation unit int __register_ftrace_function(struct ftrace_ops *ops) ^ static kernel/trace/ftrace.c:340:5: error: no previous prototype for function '__unregister_ftrace_function' [-Werror,-Wmissing-prototypes] int __unregister_ftrace_function(struct ftrace_ops *ops) ^ kernel/trace/ftrace.c:340:1: note: declare 'static' if the function is not intended to be used outside of this translation unit int __unregister_ftrace_function(struct ftrace_ops *ops) ^ static kernel/trace/ftrace.c:579:5: error: no previous prototype for function 'ftrace_profile_pages_init' [-Werror,-Wmissing-prototypes] int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) ^ kernel/trace/ftrace.c:579:1: note: declare 'static' if the function is not intended to be used outside of this translation unit int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) ^ static kernel/trace/ftrace.c:3871:15: error: no previous prototype for function 'arch_ftrace_match_adjust' [-Werror,-Wmissing-prototypes] char * __weak arch_ftrace_match_adjust(char *str, const char *search) ^ kernel/trace/ftrace.c:3871:1: note: declare 'static' if the function is not intended to be used outside of this translation unit char * __weak arch_ftrace_match_adjust(char *str, const char *search) ^ static kernel/trace/ftrace.c:7029:6: error: no previous prototype for function 'arch_ftrace_ops_list_func' [-Werror,-Wmissing-prototypes] void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ^ kernel/trace/ftrace.c:7029:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, ^ static 5 errors generated. Fixed by Steven and temporarily added to my -next tree. Reported-by: kernel test robot Signed-off-by: Steven Rostedt Signed-off-by: Gustavo A. R. Silva --- include/asm-generic/vmlinux.lds.h | 3 ++- include/linux/ftrace.h | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b86f52683b6f6..8771c435f34bb 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -178,7 +178,8 @@ ftrace_ops_list_func = arch_ftrace_ops_list_func; #else # ifdef CONFIG_FUNCTION_TRACER -# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; +# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; # else # define MCOUNT_REC() # endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 832e65f067542..871b51bec1701 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,16 +30,25 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_FUNCTION_TRACER +struct ftrace_ops; /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that * does. Or at least does enough to prevent any unwelcome side effects. + * + * Also define the function prototype that these architectures use + * to call the ftrace_ops_list_func(). 
*/ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); #else # define FTRACE_FORCE_LIST_FUNC 0 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +#endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING @@ -88,8 +97,6 @@ extern int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -struct ftrace_ops; - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_regs { From 21078041965eb5a2e83d296690a93243611a3c27 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 29 Sep 2021 16:18:03 -0500 Subject: [PATCH 0225/1202] Makefile: Enable -Wcast-function-type In order to make sure new function cast mismatches are not introduced in the kernel (to avoid tripping CFI checking), the kernel should be globally built with -Wcast-function-type. Link: https://github.com/KSPP/linux/issues/20 Co-developed-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 5e7c1d8544412..d596becd0c658 100644 --- a/Makefile +++ b/Makefile @@ -992,6 +992,11 @@ KBUILD_CFLAGS += -Wvla # disable pointer signed / unsigned warnings in gcc 4.0 KBUILD_CFLAGS += -Wno-pointer-sign +# In order to make sure new function cast mismatches are not introduced +# in the kernel (to avoid tripping CFI checking), the kernel should be +# globally built with -Wcast-function-type. +KBUILD_CFLAGS += -Wcast-function-type + # disable stringop warnings in gcc 8+ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) From 3652265514f52880b3ef8facafaf8117540382f6 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 29 Aug 2021 18:47:54 +0300 Subject: [PATCH 0226/1202] clk: qcom: gdsc: enable optional power domain support On sm8250 dispcc and videocc registers are powered up by the MMCX power domain. Currently we use a regulator to enable this domain on demand, however this has some consequences, as genpd code is not reentrant. Make gdsc code also use pm_runtime calls to ensure that registers are accessible during the gdsc_enable/gdsc_disable operations. 
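The resulting bracketing, as a minimal sketch mirroring the diff below (error handling trimmed; the _sketch suffixes are added here for clarity):

	/* Sketch of the ordering the patch introduces: power the parent
	 * domain (e.g. MMCX) up before touching GDSC registers, and drop
	 * the reference only after the registers are no longer needed.
	 */
	static int gdsc_enable_sketch(struct gdsc *sc)
	{
		int ret = gdsc_pm_runtime_get(sc);	/* resume parent domain */

		if (ret)
			return ret;
		return _gdsc_enable(sc);		/* registers accessible */
	}

	static int gdsc_disable_sketch(struct gdsc *sc)
	{
		int ret = _gdsc_disable(sc);		/* registers still powered */

		gdsc_pm_runtime_put(sc);		/* parent may now suspend */
		return ret;
	}

The diff below implements exactly this, with gdsc_pm_runtime_get()/gdsc_pm_runtime_put() degrading to no-ops when no device is attached to the GDSC.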
Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210829154757.784699-6-dmitry.baryshkov@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gdsc.c | 51 ++++++++++++++++++++++++++++++++++++++--- drivers/clk/qcom/gdsc.h | 2 ++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 4ece326ea233e..7e1dd8ccfa384 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,22 @@ enum gdsc_status { GDSC_ON }; +static int gdsc_pm_runtime_get(struct gdsc *sc) +{ + if (!sc->dev) + return 0; + + return pm_runtime_resume_and_get(sc->dev); +} + +static int gdsc_pm_runtime_put(struct gdsc *sc) +{ + if (!sc->dev) + return 0; + + return pm_runtime_put_sync(sc->dev); +} + /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -232,9 +249,8 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int gdsc_enable(struct generic_pm_domain *domain) +static int _gdsc_enable(struct gdsc *sc) { - struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -290,11 +306,22 @@ static int gdsc_enable(struct generic_pm_domain *domain) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) +static int gdsc_enable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; + ret = gdsc_pm_runtime_get(sc); + if (ret) + return ret; + + return _gdsc_enable(sc); +} + +static int _gdsc_disable(struct gdsc *sc) +{ + int ret; + if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -329,6 +356,18 @@ static int gdsc_disable(struct generic_pm_domain *domain) return 0; } +static int gdsc_disable(struct generic_pm_domain *domain) +{ + struct gdsc *sc = domain_to_gdsc(domain); + int ret; + + ret = _gdsc_disable(sc); + + gdsc_pm_runtime_put(sc); + + return ret; +} + static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -443,6 +482,8 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; + if (pm_runtime_enabled(dev)) + scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); @@ -457,6 +498,8 @@ int gdsc_register(struct gdsc_desc *desc, continue; if (scs[i]->parent) pm_genpd_add_subdomain(scs[i]->parent, &scs[i]->pd); + else if (!IS_ERR_OR_NULL(dev->pm_domain)) + pm_genpd_add_subdomain(pd_to_genpd(dev->pm_domain), &scs[i]->pd); } return of_genpd_add_provider_onecell(dev->of_node, data); @@ -475,6 +518,8 @@ void gdsc_unregister(struct gdsc_desc *desc) continue; if (scs[i]->parent) pm_genpd_remove_subdomain(scs[i]->parent, &scs[i]->pd); + else if (!IS_ERR_OR_NULL(dev->pm_domain)) + pm_genpd_remove_subdomain(pd_to_genpd(dev->pm_domain), &scs[i]->pd); } of_genpd_del_provider(dev->of_node); } diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 5bb396b344d16..702d47a87af66 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -25,6 +25,7 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller + * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -58,6 +59,7 @@ struct gdsc { const char *supply; struct regulator *rsupply; + struct device *dev; }; struct 
gdsc_desc { From dad70d11a6e5248f62f62372c3ce6c88e30286e6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Tue, 21 Sep 2021 15:34:49 +0200 Subject: [PATCH 0227/1202] ARM: dts: stm32: fix STUSB1600 Type-C irq level on stm32mp15xx-dkx STUSB1600 IRQ (Alert pin) is active low (open drain). Interrupts may get lost currently, so fix the IRQ type. Fixes: 83686162c0eb ("ARM: dts: stm32: add STUSB1600 Type-C using I2C4 on stm32mp15xx-dkx") Signed-off-by: Fabrice Gasnier Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index 899bfe04aeb91..48beed0f1f30a 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -249,7 +249,7 @@ stusb1600@28 { compatible = "st,stusb1600"; reg = <0x28>; - interrupts = <11 IRQ_TYPE_EDGE_FALLING>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpioi>; pinctrl-names = "default"; pinctrl-0 = <&stusb1600_pins_a>; From 0ac68641f41435f846162e6a43fb844abb419c28 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Fri, 24 Sep 2021 18:02:21 +0200 Subject: [PATCH 0228/1202] ARM: dts: stm32: fix SAI sub nodes register range The STM32 SAI subblocks registers offsets are in the range 0x0004 (SAIx_CR1) to 0x0020 (SAIx_DR). The corresponding range length is 0x20 instead of 0x1c. Change reg property accordingly. Fixes: 5afd65c3a060 ("ARM: dts: stm32: add sai support on stm32mp157c") Signed-off-by: Olivier Moysan Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp151.dtsi | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index bd289bf5d2690..6992a4b0ba79b 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -824,7 +824,7 @@ #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 87 0x400 0x01>; @@ -834,7 +834,7 @@ sai1b: audio-controller@4400a024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 88 0x400 0x01>; @@ -855,7 +855,7 @@ sai2a: audio-controller@4400b004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 89 0x400 0x01>; @@ -865,7 +865,7 @@ sai2b: audio-controller@4400b024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 90 0x400 0x01>; @@ -886,7 +886,7 @@ sai3a: audio-controller@4400c004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 113 0x400 0x01>; @@ -896,7 +896,7 @@ sai3b: audio-controller@4400c024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 114 0x400 0x01>; @@ -1271,7 +1271,7 @@ sai4a: audio-controller@50027004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 99 0x400 0x01>; @@ -1281,7 
+1281,7 @@ sai4b: audio-controller@50027024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 100 0x400 0x01>; From 960d46f29aececf3da5ffe5d8d24a4b0dddf41c3 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 4 Oct 2021 11:03:04 +0200 Subject: [PATCH 0229/1202] ARM: dts: stm32: fix AV96 board SAI2 pin muxing on stm32mp15 Fix SAI2A and SAI2B pin muxings for AV96 board on STM32MP15. Change sai2a-4 & sai2a-5 to sai2a-2 & sai2a-2. Change sai2a-4 & sai2a-sleep-5 to sai2b-2 & sai2b-sleep-2 Fixes: dcf185ca8175 ("ARM: dts: stm32: Add alternate pinmux for SAI2 pins on stm32mp15") Signed-off-by: Olivier Moysan Reviewed-by: Marek Vasut Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp15-pinctrl.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi index 5b60ecbd718f0..2ebafe27a865b 100644 --- a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi @@ -1179,7 +1179,7 @@ }; }; - sai2a_pins_c: sai2a-4 { + sai2a_pins_c: sai2a-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ @@ -1190,7 +1190,7 @@ }; }; - sai2a_sleep_pins_c: sai2a-5 { + sai2a_sleep_pins_c: sai2a-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ @@ -1235,14 +1235,14 @@ }; }; - sai2b_pins_c: sai2a-4 { + sai2b_pins_c: sai2b-2 { pins1 { pinmux = ; /* SAI2_SD_B */ bias-disable; }; }; - sai2b_sleep_pins_c: sai2a-sleep-5 { + sai2b_sleep_pins_c: sai2b-sleep-2 { pins { pinmux = ; /* SAI2_SD_B */ }; From d4b3aaf0f90bcb4fe9f994016156ffb91c482b42 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Wed, 6 Oct 2021 11:53:55 +0200 Subject: [PATCH 0230/1202] ARM: dts: stm32: use usbphyc ck_usbo_48m as USBH OHCI clock on stm32mp151 Referring to the note under USBH reset and clocks chapter of RM0436, "In order to access USBH_OHCI registers it is necessary to activate the USB clocks by enabling the PLL controlled by USBPHYC" (ck_usbo_48m). The point is, when USBPHYC PLL is not enabled, OHCI register access freezes the resume from STANDBY. It is the case when dual USBH is enabled, instead of OTG + single USBH. When OTG is probed, as ck_usbo_48m is USBO clock parent, then USBPHYC PLL is enabled and OHCI register access is OK. This patch adds ck_usbo_48m (provided by USBPHYC PLL) as clock of USBH OHCI, thus USBPHYC PLL will be enabled and OHCI register access will be OK. Signed-off-by: Amelie Delaunay Signed-off-by: Alexandre Torgue --- arch/arm/boot/dts/stm32mp151.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index 6992a4b0ba79b..f693a7d242471 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -1452,7 +1452,7 @@ usbh_ohci: usb@5800c000 { compatible = "generic-ohci"; reg = <0x5800c000 0x1000>; - clocks = <&rcc USBH>; + clocks = <&rcc USBH>, <&usbphyc>; resets = <&rcc USBH_R>; interrupts = ; status = "disabled"; From af629a8aaea62f942e0469899b9ac1b1bbbc1355 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 5 Apr 2021 04:58:39 +0200 Subject: [PATCH 0231/1202] kallsyms: support big kernel symbols (2-byte lengths) Rust symbols can become quite long due to namespacing introduced by modules, types, traits, generics, etc. Increasing to 255 is not enough in some cases, and therefore we need to introduce 2-byte lengths to the symbol table. 
We call these "big" symbols. In order to avoid increasing all lengths to 2 bytes (since most of them only require 1 byte, including many Rust ones), we use length zero to mark "big" symbols in the table. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- kernel/kallsyms.c | 7 +++++++ scripts/kallsyms.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 0ba87982d017f..4067564ec59fd 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -76,6 +76,13 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, */ off += len + 1; + /* If zero, it is a "big" symbol, so a two byte length follows. */ + if (len == 0) { + len = (data[0] << 8) | data[1]; + data += 2; + off += len + 2; + } + /* * For every byte on the compressed symbol data, copy the table * entry for that byte. diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 54ad86d137849..bcdabee13aab5 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -470,12 +470,37 @@ static void write_src(void) if ((i & 0xFF) == 0) markers[i >> 8] = off; - printf("\t.byte 0x%02x", table[i]->len); + /* + * There cannot be any symbol of length zero -- we use that + * to mark a "big" symbol (and it doesn't make sense anyway). + */ + if (table[i]->len == 0) { + fprintf(stderr, "kallsyms failure: " + "unexpected zero symbol length\n"); + exit(EXIT_FAILURE); + } + + /* Only lengths that fit in up to two bytes are supported. */ + if (table[i]->len > 0xFFFF) { + fprintf(stderr, "kallsyms failure: " + "unexpected huge symbol length\n"); + exit(EXIT_FAILURE); + } + + if (table[i]->len <= 0xFF) { + /* Most symbols use a single byte for the length. */ + printf("\t.byte 0x%02x", table[i]->len); + off += table[i]->len + 1; + } else { + /* "Big" symbols use a zero and then two bytes. */ + printf("\t.byte 0x00, 0x%02x, 0x%02x", + (table[i]->len >> 8) & 0xFF, + table[i]->len & 0xFF); + off += table[i]->len + 3; + } for (k = 0; k < table[i]->len; k++) printf(", 0x%02x", table[i]->sym[k]); printf("\n"); - - off += table[i]->len + 1; } printf("\n"); From 8651666946f9ca880911dc4ed686b4d77954ce91 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 5 Apr 2021 05:03:50 +0200 Subject: [PATCH 0232/1202] kallsyms: increase maximum kernel symbol length to 512 Rust symbols can become quite long due to namespacing introduced by modules, types, traits, generics, etc. For instance, for: pub mod my_module { pub struct MyType; pub struct MyGenericType(T); pub trait MyTrait { fn my_method() -> u32; } impl MyTrait for MyGenericType { fn my_method() -> u32 { 42 } } } generates a symbol of length 96 when using the upcoming v0 mangling scheme: _RNvXNtCshGpAVYOtgW1_7example9my_moduleINtB2_13MyGenericTypeNtB2_6MyTypeENtB2_7MyTrait9my_method At the moment, Rust symbols may reach up to 300 in length. Setting 512 as the maximum seems like a reasonable choice to keep some headroom. 
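Taken together with the previous patch, this changes the symbol table encoding; a standalone sketch of the resulting length decoding, mirroring kallsyms_expand_symbol() above:

	/* Standalone sketch of the length prefix decoding introduced
	 * above: a leading zero byte marks a "big" symbol whose real
	 * length follows in two bytes, most significant byte first.
	 */
	#include <stdio.h>

	static unsigned int read_sym_len(const unsigned char *data,
					 unsigned int *prefix)
	{
		unsigned int len = data[0];

		*prefix = 1;
		if (len == 0) {			/* "big" symbol marker */
			len = (data[1] << 8) | data[2];
			*prefix = 3;
		}
		return len;
	}

	int main(void)
	{
		const unsigned char small[] = { 0x60 };			/* 96  */
		const unsigned char big[]   = { 0x00, 0x01, 0x2c };	/* 300 */
		unsigned int prefix;

		printf("%u\n", read_sym_len(small, &prefix));	/* 96, 1-byte prefix */
		printf("%u\n", read_sym_len(big, &prefix));	/* 300, 3-byte prefix */
		return 0;
	}

The writer side in scripts/kallsyms.c emits the matching ".byte 0x00, hi, lo" prefix for symbols longer than 0xFF.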
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- include/linux/kallsyms.h | 2 +- kernel/livepatch/core.c | 4 ++-- scripts/kallsyms.c | 2 +- tools/include/linux/kallsyms.h | 2 +- tools/include/linux/lockdep.h | 2 +- tools/lib/perf/include/perf/event.h | 2 +- tools/lib/symbol/kallsyms.h | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index a1d6fc82d7f06..6851c2313cad9 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -15,7 +15,7 @@ #include -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 335d988bd8111..73874e5edfda6 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -213,7 +213,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, * we use the smallest/strictest upper bound possible (56, based on * the current definition of MODULE_NAME_LEN) to prevent overflows. */ - BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 512); relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ @@ -227,7 +227,7 @@ static int klp_resolve_symbols(Elf64_Shdr *sechdrs, const char *strtab, /* Format: .klp.sym.sym_objname.sym_name,sympos */ cnt = sscanf(strtab + sym->st_name, - ".klp.sym.%55[^.].%127[^,],%lu", + ".klp.sym.%55[^.].%511[^,],%lu", sym_objname, sym_name, &sympos); if (cnt != 3) { pr_err("symbol %s has an incorrectly formatted name\n", diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index bcdabee13aab5..9bab5f55ade3d 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -27,7 +27,7 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct sym_entry { unsigned long long addr; diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index efb6c3f5f2a9a..5a37ccbec54fb 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -6,7 +6,7 @@ #include #include -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 struct module; diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index e56997288f2b0..d9c163f3ab242 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -47,7 +47,7 @@ static inline int debug_locks_off(void) #define task_pid_nr(tsk) ((tsk)->pid) -#define KSYM_NAME_LEN 128 +#define KSYM_NAME_LEN 512 #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) #define pr_err(format, ...) 
fprintf (stderr, format, ## __VA_ARGS__) #define pr_warn pr_err diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 4d0c02ba3f7d3..095d60144a70c 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -95,7 +95,7 @@ struct perf_record_throttle { }; #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif struct perf_record_ksymbol { diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index 72ab9870454ba..542f9b059c3bd 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -7,7 +7,7 @@ #include #ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 +#define KSYM_NAME_LEN 512 #endif static inline u8 kallsyms2elf_binding(char type) From 02af4deb393c43223ceb4a9c28bad383f731769c Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 28 Sep 2021 12:08:37 +0800 Subject: [PATCH 0233/1202] kallsyms: Use the correct buffer size for symbols The name buffer size should be larger than KSYM_NAME_LEN; otherwise we cannot tell whether a symbol name is too long. Signed-off-by: Boqun Feng Signed-off-by: Miguel Ojeda --- scripts/kallsyms.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 9bab5f55ade3d..f6cbbf8522b9f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -27,6 +27,9 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define _stringify_1(x) #x +#define _stringify(x) _stringify_1(x) + #define KSYM_NAME_LEN 512 struct sym_entry { @@ -197,15 +200,15 @@ static void check_symbol_range(const char *sym, unsigned long long addr, static struct sym_entry *read_symbol(FILE *in) { - char name[500], type; + char name[KSYM_NAME_LEN+1], type; unsigned long long addr; unsigned int len; struct sym_entry *sym; int rc; - rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name); + rc = fscanf(in, "%llx %c %" _stringify(KSYM_NAME_LEN) "s\n", &addr, &type, name); if (rc != 3) { - if (rc != EOF && fgets(name, 500, in) == NULL) + if (rc != EOF && fgets(name, KSYM_NAME_LEN + 1, in) == NULL) fprintf(stderr, "Read error or end of file.\n"); return NULL; } From 0f0d799770859966caf2cef2f7866a9c67264bdb Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:38:57 +0200 Subject: [PATCH 0234/1202] vsprintf: add new `%pA` format specifier This patch adds a format specifier `%pA` to `vsprintf` which formats a pointer as `core::fmt::Arguments`. Doing so allows us to directly format to the internal buffer of `printk`, so we do not have to use a temporary buffer on the stack to pre-assemble the message on the Rust side. This specifier is intended only to be used from Rust and not for C, so `checkpatch.pl` is intentionally unchanged to catch any misuse. 
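A standalone sketch of the buffer contract this relies on; the names here are made up, and the real callee is implemented in Rust:

	/* Sketch of the [buf, end) contract behind %pA: the callee writes
	 * directly into the formatter's remaining window and returns the
	 * new position, so no temporary buffer is needed.
	 */
	#include <stdio.h>

	static char *emit_args(char *buf, char *end, const void *ptr)
	{
		const char *msg = ptr;	/* stands in for core::fmt::Arguments */

		while (*msg && buf < end)
			*buf++ = *msg++;
		return buf;		/* new write position, may equal end */
	}

	int main(void)
	{
		char out[32];
		char *pos = emit_args(out, out + sizeof(out) - 1, "hello from Rust");

		*pos = '\0';
		puts(out);
		return 0;
	}

rust_fmt_argument() in the diff below follows the same buf/end convention.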
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- lib/vsprintf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7ad44f2c8f57..67a96165e5870 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2225,6 +2225,10 @@ char *fwnode_string(char *buf, char *end, struct fwnode_handle *fwnode, return widen_string(buf, buf - buf_start, end, spec); } +#ifdef CONFIG_RUST +char *rust_fmt_argument(char* buf, char* end, void *ptr); +#endif + /* Disable pointer hashing if requested */ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); @@ -2380,6 +2384,10 @@ early_param("no_hash_pointers", no_hash_pointers_enable); * * Note: The default behaviour (unadorned %p) is to hash the address, * rendering it useful as a unique identifier. + * + * There is also a '%pA' format specifier, but it is only intended to be used + * from Rust code to format core::fmt::Arguments. Do *not* use it from C. + * See rust/kernel/print.rs for details. */ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, @@ -2452,6 +2460,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return device_node_string(buf, end, ptr, spec, fmt + 1); case 'f': return fwnode_string(buf, end, ptr, spec, fmt + 1); +#ifdef CONFIG_RUST + case 'A': + return rust_fmt_argument(buf, end, ptr); +#endif case 'x': return pointer_string(buf, end, ptr, spec); case 'e': From 46b2ac4d6c570542235c52d0d127b66f436ba077 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:52:41 +0200 Subject: [PATCH 0235/1202] rust: add C helpers This source file contains forwarders to C macros and inlined functions. 
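The pattern, as a minimal kernel-context sketch; example_inline() is a placeholder, and the real forwarders follow in the diff:

	/* Sketch of the forwarder pattern: a C macro or static inline
	 * function has no linkable symbol for Rust bindings to call, so
	 * each one gets a small exported wrapper.
	 */
	#include <linux/export.h>

	static inline int example_inline(int x)
	{
		return x + 1;			/* inlined away; no symbol emitted */
	}

	int rust_helper_example_inline(int x)
	{
		return example_inline(x);	/* real symbol, callable from Rust */
	}
	EXPORT_SYMBOL_GPL(rust_helper_example_inline);

helpers.c below applies this pattern to copy_from_user(), spinlocks, refcounts, and the other inlined interfaces.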
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/helpers.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 rust/helpers.c diff --git a/rust/helpers.c b/rust/helpers.c new file mode 100644 index 0000000000000..36060f1f953d2 --- /dev/null +++ b/rust/helpers.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__noreturn void rust_helper_BUG(void) +{ + BUG(); +} + +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return copy_from_user(to, from, n); +} + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return copy_to_user(to, from, n); +} + +unsigned long rust_helper_clear_user(void __user *to, unsigned long n) +{ + return clear_user(to, n); +} + +void __iomem *rust_helper_ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap(offset, size); +} +EXPORT_SYMBOL_GPL(rust_helper_ioremap); + +u8 rust_helper_readb(const volatile void __iomem *addr) +{ + return readb(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readb); + +u16 rust_helper_readw(const volatile void __iomem *addr) +{ + return readw(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readw); + +u32 rust_helper_readl(const volatile void __iomem *addr) +{ + return readl(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readl); + +#ifdef CONFIG_64BIT +u64 rust_helper_readq(const volatile void __iomem *addr) +{ + return readq(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readq); +#endif + +void rust_helper_writeb(u8 value, volatile void __iomem *addr) +{ + writeb(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeb); + +void rust_helper_writew(u16 value, volatile void __iomem *addr) +{ + writew(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writew); + +void rust_helper_writel(u32 value, volatile void __iomem *addr) +{ + writel(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writel); + +#ifdef CONFIG_64BIT +void rust_helper_writeq(u64 value, volatile void __iomem *addr) +{ + writeq(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeq); +#endif + +void rust_helper___spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + __spin_lock_init(lock, name, key); +#else + spin_lock_init(lock); +#endif +} +EXPORT_SYMBOL_GPL(rust_helper___spin_lock_init); + +void rust_helper_spin_lock(spinlock_t *lock) +{ + spin_lock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_spin_lock); + +void rust_helper_spin_unlock(spinlock_t *lock) +{ + 
spin_unlock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_spin_unlock); + +void rust_helper_init_wait(struct wait_queue_entry *wq_entry) +{ + init_wait(wq_entry); +} +EXPORT_SYMBOL_GPL(rust_helper_init_wait); + +int rust_helper_signal_pending(struct task_struct *t) +{ + return signal_pending(t); +} +EXPORT_SYMBOL_GPL(rust_helper_signal_pending); + +struct page *rust_helper_alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages(gfp_mask, order); +} +EXPORT_SYMBOL_GPL(rust_helper_alloc_pages); + +void *rust_helper_kmap(struct page *page) +{ + return kmap(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kmap); + +void rust_helper_kunmap(struct page *page) +{ + return kunmap(page); +} +EXPORT_SYMBOL_GPL(rust_helper_kunmap); + +int rust_helper_cond_resched(void) +{ + return cond_resched(); +} +EXPORT_SYMBOL_GPL(rust_helper_cond_resched); + +size_t rust_helper_copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_iter); + +size_t rust_helper_copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_iter); + +bool rust_helper_IS_ERR(__force const void *ptr) +{ + return IS_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_IS_ERR); + +long rust_helper_PTR_ERR(__force const void *ptr) +{ + return PTR_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_PTR_ERR); + +const char *rust_helper_errname(int err) +{ + return errname(err); +} +EXPORT_SYMBOL_GPL(rust_helper_errname); + +void rust_helper_mutex_lock(struct mutex *lock) +{ + mutex_lock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_mutex_lock); + +void * +rust_helper_platform_get_drvdata(const struct platform_device *pdev) +{ + return platform_get_drvdata(pdev); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_get_drvdata); + +void +rust_helper_platform_set_drvdata(struct platform_device *pdev, + void *data) +{ + return platform_set_drvdata(pdev, data); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_set_drvdata); + +refcount_t rust_helper_REFCOUNT_INIT(int n) +{ + return (refcount_t)REFCOUNT_INIT(n); +} +EXPORT_SYMBOL_GPL(rust_helper_REFCOUNT_INIT); + +void rust_helper_refcount_inc(refcount_t *r) +{ + refcount_inc(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_inc); + +bool rust_helper_refcount_dec_and_test(refcount_t *r) +{ + return refcount_dec_and_test(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_dec_and_test); + +void rust_helper_rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + rb_link_node(node, parent, rb_link); +} +EXPORT_SYMBOL_GPL(rust_helper_rb_link_node); + +struct task_struct *rust_helper_get_current(void) +{ + return current; +} +EXPORT_SYMBOL_GPL(rust_helper_get_current); + +void rust_helper_get_task_struct(struct task_struct * t) +{ + get_task_struct(t); +} +EXPORT_SYMBOL_GPL(rust_helper_get_task_struct); + +void rust_helper_put_task_struct(struct task_struct * t) +{ + put_task_struct(t); +} +EXPORT_SYMBOL_GPL(rust_helper_put_task_struct); + +int rust_helper_security_binder_set_context_mgr(struct task_struct *mgr) +{ + return security_binder_set_context_mgr(mgr); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_set_context_mgr); + +int rust_helper_security_binder_transaction(struct task_struct *from, + struct task_struct *to) +{ + return security_binder_transaction(from, to); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transaction); + +int rust_helper_security_binder_transfer_binder(struct task_struct *from, + struct 
task_struct *to) +{ + return security_binder_transfer_binder(from, to); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transfer_binder); + +int rust_helper_security_binder_transfer_file(struct task_struct *from, + struct task_struct *to, + struct file *file) +{ + return security_binder_transfer_file(from, to, file); +} +EXPORT_SYMBOL_GPL(rust_helper_security_binder_transfer_file); + +void *rust_helper_dev_get_drvdata(struct device *dev) +{ + return dev_get_drvdata(dev); +} +EXPORT_SYMBOL_GPL(rust_helper_dev_get_drvdata); + +/* We use bindgen's --size_t-is-usize option to bind the C size_t type + * as the Rust usize type, so we can use it in contexts where Rust + * expects a usize like slice (array) indices. usize is defined to be + * the same as C's uintptr_t type (can hold any pointer) but not + * necessarily the same as size_t (can hold the size of any single + * object). Most modern platforms use the same concrete integer type for + * both of them, but in case we find ourselves on a platform where + * that's not true, fail early instead of risking ABI or + * integer-overflow issues. + * + * If your platform fails this assertion, it means that you are in + * danger of integer-overflow bugs (even if you attempt to remove + * --size_t-is-usize). It may be easiest to change the kernel ABI on + * your platform such that size_t matches uintptr_t (i.e., to increase + * size_t, because uintptr_t has to be at least as big as size_t). +*/ +static_assert( + sizeof(size_t) == sizeof(uintptr_t) && + __alignof__(size_t) == __alignof__(uintptr_t), + "Rust code expects C size_t to match Rust usize" +); From 67903bc007c487f92da810561859fafd35b7c920 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 16:54:11 +0200 Subject: [PATCH 0236/1202] rust: add `compiler_builtins` crate Rust provides `compiler_builtins` as a port of LLVM's `compiler-rt`. Since we do not need the vast majority of them, we avoid the dependency by providing our own crate. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/compiler_builtins.rs | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 rust/compiler_builtins.rs diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs new file mode 100644 index 0000000000000..a5a0be72591b8 --- /dev/null +++ b/rust/compiler_builtins.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Our own `compiler_builtins`. +//! +//! Rust provides [`compiler_builtins`] as a port of LLVM's [`compiler-rt`]. +//! Since we do not need the vast majority of them, we avoid the dependency +//! by providing this file. +//! +//! 
At the moment, some builtins are required that should not be. For instance, +//! [`core`] has floating-point functionality which we should not be compiling +//! in. We will work with upstream [`core`] to provide feature flags to disable +//! the parts we do not need. For the moment, we define them to [`panic!`] at +//! runtime for simplicity to catch mistakes, instead of performing surgery +//! on `core.o`. +//! +//! In any case, all these symbols are weakened to ensure we do not override +//! those that may be provided by the rest of the kernel. +//! +//! [`compiler_builtins`]: https://github.com/rust-lang/compiler-builtins +//! [`compiler-rt`]: https://compiler-rt.llvm.org/ + +#![feature(compiler_builtins)] +#![compiler_builtins] +#![no_builtins] +#![no_std] + +macro_rules! define_panicking_intrinsics( + ($reason: tt, { $($ident: ident, )* }) => { + $( + #[doc(hidden)] + #[no_mangle] + pub extern "C" fn $ident() { + panic!($reason); + } + )* + } +); + +define_panicking_intrinsics!("`i128` should not be used", { + __ashrti3, + __muloti4, + __multi3, +}); + +define_panicking_intrinsics!("`u128` should not be used", { + __ashlti3, + __lshrti3, + __udivmodti4, + __udivti3, + __umodti3, +}); + +#[cfg(target_arch = "arm")] +define_panicking_intrinsics!("`u64` division/modulo should not be used", { + __aeabi_uldivmod, + __mulodi4, +}); From 7c34bbf9330eeddfa86235406e6f687c4cf4a1f5 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:02:21 +0200 Subject: [PATCH 0237/1202] rust: add `alloc` crate This crate is a subset of the Rust standard library `alloc`, with some additions on top. This is needed because upstream support for fallible allocations is a work in progress (i.e. the `try_*` versions of methods which return a `Result` instead of panicking). Having the library in-tree also gives us a bit more freedom to experiment with new interfaces and allows us to iterate quickly. Eventually, the goal is to have everything the kernel needs in upstream `alloc` and drop it from the kernel tree. 
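As a rough illustration of the kind of interface this enables (a sketch for
this commit message only, not code added by the patch; it leans on the
upstream fallible-allocation work, e.g. `Vec::try_reserve`):

    use alloc::{collections::TryReserveError, vec::Vec};

    /// Appends `data` to `v`, reporting allocation failure as an error
    /// instead of panicking or aborting.
    fn append(v: &mut Vec<u8>, data: &[u8]) -> Result<(), TryReserveError> {
        v.try_reserve(data.len())?; // fallible reservation: Err on OOM
        v.extend_from_slice(data);  // capacity already reserved, cannot fail
        Ok(())
    }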
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- rust/alloc/README.md | 32 + rust/alloc/alloc.rs | 425 ++++ rust/alloc/borrow.rs | 494 +++++ rust/alloc/boxed.rs | 1725 +++++++++++++++ rust/alloc/collections/mod.rs | 117 ++ rust/alloc/fmt.rs | 587 ++++++ rust/alloc/lib.rs | 200 ++ rust/alloc/macros.rs | 126 ++ rust/alloc/prelude/mod.rs | 17 + rust/alloc/prelude/v1.rs | 16 + rust/alloc/raw_vec.rs | 612 ++++++ rust/alloc/slice.rs | 1271 +++++++++++ rust/alloc/str.rs | 614 ++++++ rust/alloc/string.rs | 2842 +++++++++++++++++++++++++ rust/alloc/vec/drain.rs | 157 ++ rust/alloc/vec/drain_filter.rs | 145 ++ rust/alloc/vec/into_iter.rs | 297 +++ rust/alloc/vec/is_zero.rs | 106 + rust/alloc/vec/mod.rs | 3261 +++++++++++++++++++++++++++++ rust/alloc/vec/partial_eq.rs | 49 + rust/alloc/vec/set_len_on_drop.rs | 30 + rust/alloc/vec/spec_extend.rs | 172 ++ 22 files changed, 13295 insertions(+) create mode 100644 rust/alloc/README.md create mode 100644 rust/alloc/alloc.rs create mode 100644 rust/alloc/borrow.rs create mode 100644 rust/alloc/boxed.rs create mode 100644 rust/alloc/collections/mod.rs create mode 100644 rust/alloc/fmt.rs create mode 100644 rust/alloc/lib.rs create mode 100644 rust/alloc/macros.rs create mode 100644 rust/alloc/prelude/mod.rs create mode 100644 rust/alloc/prelude/v1.rs create mode 100644 rust/alloc/raw_vec.rs create mode 100644 rust/alloc/slice.rs create mode 100644 rust/alloc/str.rs create mode 100644 rust/alloc/string.rs create mode 100644 rust/alloc/vec/drain.rs create mode 100644 rust/alloc/vec/drain_filter.rs create mode 100644 rust/alloc/vec/into_iter.rs create mode 100644 rust/alloc/vec/is_zero.rs create mode 100644 rust/alloc/vec/mod.rs create mode 100644 rust/alloc/vec/partial_eq.rs create mode 100644 rust/alloc/vec/set_len_on_drop.rs create mode 100644 rust/alloc/vec/spec_extend.rs diff --git a/rust/alloc/README.md b/rust/alloc/README.md new file mode 100644 index 0000000000000..a1bcc2cef0e69 --- /dev/null +++ b/rust/alloc/README.md @@ -0,0 +1,32 @@ +# `alloc` + +These source files come from the Rust standard library, hosted in +the https://github.com/rust-lang/rust repository. For copyright +details, see https://github.com/rust-lang/rust/blob/master/COPYRIGHT. + +Please note that these files should be kept as close as possible to +upstream. In general, only additions should be performed (e.g. new +methods). Eventually, changes should make it into upstream so that, +at some point, this fork can be dropped from the kernel tree. 
+ + +## Rationale + +On one hand, kernel folks wanted to keep `alloc` in-tree to have more +freedom in both workflow and actual features if actually needed +(e.g. receiver types if we ended up using them), which is reasonable. + +On the other hand, Rust folks wanted to keep `alloc` as close as +upstream as possible and avoid as much divergence as possible, which +is also reasonable. + +We agreed on a middle-ground: we would keep a subset of `alloc` +in-tree that would be as small and as close as possible to upstream. +Then, upstream can start adding the functions that we add to `alloc` +etc., until we reach a point where the kernel already knows exactly +what it needs in `alloc` and all the new methods are merged into +upstream, so that we can drop `alloc` from the kernel tree and go back +to using the upstream one. + +By doing this, the kernel can go a bit faster now, and Rust can +slowly incorporate and discuss the changes as needed. diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs new file mode 100644 index 0000000000000..a59d64ffb36d5 --- /dev/null +++ b/rust/alloc/alloc.rs @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Memory allocation APIs + +#![stable(feature = "alloc_module", since = "1.28.0")] + +#[cfg(not(test))] +use core::intrinsics; +use core::intrinsics::{min_align_of_val, size_of_val}; + +use core::ptr::Unique; +#[cfg(not(test))] +use core::ptr::{self, NonNull}; + +#[stable(feature = "alloc_module", since = "1.28.0")] +#[doc(inline)] +pub use core::alloc::*; + +#[cfg(test)] +mod tests; + +extern "Rust" { + // These are the magic symbols to call the global allocator. rustc generates + // them to call `__rg_alloc` etc. if there is a `#[global_allocator]` attribute + // (the code expanding that attribute macro generates those functions), or to call + // the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`) + // otherwise. + // The rustc fork of LLVM also special-cases these function names to be able to optimize them + // like `malloc`, `realloc`, and `free`, respectively. + #[rustc_allocator] + #[rustc_allocator_nounwind] + fn __rust_alloc(size: usize, align: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize); + #[rustc_allocator_nounwind] + fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8; + #[rustc_allocator_nounwind] + fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8; +} + +/// The global memory allocator. +/// +/// This type implements the [`Allocator`] trait by forwarding calls +/// to the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// Note: while this type is unstable, the functionality it provides can be +/// accessed through the [free functions in `alloc`](self#functions). +#[unstable(feature = "allocator_api", issue = "32838")] +#[derive(Copy, Clone, Default, Debug)] +#[cfg(not(test))] +pub struct Global; + +#[cfg(test)] +pub use std::alloc::Global; + +/// Allocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::alloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc`]. 
+/// +/// # Examples +/// +/// ``` +/// use std::alloc::{alloc, dealloc, Layout}; +/// +/// unsafe { +/// let layout = Layout::new::(); +/// let ptr = alloc(layout); +/// +/// *(ptr as *mut u16) = 42; +/// assert_eq!(*(ptr as *mut u16), 42); +/// +/// dealloc(ptr, layout); +/// } +/// ``` +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn alloc(layout: Layout) -> *mut u8 { + unsafe { __rust_alloc(layout.size(), layout.align()) } +} + +/// Deallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::dealloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `dealloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::dealloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn dealloc(ptr: *mut u8, layout: Layout) { + unsafe { __rust_dealloc(ptr, layout.size(), layout.align()) } +} + +/// Reallocate memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::realloc`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `realloc` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::realloc`]. +#[stable(feature = "global_alloc", since = "1.28.0")] +#[inline] +pub unsafe fn realloc(ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { + unsafe { __rust_realloc(ptr, layout.size(), layout.align(), new_size) } +} + +/// Allocate zero-initialized memory with the global allocator. +/// +/// This function forwards calls to the [`GlobalAlloc::alloc_zeroed`] method +/// of the allocator registered with the `#[global_allocator]` attribute +/// if there is one, or the `std` crate’s default. +/// +/// This function is expected to be deprecated in favor of the `alloc_zeroed` method +/// of the [`Global`] type when it and the [`Allocator`] trait become stable. +/// +/// # Safety +/// +/// See [`GlobalAlloc::alloc_zeroed`]. 
+///
+/// # Examples
+///
+/// ```
+/// use std::alloc::{alloc_zeroed, dealloc, Layout};
+///
+/// unsafe {
+///     let layout = Layout::new::<u16>();
+///     let ptr = alloc_zeroed(layout);
+///
+///     assert_eq!(*(ptr as *mut u16), 0);
+///
+///     dealloc(ptr, layout);
+/// }
+/// ```
+#[stable(feature = "global_alloc", since = "1.28.0")]
+#[inline]
+pub unsafe fn alloc_zeroed(layout: Layout) -> *mut u8 {
+    unsafe { __rust_alloc_zeroed(layout.size(), layout.align()) }
+}
+
+#[cfg(not(test))]
+impl Global {
+    #[inline]
+    fn alloc_impl(&self, layout: Layout, zeroed: bool) -> Result<NonNull<[u8]>, AllocError> {
+        match layout.size() {
+            0 => Ok(NonNull::slice_from_raw_parts(layout.dangling(), 0)),
+            // SAFETY: `layout` is non-zero in size,
+            size => unsafe {
+                let raw_ptr = if zeroed { alloc_zeroed(layout) } else { alloc(layout) };
+                let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?;
+                Ok(NonNull::slice_from_raw_parts(ptr, size))
+            },
+        }
+    }
+
+    // SAFETY: Same as `Allocator::grow`
+    #[inline]
+    unsafe fn grow_impl(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+        zeroed: bool,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        debug_assert!(
+            new_layout.size() >= old_layout.size(),
+            "`new_layout.size()` must be greater than or equal to `old_layout.size()`"
+        );
+
+        match old_layout.size() {
+            0 => self.alloc_impl(new_layout, zeroed),
+
+            // SAFETY: `new_size` is non-zero, since `new_size` is greater than or equal to
+            // `old_size` as required by the safety conditions, and `old_size` is non-zero in
+            // this arm. Other conditions must be upheld by the caller
+            old_size if old_layout.align() == new_layout.align() => unsafe {
+                let new_size = new_layout.size();
+
+                // `realloc` probably checks for `new_size >= old_layout.size()` or something similar.
+                intrinsics::assume(new_size >= old_layout.size());
+
+                let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size);
+                let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?;
+                if zeroed {
+                    raw_ptr.add(old_size).write_bytes(0, new_size - old_size);
+                }
+                Ok(NonNull::slice_from_raw_parts(ptr, new_size))
+            },
+
+            // SAFETY: because `new_layout.size()` must be greater than or equal to `old_size`,
+            // both the old and new memory allocation are valid for reads and writes for `old_size`
+            // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap
+            // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract
+            // for `dealloc` must be upheld by the caller.
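+            // In other words: allocate a new block, copy the old bytes across, then free the
+            // old block.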
+            old_size => unsafe {
+                let new_ptr = self.alloc_impl(new_layout, zeroed)?;
+                ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), old_size);
+                self.deallocate(ptr, old_layout);
+                Ok(new_ptr)
+            },
+        }
+    }
+}
+
+#[unstable(feature = "allocator_api", issue = "32838")]
+#[cfg(not(test))]
+unsafe impl Allocator for Global {
+    #[inline]
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        self.alloc_impl(layout, false)
+    }
+
+    #[inline]
+    fn allocate_zeroed(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        self.alloc_impl(layout, true)
+    }
+
+    #[inline]
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        if layout.size() != 0 {
+            // SAFETY: `layout` is non-zero in size,
+            // other conditions must be upheld by the caller
+            unsafe { dealloc(ptr.as_ptr(), layout) }
+        }
+    }
+
+    #[inline]
+    unsafe fn grow(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: all conditions must be upheld by the caller
+        unsafe { self.grow_impl(ptr, old_layout, new_layout, false) }
+    }
+
+    #[inline]
+    unsafe fn grow_zeroed(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        // SAFETY: all conditions must be upheld by the caller
+        unsafe { self.grow_impl(ptr, old_layout, new_layout, true) }
+    }
+
+    #[inline]
+    unsafe fn shrink(
+        &self,
+        ptr: NonNull<u8>,
+        old_layout: Layout,
+        new_layout: Layout,
+    ) -> Result<NonNull<[u8]>, AllocError> {
+        debug_assert!(
+            new_layout.size() <= old_layout.size(),
+            "`new_layout.size()` must be smaller than or equal to `old_layout.size()`"
+        );
+
+        match new_layout.size() {
+            // SAFETY: conditions must be upheld by the caller
+            0 => unsafe {
+                self.deallocate(ptr, old_layout);
+                Ok(NonNull::slice_from_raw_parts(new_layout.dangling(), 0))
+            },
+
+            // SAFETY: `new_size` is non-zero. Other conditions must be upheld by the caller
+            new_size if old_layout.align() == new_layout.align() => unsafe {
+                // `realloc` probably checks for `new_size <= old_layout.size()` or something similar.
+                intrinsics::assume(new_size <= old_layout.size());
+
+                let raw_ptr = realloc(ptr.as_ptr(), old_layout, new_size);
+                let ptr = NonNull::new(raw_ptr).ok_or(AllocError)?;
+                Ok(NonNull::slice_from_raw_parts(ptr, new_size))
+            },
+
+            // SAFETY: because `new_size` must be smaller than or equal to `old_layout.size()`,
+            // both the old and new memory allocation are valid for reads and writes for `new_size`
+            // bytes. Also, because the old allocation wasn't yet deallocated, it cannot overlap
+            // `new_ptr`. Thus, the call to `copy_nonoverlapping` is safe. The safety contract
+            // for `dealloc` must be upheld by the caller.
+            new_size => unsafe {
+                let new_ptr = self.allocate(new_layout)?;
+                ptr::copy_nonoverlapping(ptr.as_ptr(), new_ptr.as_mut_ptr(), new_size);
+                self.deallocate(ptr, old_layout);
+                Ok(new_ptr)
+            },
+        }
+    }
+}
+
+/// The allocator for unique pointers.
+// This function must not unwind. If it does, MIR codegen will fail.
+#[cfg(all(not(no_global_oom_handling), not(test)))]
+#[lang = "exchange_malloc"]
+#[inline]
+unsafe fn exchange_malloc(size: usize, align: usize) -> *mut u8 {
+    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+    match Global.allocate(layout) {
+        Ok(ptr) => ptr.as_mut_ptr(),
+        Err(_) => handle_alloc_error(layout),
+    }
+}
+
+#[cfg_attr(not(test), lang = "box_free")]
+#[inline]
+// This signature has to be the same as `Box`, otherwise an ICE will happen.
+// When an additional parameter to `Box` is added (like `A: Allocator`), this has to be added here as
+// well.
+// For example if `Box` is changed to `struct Box<T>(Unique<T>, A)`,
+// this function has to be changed to `fn box_free<T>(Unique<T>, A)` as well.
+pub(crate) unsafe fn box_free<T: ?Sized, A: Allocator>(ptr: Unique<T>, alloc: A) {
+    unsafe {
+        let size = size_of_val(ptr.as_ref());
+        let align = min_align_of_val(ptr.as_ref());
+        let layout = Layout::from_size_align_unchecked(size, align);
+        alloc.deallocate(ptr.cast().into(), layout)
+    }
+}
+
+// # Allocation error handler
+
+#[cfg(not(no_global_oom_handling))]
+extern "Rust" {
+    // This is the magic symbol to call the global alloc error handler. rustc generates
+    // it to call `__rg_oom` if there is a `#[alloc_error_handler]`, or to call the
+    // default implementations below (`__rdl_oom`) otherwise.
+    #[rustc_allocator_nounwind]
+    fn __rust_alloc_error_handler(size: usize, align: usize) -> !;
+}
+
+/// Abort on memory allocation error or failure.
+///
+/// Callers of memory allocation APIs wishing to abort computation
+/// in response to an allocation error are encouraged to call this function,
+/// rather than directly invoking `panic!` or similar.
+///
+/// The default behavior of this function is to print a message to standard error
+/// and abort the process.
+/// It can be replaced with [`set_alloc_error_hook`] and [`take_alloc_error_hook`].
+///
+/// [`set_alloc_error_hook`]: ../../std/alloc/fn.set_alloc_error_hook.html
+/// [`take_alloc_error_hook`]: ../../std/alloc/fn.take_alloc_error_hook.html
+#[stable(feature = "global_alloc", since = "1.28.0")]
+#[cfg(all(not(no_global_oom_handling), not(test)))]
+#[rustc_allocator_nounwind]
+#[cold]
+pub fn handle_alloc_error(layout: Layout) -> ! {
+    unsafe {
+        __rust_alloc_error_handler(layout.size(), layout.align());
+    }
+}
+
+// For alloc test `std::alloc::handle_alloc_error` can be used directly.
+#[cfg(all(not(no_global_oom_handling), test))]
+pub use std::alloc::handle_alloc_error;
+
+#[cfg(all(not(no_global_oom_handling), not(any(target_os = "hermit", test))))]
+#[doc(hidden)]
+#[allow(unused_attributes)]
+#[unstable(feature = "alloc_internals", issue = "none")]
+pub mod __alloc_error_handler {
+    use crate::alloc::Layout;
+
+    // called via generated `__rust_alloc_error_handler`
+
+    // if there is no `#[alloc_error_handler]`
+    #[rustc_std_internal_symbol]
+    pub unsafe extern "C" fn __rdl_oom(size: usize, _align: usize) -> ! {
+        panic!("memory allocation of {} bytes failed", size)
+    }
+
+    // if there is a `#[alloc_error_handler]`
+    #[rustc_std_internal_symbol]
+    pub unsafe extern "C" fn __rg_oom(size: usize, align: usize) -> ! {
+        let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+        extern "Rust" {
+            #[lang = "oom"]
+            fn oom_impl(layout: Layout) -> !;
+        }
+        unsafe { oom_impl(layout) }
+    }
+}
+
+/// Specialize clones into pre-allocated, uninitialized memory.
+/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
+pub(crate) trait WriteCloneIntoRaw: Sized {
+    unsafe fn write_clone_into_raw(&self, target: *mut Self);
+}
+
+impl<T: Clone> WriteCloneIntoRaw for T {
+    #[inline]
+    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // Having allocated *first* may allow the optimizer to create
+        // the cloned value in-place, skipping the local and move.
+        unsafe { target.write(self.clone()) };
+    }
+}
+
+impl<T: Copy> WriteCloneIntoRaw for T {
+    #[inline]
+    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // We can always copy in-place, without ever involving a local value.
+        unsafe { target.copy_from_nonoverlapping(self, 1) };
+    }
+}
diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs
new file mode 100644
index 0000000000000..3a05264274565
--- /dev/null
+++ b/rust/alloc/borrow.rs
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! A module for working with borrowed data.
+
+#![stable(feature = "rust1", since = "1.0.0")]
+
+use core::cmp::Ordering;
+use core::hash::{Hash, Hasher};
+use core::ops::Deref;
+#[cfg(not(no_global_oom_handling))]
+use core::ops::{Add, AddAssign};
+
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::borrow::{Borrow, BorrowMut};
+
+use crate::fmt;
+#[cfg(not(no_global_oom_handling))]
+use crate::string::String;
+
+use Cow::*;
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B>
+where
+    B: ToOwned,
+    <B as ToOwned>::Owned: 'a,
+{
+    fn borrow(&self) -> &B {
+        &**self
+    }
+}
+
+/// A generalization of `Clone` to borrowed data.
+///
+/// Some types make it possible to go from borrowed to owned, usually by
+/// implementing the `Clone` trait. But `Clone` works only for going from `&T`
+/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data
+/// from any borrow of a given type.
+#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait ToOwned {
+    /// The resulting type after obtaining ownership.
+    #[stable(feature = "rust1", since = "1.0.0")]
+    type Owned: Borrow<Self>;
+
+    /// Creates owned data from borrowed data, usually by cloning.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// let s: &str = "a";
+    /// let ss: String = s.to_owned();
+    ///
+    /// let v: &[i32] = &[1, 2];
+    /// let vv: Vec<i32> = v.to_owned();
+    /// ```
+    #[stable(feature = "rust1", since = "1.0.0")]
+    #[must_use = "cloning is often expensive and is not expected to have side effects"]
+    fn to_owned(&self) -> Self::Owned;
+
+    /// Uses borrowed data to replace owned data, usually by cloning.
+    ///
+    /// This is a borrow-generalized version of `Clone::clone_from`.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// # #![feature(toowned_clone_into)]
+    /// let mut s: String = String::new();
+    /// "hello".clone_into(&mut s);
+    ///
+    /// let mut v: Vec<i32> = Vec::new();
+    /// [1, 2][..].clone_into(&mut v);
+    /// ```
+    #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")]
+    fn clone_into(&self, target: &mut Self::Owned) {
+        *target = self.to_owned();
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T> ToOwned for T
+where
+    T: Clone,
+{
+    type Owned = T;
+    fn to_owned(&self) -> T {
+        self.clone()
+    }
+
+    fn clone_into(&self, target: &mut T) {
+        target.clone_from(self);
+    }
+}
+
+/// A clone-on-write smart pointer.
+///
+/// The type `Cow` is a smart pointer providing clone-on-write functionality: it
+/// can enclose and provide immutable access to borrowed data, and clone the
+/// data lazily when mutation or ownership is required. The type is designed to
+/// work with general borrowed data via the `Borrow` trait.
+///
+/// `Cow` implements `Deref`, which means that you can call
+/// non-mutating methods directly on the data it encloses. If mutation
+/// is desired, `to_mut` will obtain a mutable reference to an owned
+/// value, cloning if necessary.
+///
+/// If you need reference-counting pointers, note that
+/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and
+/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write
+/// functionality as well.
+/// +/// # Examples +/// +/// ``` +/// use std::borrow::Cow; +/// +/// fn abs_all(input: &mut Cow<[i32]>) { +/// for i in 0..input.len() { +/// let v = input[i]; +/// if v < 0 { +/// // Clones into a vector if not already owned. +/// input.to_mut()[i] = -v; +/// } +/// } +/// } +/// +/// // No clone occurs because `input` doesn't need to be mutated. +/// let slice = [0, 1, 2]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // Clone occurs because `input` needs to be mutated. +/// let slice = [-1, 0, 1]; +/// let mut input = Cow::from(&slice[..]); +/// abs_all(&mut input); +/// +/// // No clone occurs because `input` is already owned. +/// let mut input = Cow::from(vec![-1, 0, 1]); +/// abs_all(&mut input); +/// ``` +/// +/// Another example showing how to keep `Cow` in a struct: +/// +/// ``` +/// use std::borrow::Cow; +/// +/// struct Items<'a, X: 'a> where [X]: ToOwned> { +/// values: Cow<'a, [X]>, +/// } +/// +/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned> { +/// fn new(v: Cow<'a, [X]>) -> Self { +/// Items { values: v } +/// } +/// } +/// +/// // Creates a container from borrowed values of a slice +/// let readonly = [1, 2]; +/// let borrowed = Items::new((&readonly[..]).into()); +/// match borrowed { +/// Items { values: Cow::Borrowed(b) } => println!("borrowed {:?}", b), +/// _ => panic!("expect borrowed value"), +/// } +/// +/// let mut clone_on_write = borrowed; +/// // Mutates the data from slice into owned vec and pushes a new value on top +/// clone_on_write.values.to_mut().push(3); +/// println!("clone_on_write = {:?}", clone_on_write.values); +/// +/// // The data was mutated. Let check it out. +/// match clone_on_write { +/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"), +/// _ => panic!("expect owned data"), +/// } +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "Cow")] +pub enum Cow<'a, B: ?Sized + 'a> +where + B: ToOwned, +{ + /// Borrowed data. + #[stable(feature = "rust1", since = "1.0.0")] + Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B), + + /// Owned data. + #[stable(feature = "rust1", since = "1.0.0")] + Owned(#[stable(feature = "rust1", since = "1.0.0")] ::Owned), +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Cow<'_, B> { + fn clone(&self) -> Self { + match *self { + Borrowed(b) => Borrowed(b), + Owned(ref o) => { + let b: &B = o.borrow(); + Owned(b.to_owned()) + } + } + } + + fn clone_from(&mut self, source: &Self) { + match (self, source) { + (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest), + (t, s) => *t = s.clone(), + } + } +} + +impl Cow<'_, B> { + /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work. + /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow = Cow::Borrowed("moo"); + /// assert!(cow.is_borrowed()); + /// + /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string()); + /// assert!(!bull.is_borrowed()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_borrowed(&self) -> bool { + match *self { + Borrowed(_) => true, + Owned(_) => false, + } + } + + /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(cow_is_borrowed)] + /// use std::borrow::Cow; + /// + /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string()); + /// assert!(cow.is_owned()); + /// + /// let bull = Cow::Borrowed("...moo?"); + /// assert!(!bull.is_owned()); + /// ``` + #[unstable(feature = "cow_is_borrowed", issue = "65143")] + #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] + pub const fn is_owned(&self) -> bool { + !self.is_borrowed() + } + + /// Acquires a mutable reference to the owned form of the data. + /// + /// Clones the data if it is not already owned. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let mut cow = Cow::Borrowed("foo"); + /// cow.to_mut().make_ascii_uppercase(); + /// + /// assert_eq!( + /// cow, + /// Cow::Owned(String::from("FOO")) as Cow + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn to_mut(&mut self) -> &mut ::Owned { + match *self { + Borrowed(borrowed) => { + *self = Owned(borrowed.to_owned()); + match *self { + Borrowed(..) => unreachable!(), + Owned(ref mut owned) => owned, + } + } + Owned(ref mut owned) => owned, + } + } + + /// Extracts the owned data. + /// + /// Clones the data if it is not already owned. + /// + /// # Examples + /// + /// Calling `into_owned` on a `Cow::Borrowed` clones the underlying data + /// and becomes a `Cow::Owned`: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow = Cow::Borrowed(s); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + /// + /// Calling `into_owned` on a `Cow::Owned` is a no-op: + /// + /// ``` + /// use std::borrow::Cow; + /// + /// let s = "Hello world!"; + /// let cow: Cow = Cow::Owned(String::from(s)); + /// + /// assert_eq!( + /// cow.into_owned(), + /// String::from(s) + /// ); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_owned(self) -> ::Owned { + match self { + Borrowed(borrowed) => borrowed.to_owned(), + Owned(owned) => owned, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Cow<'_, B> { + type Target = B; + + fn deref(&self) -> &B { + match *self { + Borrowed(borrowed) => borrowed, + Owned(ref owned) => owned.borrow(), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Cow<'_, B> where B: Eq + ToOwned {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Cow<'_, B> +where + B: Ord + ToOwned, +{ + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq> for Cow<'a, B> +where + B: PartialEq + ToOwned, + C: ToOwned, +{ + #[inline] + fn eq(&self, other: &Cow<'b, C>) -> bool { + PartialEq::eq(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, B: ?Sized> PartialOrd for Cow<'a, B> +where + B: PartialOrd + ToOwned, +{ + #[inline] + fn partial_cmp(&self, other: &Cow<'a, B>) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Cow<'_, B> +where + B: fmt::Debug + ToOwned, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Debug::fmt(b, f), + Owned(ref o) => fmt::Debug::fmt(o, f), + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Cow<'_, B> +where + B: fmt::Display + ToOwned, +{ + fn fmt(&self, f: &mut 
fmt::Formatter<'_>) -> fmt::Result { + match *self { + Borrowed(ref b) => fmt::Display::fmt(b, f), + Owned(ref o) => fmt::Display::fmt(o, f), + } + } +} + +#[stable(feature = "default", since = "1.11.0")] +impl Default for Cow<'_, B> +where + B: ToOwned, +{ + /// Creates an owned Cow<'a, B> with the default value for the contained owned value. + fn default() -> Self { + Owned(::Owned::default()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Cow<'_, B> +where + B: Hash + ToOwned, +{ + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Cow<'_, T> { + fn as_ref(&self) -> &T { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add<&'a str> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: &'a str) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> Add> for Cow<'a, str> { + type Output = Cow<'a, str>; + + #[inline] + fn add(mut self, rhs: Cow<'a, str>) -> Self::Output { + self += rhs; + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign<&'a str> for Cow<'a, str> { + fn add_assign(&mut self, rhs: &'a str) { + if self.is_empty() { + *self = Cow::Borrowed(rhs) + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(rhs); + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_add", since = "1.14.0")] +impl<'a> AddAssign> for Cow<'a, str> { + fn add_assign(&mut self, rhs: Cow<'a, str>) { + if self.is_empty() { + *self = rhs + } else if !rhs.is_empty() { + if let Cow::Borrowed(lhs) = *self { + let mut s = String::with_capacity(lhs.len() + rhs.len()); + s.push_str(lhs); + *self = Cow::Owned(s); + } + self.to_mut().push_str(&rhs); + } + } +} diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs new file mode 100644 index 0000000000000..d1dbcc7c6e129 --- /dev/null +++ b/rust/alloc/boxed.rs @@ -0,0 +1,1725 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A pointer type for heap allocation. +//! +//! [`Box`], casually referred to as a 'box', provides the simplest form of +//! heap allocation in Rust. Boxes provide ownership for this allocation, and +//! drop their contents when they go out of scope. Boxes also ensure that they +//! never allocate more than `isize::MAX` bytes. +//! +//! # Examples +//! +//! Move a value from the stack to the heap by creating a [`Box`]: +//! +//! ``` +//! let val: u8 = 5; +//! let boxed: Box = Box::new(val); +//! ``` +//! +//! Move a value from a [`Box`] back to the stack by [dereferencing]: +//! +//! ``` +//! let boxed: Box = Box::new(5); +//! let val: u8 = *boxed; +//! ``` +//! +//! Creating a recursive data structure: +//! +//! ``` +//! #[derive(Debug)] +//! enum List { +//! Cons(T, Box>), +//! Nil, +//! } +//! +//! let list: List = List::Cons(1, Box::new(List::Cons(2, Box::new(List::Nil)))); +//! println!("{:?}", list); +//! ``` +//! +//! This will print `Cons(1, Cons(2, Nil))`. +//! +//! Recursive structures must be boxed, because if the definition of `Cons` +//! looked like this: +//! +//! ```compile_fail,E0072 +//! # enum List { +//! Cons(T, List), +//! # } +//! ``` +//! +//! It wouldn't work. 
This is because the size of a `List` depends on how many +//! elements are in the list, and so we don't know how much memory to allocate +//! for a `Cons`. By introducing a [`Box`], which has a defined size, we know how +//! big `Cons` needs to be. +//! +//! # Memory layout +//! +//! For non-zero-sized values, a [`Box`] will use the [`Global`] allocator for +//! its allocation. It is valid to convert both ways between a [`Box`] and a +//! raw pointer allocated with the [`Global`] allocator, given that the +//! [`Layout`] used with the allocator is correct for the type. More precisely, +//! a `value: *mut T` that has been allocated with the [`Global`] allocator +//! with `Layout::for_value(&*value)` may be converted into a box using +//! [`Box::::from_raw(value)`]. Conversely, the memory backing a `value: *mut +//! T` obtained from [`Box::::into_raw`] may be deallocated using the +//! [`Global`] allocator with [`Layout::for_value(&*value)`]. +//! +//! For zero-sized values, the `Box` pointer still has to be [valid] for reads +//! and writes and sufficiently aligned. In particular, casting any aligned +//! non-zero integer literal to a raw pointer produces a valid pointer, but a +//! pointer pointing into previously allocated memory that since got freed is +//! not valid. The recommended way to build a Box to a ZST if `Box::new` cannot +//! be used is to use [`ptr::NonNull::dangling`]. +//! +//! So long as `T: Sized`, a `Box` is guaranteed to be represented +//! as a single pointer and is also ABI-compatible with C pointers +//! (i.e. the C type `T*`). This means that if you have extern "C" +//! Rust functions that will be called from C, you can define those +//! Rust functions using `Box` types, and use `T*` as corresponding +//! type on the C side. As an example, consider this C header which +//! declares functions that create and destroy some kind of `Foo` +//! value: +//! +//! ```c +//! /* C header */ +//! +//! /* Returns ownership to the caller */ +//! struct Foo* foo_new(void); +//! +//! /* Takes ownership from the caller; no-op when invoked with null */ +//! void foo_delete(struct Foo*); +//! ``` +//! +//! These two functions might be implemented in Rust as follows. Here, the +//! `struct Foo*` type from C is translated to `Box`, which captures +//! the ownership constraints. Note also that the nullable argument to +//! `foo_delete` is represented in Rust as `Option>`, since `Box` +//! cannot be null. +//! +//! ``` +//! #[repr(C)] +//! pub struct Foo; +//! +//! #[no_mangle] +//! pub extern "C" fn foo_new() -> Box { +//! Box::new(Foo) +//! } +//! +//! #[no_mangle] +//! pub extern "C" fn foo_delete(_: Option>) {} +//! ``` +//! +//! Even though `Box` has the same representation and C ABI as a C pointer, +//! this does not mean that you can convert an arbitrary `T*` into a `Box` +//! and expect things to work. `Box` values will always be fully aligned, +//! non-null pointers. Moreover, the destructor for `Box` will attempt to +//! free the value with the global allocator. In general, the best practice +//! is to only use `Box` for pointers that originated from the global +//! allocator. +//! +//! **Important.** At least at present, you should avoid using +//! `Box` types for functions that are defined in C but invoked +//! from Rust. In those cases, you should directly mirror the C types +//! as closely as possible. Using types like `Box` where the C +//! definition is just using `T*` can lead to undefined behavior, as +//! described in [rust-lang/unsafe-code-guidelines#198][ucg#198]. 
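+//!
+//! For such C-defined functions, mirroring the raw pointer type keeps the
+//! Rust declaration faithful to the C signature (an illustrative sketch,
+//! not upstream documentation; `foo_consume` is a hypothetical C function):
+//!
+//! ```ignore
+//! // C side: void foo_consume(struct Foo*); (takes ownership)
+//! extern "C" {
+//!     fn foo_consume(foo: *mut Foo); // mirror `struct Foo*`, not `Box<Foo>`
+//! }
+//! ```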
+//! +//! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198 +//! [dereferencing]: core::ops::Deref +//! [`Box::::from_raw(value)`]: Box::from_raw +//! [`Global`]: crate::alloc::Global +//! [`Layout`]: crate::alloc::Layout +//! [`Layout::for_value(&*value)`]: crate::alloc::Layout::for_value +//! [valid]: ptr#safety + +#![stable(feature = "rust1", since = "1.0.0")] + +use core::any::Any; +use core::borrow; +use core::cmp::Ordering; +use core::convert::{From, TryFrom}; +use core::fmt; +use core::future::Future; +use core::hash::{Hash, Hasher}; +#[cfg(not(no_global_oom_handling))] +use core::iter::FromIterator; +use core::iter::{FusedIterator, Iterator}; +use core::marker::{Unpin, Unsize}; +use core::mem; +use core::ops::{ + CoerceUnsized, Deref, DerefMut, DispatchFromDyn, Generator, GeneratorState, Receiver, +}; +use core::pin::Pin; +use core::ptr::{self, Unique}; +use core::stream::Stream; +use core::task::{Context, Poll}; + +#[cfg(not(no_global_oom_handling))] +use crate::alloc::{handle_alloc_error, WriteCloneIntoRaw}; +use crate::alloc::{AllocError, Allocator, Global, Layout}; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; +#[cfg(not(no_global_oom_handling))] +use crate::raw_vec::RawVec; +#[cfg(not(no_global_oom_handling))] +use crate::str::from_boxed_utf8_unchecked; +#[cfg(not(no_global_oom_handling))] +use crate::vec::Vec; + +/// A pointer type for heap allocation. +/// +/// See the [module-level documentation](../../std/boxed/index.html) for more. +#[lang = "owned_box"] +#[fundamental] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Box< + T: ?Sized, + #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global, +>(Unique, A); + +impl Box { + /// Allocates memory on the heap and then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// let five = Box::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline(always)] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(x: T) -> Self { + box x + } + + /// Constructs a new box with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::::new_uninit(); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn new_uninit() -> Box> { + Self::new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let zero = Box::::new_zeroed(); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed() -> Box> { + Self::new_zeroed_in(Global) + } + + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. 
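+    ///
+    /// # Examples
+    ///
+    /// A minimal usage sketch (added here; not part of the upstream doc):
+    ///
+    /// ```
+    /// let pinned = Box::pin(5);
+    /// assert_eq!(*pinned, 5);
+    /// ```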
+ #[cfg(not(no_global_oom_handling))] + #[stable(feature = "pin", since = "1.33.0")] + #[inline(always)] + pub fn pin(x: T) -> Pin> { + (box x).into() + } + + /// Allocates memory on the heap then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// let five = Box::try_new(5)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new(x: T) -> Result { + Self::try_new_in(x, Global) + } + + /// Constructs a new box with uninitialized contents on the heap, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let mut five = Box::::try_new_uninit()?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_uninit() -> Result>, AllocError> { + Box::try_new_uninit_in(Global) + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes on the heap + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// let zero = Box::::try_new_zeroed()?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub fn try_new_zeroed() -> Result>, AllocError> { + Box::try_new_zeroed_in(Global) + } +} + +impl Box { + /// Allocates memory in the given allocator then places `x` into it. + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::new_in(5, System); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn new_in(x: T, alloc: A) -> Self { + let mut boxed = Self::new_uninit_in(alloc); + unsafe { + boxed.as_mut_ptr().write(x); + boxed.assume_init() + } + } + + /// Allocates memory in the given allocator then places `x` into it, + /// returning an error if the allocation fails + /// + /// This doesn't actually allocate if `T` is zero-sized. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let five = Box::try_new_in(5, System)?; + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn try_new_in(x: T, alloc: A) -> Result { + let mut boxed = Self::try_new_uninit_in(alloc)?; + unsafe { + boxed.as_mut_ptr().write(x); + Ok(boxed.assume_init()) + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::::new_uninit_in(System); + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + #[cfg(not(no_global_oom_handling))] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_in(alloc: A) -> Box, A> { + let layout = Layout::new::>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. + match Box::try_new_uninit_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new box with uninitialized contents in the provided allocator, + /// returning an error if the allocation fails + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut five = Box::::try_new_uninit_in(System)?; + /// + /// let five = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_uninit_in(alloc: A) -> Result, A>, AllocError> { + let layout = Layout::new::>(); + let ptr = alloc.allocate(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::::new_zeroed_in(System); + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + #[cfg(not(no_global_oom_handling))] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_in(alloc: A) -> Box, A> { + let layout = Layout::new::>(); + // NOTE: Prefer match over unwrap_or_else since closure sometimes not inlineable. + // That would make code size bigger. + match Box::try_new_zeroed_in(alloc) { + Ok(m) => m, + Err(_) => handle_alloc_error(layout), + } + } + + /// Constructs a new `Box` with uninitialized contents, with the memory + /// being filled with `0` bytes in the provided allocator, + /// returning an error if the allocation fails, + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let zero = Box::::try_new_zeroed_in(System)?; + /// let zero = unsafe { zero.assume_init() }; + /// + /// assert_eq!(*zero, 0); + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn try_new_zeroed_in(alloc: A) -> Result, A>, AllocError> { + let layout = Layout::new::>(); + let ptr = alloc.allocate_zeroed(layout)?.cast(); + unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } + } + + /// Constructs a new `Pin>`. If `T` does not implement `Unpin`, then + /// `x` will be pinned in memory and unable to be moved. + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline(always)] + pub fn pin_in(x: T, alloc: A) -> Pin + where + A: 'static, + { + Self::new_in(x, alloc).into() + } + + /// Converts a `Box` into a `Box<[T]>` + /// + /// This conversion does not allocate on the heap and happens in place. + #[unstable(feature = "box_into_boxed_slice", issue = "71582")] + pub fn into_boxed_slice(boxed: Self) -> Box<[T], A> { + let (raw, alloc) = Box::into_raw_with_allocator(boxed); + unsafe { Box::from_raw_in(raw as *mut [T; 1], alloc) } + } + + /// Consumes the `Box`, returning the wrapped value. + /// + /// # Examples + /// + /// ``` + /// #![feature(box_into_inner)] + /// + /// let c = Box::new(5); + /// + /// assert_eq!(Box::into_inner(c), 5); + /// ``` + #[unstable(feature = "box_into_inner", issue = "80437")] + #[inline] + pub fn into_inner(boxed: Self) -> T { + *boxed + } +} + +impl Box<[T]> { + /// Constructs a new boxed slice with uninitialized contents. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut values = Box::<[u32]>::new_uninit_slice(3); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice(len: usize) -> Box<[mem::MaybeUninit]> { + unsafe { RawVec::with_capacity(len).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents, with the memory + /// being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let values = Box::<[u32]>::new_zeroed_slice(3); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice(len: usize) -> Box<[mem::MaybeUninit]> { + unsafe { RawVec::with_capacity_zeroed(len).into_box(len) } + } +} + +impl Box<[T], A> { + /// Constructs a new boxed slice with uninitialized contents in the provided allocator. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let mut values = Box::<[u32], _>::new_uninit_slice_in(3, System); + /// + /// let values = unsafe { + /// // Deferred initialization: + /// values[0].as_mut_ptr().write(1); + /// values[1].as_mut_ptr().write(2); + /// values[2].as_mut_ptr().write(3); + /// + /// values.assume_init() + /// }; + /// + /// assert_eq!(*values, [1, 2, 3]) + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_uninit_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit], A> { + unsafe { RawVec::with_capacity_in(len, alloc).into_box(len) } + } + + /// Constructs a new boxed slice with uninitialized contents in the provided allocator, + /// with the memory being filled with `0` bytes. + /// + /// See [`MaybeUninit::zeroed`][zeroed] for examples of correct and incorrect usage + /// of this method. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api, new_uninit)] + /// + /// use std::alloc::System; + /// + /// let values = Box::<[u32], _>::new_zeroed_slice_in(3, System); + /// let values = unsafe { values.assume_init() }; + /// + /// assert_eq!(*values, [0, 0, 0]) + /// ``` + /// + /// [zeroed]: mem::MaybeUninit::zeroed + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "allocator_api", issue = "32838")] + // #[unstable(feature = "new_uninit", issue = "63291")] + pub fn new_zeroed_slice_in(len: usize, alloc: A) -> Box<[mem::MaybeUninit], A> { + unsafe { RawVec::with_capacity_zeroed_in(len, alloc).into_box(len) } + } +} + +impl Box, A> { + /// Converts to `Box`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the value + /// really is in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. + /// + /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init + /// + /// # Examples + /// + /// ``` + /// #![feature(new_uninit)] + /// + /// let mut five = Box::::new_uninit(); + /// + /// let five: Box = unsafe { + /// // Deferred initialization: + /// five.as_mut_ptr().write(5); + /// + /// five.assume_init() + /// }; + /// + /// assert_eq!(*five, 5) + /// ``` + #[unstable(feature = "new_uninit", issue = "63291")] + #[inline] + pub unsafe fn assume_init(self) -> Box { + let (raw, alloc) = Box::into_raw_with_allocator(self); + unsafe { Box::from_raw_in(raw as *mut T, alloc) } + } +} + +impl Box<[mem::MaybeUninit], A> { + /// Converts to `Box<[T], A>`. + /// + /// # Safety + /// + /// As with [`MaybeUninit::assume_init`], + /// it is up to the caller to guarantee that the values + /// really are in an initialized state. + /// Calling this when the content is not yet fully initialized + /// causes immediate undefined behavior. 
+
+impl<T, A: Allocator> Box<[mem::MaybeUninit<T>], A> {
+    /// Converts to `Box<[T], A>`.
+    ///
+    /// # Safety
+    ///
+    /// As with [`MaybeUninit::assume_init`],
+    /// it is up to the caller to guarantee that the values
+    /// really are in an initialized state.
+    /// Calling this when the content is not yet fully initialized
+    /// causes immediate undefined behavior.
+    ///
+    /// [`MaybeUninit::assume_init`]: mem::MaybeUninit::assume_init
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(new_uninit)]
+    ///
+    /// let mut values = Box::<[u32]>::new_uninit_slice(3);
+    ///
+    /// let values = unsafe {
+    ///     // Deferred initialization:
+    ///     values[0].as_mut_ptr().write(1);
+    ///     values[1].as_mut_ptr().write(2);
+    ///     values[2].as_mut_ptr().write(3);
+    ///
+    ///     values.assume_init()
+    /// };
+    ///
+    /// assert_eq!(*values, [1, 2, 3])
+    /// ```
+    #[unstable(feature = "new_uninit", issue = "63291")]
+    #[inline]
+    pub unsafe fn assume_init(self) -> Box<[T], A> {
+        let (raw, alloc) = Box::into_raw_with_allocator(self);
+        unsafe { Box::from_raw_in(raw as *mut [T], alloc) }
+    }
+}
+
+impl<T: ?Sized> Box<T> {
+    /// Constructs a box from a raw pointer.
+    ///
+    /// After calling this function, the raw pointer is owned by the
+    /// resulting `Box`. Specifically, the `Box` destructor will call
+    /// the destructor of `T` and free the allocated memory. For this
+    /// to be safe, the memory must have been allocated in accordance
+    /// with the [memory layout] used by `Box`.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because improper use may lead to
+    /// memory problems. For example, a double-free may occur if the
+    /// function is called twice on the same raw pointer.
+    ///
+    /// The safety conditions are described in the [memory layout] section.
+    ///
+    /// # Examples
+    ///
+    /// Recreate a `Box` which was previously converted to a raw pointer
+    /// using [`Box::into_raw`]:
+    /// ```
+    /// let x = Box::new(5);
+    /// let ptr = Box::into_raw(x);
+    /// let x = unsafe { Box::from_raw(ptr) };
+    /// ```
+    /// Manually create a `Box` from scratch by using the global allocator:
+    /// ```
+    /// use std::alloc::{alloc, Layout};
+    ///
+    /// unsafe {
+    ///     let ptr = alloc(Layout::new::<i32>()) as *mut i32;
+    ///     // In general .write is required to avoid attempting to destruct
+    ///     // the (uninitialized) previous contents of `ptr`, though for this
+    ///     // simple example `*ptr = 5` would have worked as well.
+    ///     ptr.write(5);
+    ///     let x = Box::from_raw(ptr);
+    /// }
+    /// ```
+    ///
+    /// [memory layout]: self#memory-layout
+    /// [`Layout`]: crate::Layout
+    #[stable(feature = "box_raw", since = "1.4.0")]
+    #[inline]
+    pub unsafe fn from_raw(raw: *mut T) -> Self {
+        unsafe { Self::from_raw_in(raw, Global) }
+    }
+}
+
+impl<T: ?Sized, A: Allocator> Box<T, A> {
+    /// Constructs a box from a raw pointer in the given allocator.
+    ///
+    /// After calling this function, the raw pointer is owned by the
+    /// resulting `Box`. Specifically, the `Box` destructor will call
+    /// the destructor of `T` and free the allocated memory. For this
+    /// to be safe, the memory must have been allocated in accordance
+    /// with the [memory layout] used by `Box`.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because improper use may lead to
+    /// memory problems. For example, a double-free may occur if the
+    /// function is called twice on the same raw pointer.
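A common reason to reach for `into_raw`/`from_raw` is handing a box across an ownership boundary as a plain pointer, for example a C-style opaque handle. A minimal round-trip sketch on stable Rust:

```
fn main() {
    let boxed = Box::new(String::from("handle"));
    let raw: *mut String = Box::into_raw(boxed);

    // ... `raw` may now cross an FFI or registry boundary ...

    // SAFETY: `raw` came from `Box::into_raw` and is reclaimed exactly once.
    let reclaimed = unsafe { Box::from_raw(raw) };
    assert_eq!(*reclaimed, "handle");
    // Dropping `reclaimed` frees the allocation normally.
}
```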
+ /// + /// + /// # Examples + /// + /// Recreate a `Box` which was previously converted to a raw pointer + /// using [`Box::into_raw_with_allocator`]: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(5, System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manually create a `Box` from scratch by using the system allocator: + /// ``` + /// #![feature(allocator_api, slice_ptr_get)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// + /// unsafe { + /// let ptr = System.allocate(Layout::new::())?.as_mut_ptr() as *mut i32; + /// // In general .write is required to avoid attempting to destruct + /// // the (uninitialized) previous contents of `ptr`, though for this + /// // simple example `*ptr = 5` would have worked as well. + /// ptr.write(5); + /// let x = Box::from_raw_in(ptr, System); + /// } + /// # Ok::<(), std::alloc::AllocError>(()) + /// ``` + /// + /// [memory layout]: self#memory-layout + /// [`Layout`]: crate::Layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub unsafe fn from_raw_in(raw: *mut T, alloc: A) -> Self { + Box(unsafe { Unique::new_unchecked(raw) }, alloc) + } + + /// Consumes the `Box`, returning a wrapped raw pointer. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw`] function, allowing the `Box` destructor to perform + /// the cleanup. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw(b)` instead of `b.into_raw()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw`] + /// for automatic cleanup: + /// ``` + /// let x = Box::new(String::from("Hello")); + /// let ptr = Box::into_raw(x); + /// let x = unsafe { Box::from_raw(ptr) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// use std::alloc::{dealloc, Layout}; + /// use std::ptr; + /// + /// let x = Box::new(String::from("Hello")); + /// let p = Box::into_raw(x); + /// unsafe { + /// ptr::drop_in_place(p); + /// dealloc(p as *mut u8, Layout::new::()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[stable(feature = "box_raw", since = "1.4.0")] + #[inline] + pub fn into_raw(b: Self) -> *mut T { + Self::into_raw_with_allocator(b).0 + } + + /// Consumes the `Box`, returning a wrapped raw pointer and the allocator. + /// + /// The pointer will be properly aligned and non-null. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `Box`. In particular, the + /// caller should properly destroy `T` and release the memory, taking + /// into account the [memory layout] used by `Box`. The easiest way to + /// do this is to convert the raw pointer back into a `Box` with the + /// [`Box::from_raw_in`] function, allowing the `Box` destructor to perform + /// the cleanup. 
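The same round trip works for unsized boxes: `Box<[T]>` converts to a fat `*mut [T]` that carries its length, so no separate length bookkeeping is needed. A small sketch:

```
fn main() {
    let slice: Box<[i32]> = vec![1, 2, 3].into_boxed_slice();

    // The raw form of a boxed slice is a fat pointer that keeps the length.
    let raw: *mut [i32] = Box::into_raw(slice);

    // SAFETY: `raw` came from `Box::into_raw` and is converted back once.
    let slice = unsafe { Box::from_raw(raw) };
    assert_eq!(&*slice, &[1, 2, 3]);
}
```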
+ /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::into_raw_with_allocator(b)` instead of `b.into_raw_with_allocator()`. This + /// is so that there is no conflict with a method on the inner type. + /// + /// # Examples + /// Converting the raw pointer back into a `Box` with [`Box::from_raw_in`] + /// for automatic cleanup: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// let x = unsafe { Box::from_raw_in(ptr, alloc) }; + /// ``` + /// Manual cleanup by explicitly running the destructor and deallocating + /// the memory: + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::{Allocator, Layout, System}; + /// use std::ptr::{self, NonNull}; + /// + /// let x = Box::new_in(String::from("Hello"), System); + /// let (ptr, alloc) = Box::into_raw_with_allocator(x); + /// unsafe { + /// ptr::drop_in_place(ptr); + /// let non_null = NonNull::new_unchecked(ptr); + /// alloc.deallocate(non_null.cast(), Layout::new::()); + /// } + /// ``` + /// + /// [memory layout]: self#memory-layout + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn into_raw_with_allocator(b: Self) -> (*mut T, A) { + let (leaked, alloc) = Box::into_unique(b); + (leaked.as_ptr(), alloc) + } + + #[unstable( + feature = "ptr_internals", + issue = "none", + reason = "use `Box::leak(b).into()` or `Unique::from(Box::leak(b))` instead" + )] + #[inline] + #[doc(hidden)] + pub fn into_unique(b: Self) -> (Unique, A) { + // Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a + // raw pointer for the type system. Turning it directly into a raw pointer would not be + // recognized as "releasing" the unique pointer to permit aliased raw accesses, + // so all raw pointer methods have to go through `Box::leak`. Turning *that* to a raw pointer + // behaves correctly. + let alloc = unsafe { ptr::read(&b.1) }; + (Unique::from(Box::leak(b)), alloc) + } + + /// Returns a reference to the underlying allocator. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::allocator(&b)` instead of `b.allocator()`. This + /// is so that there is no conflict with a method on the inner type. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(b: &Self) -> &A { + &b.1 + } + + /// Consumes and leaks the `Box`, returning a mutable reference, + /// `&'a mut T`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. If this is not acceptable, the reference should first be wrapped + /// with the [`Box::from_raw`] function producing a `Box`. This `Box` can + /// then be dropped which will properly destroy `T` and release the + /// allocated memory. + /// + /// Note: this is an associated function, which means that you have + /// to call it as `Box::leak(b)` instead of `b.leak()`. This + /// is so that there is no conflict with a method on the inner type. 
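`into_raw_with_allocator` matters because the allocator is part of the box's identity: the raw pointer alone is not enough to free the memory. A sketch with a toy delegating allocator (`Counting` and `LIVE` are invented for the illustration; the nightly `allocator_api` feature is assumed):

```
#![feature(allocator_api)]

use std::alloc::{AllocError, Allocator, Global, Layout};
use std::ptr::NonNull;
use std::sync::atomic::{AtomicUsize, Ordering};

// Toy allocator that counts live allocations while delegating to Global.
struct Counting;

static LIVE: AtomicUsize = AtomicUsize::new(0);

unsafe impl Allocator for Counting {
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        LIVE.fetch_add(1, Ordering::Relaxed);
        Global.allocate(layout)
    }

    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        LIVE.fetch_sub(1, Ordering::Relaxed);
        unsafe { Global.deallocate(ptr, layout) }
    }
}

fn main() {
    let b = Box::new_in(7u32, Counting);
    assert_eq!(LIVE.load(Ordering::Relaxed), 1);

    // The allocator travels with the raw pointer; both are needed to rebuild
    // (and eventually free) the box.
    let (ptr, alloc) = Box::into_raw_with_allocator(b);
    let b = unsafe { Box::from_raw_in(ptr, alloc) };

    drop(b);
    assert_eq!(LIVE.load(Ordering::Relaxed), 0);
}
```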
+ /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = Box::new(41); + /// let static_ref: &'static mut usize = Box::leak(x); + /// *static_ref += 1; + /// assert_eq!(*static_ref, 42); + /// ``` + /// + /// Unsized data: + /// + /// ``` + /// let x = vec![1, 2, 3].into_boxed_slice(); + /// let static_ref = Box::leak(x); + /// static_ref[0] = 4; + /// assert_eq!(*static_ref, [4, 2, 3]); + /// ``` + #[stable(feature = "box_leak", since = "1.26.0")] + #[inline] + pub fn leak<'a>(b: Self) -> &'a mut T + where + A: 'a, + { + unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() } + } + + /// Converts a `Box` into a `Pin>` + /// + /// This conversion does not allocate on the heap and happens in place. + /// + /// This is also available via [`From`]. + #[unstable(feature = "box_into_pin", issue = "62370")] + pub fn into_pin(boxed: Self) -> Pin + where + A: 'static, + { + // It's not possible to move or replace the insides of a `Pin>` + // when `T: !Unpin`, so it's safe to pin it directly without any + // additional requirements. + unsafe { Pin::new_unchecked(boxed) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box { + fn drop(&mut self) { + // FIXME: Do nothing, drop is currently performed by compiler. + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Box { + /// Creates a `Box`, with the `Default` value for T. + fn default() -> Self { + box T::default() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Box<[T]> { + fn default() -> Self { + Box::<[T; 0]>::new([]) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "default_box_extra", since = "1.17.0")] +impl Default for Box { + fn default() -> Self { + unsafe { from_boxed_utf8_unchecked(Default::default()) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Box { + /// Returns a new box with a `clone()` of this box's contents. + /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let y = x.clone(); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // But they are unique objects + /// assert_ne!(&*x as *const i32, &*y as *const i32); + /// ``` + #[inline] + fn clone(&self) -> Self { + // Pre-allocate memory to allow writing the cloned value directly. + let mut boxed = Self::new_uninit_in(self.1.clone()); + unsafe { + (**self).write_clone_into_raw(boxed.as_mut_ptr()); + boxed.assume_init() + } + } + + /// Copies `source`'s contents into `self` without creating a new allocation. 
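A typical use of `Box::leak` is promoting runtime-built data to `'static` for values that live for the rest of the process; `make_static` below is a hypothetical helper:

```
// Leaks the box backing the string; the allocation is never freed, which is
// the intended trade-off for process-lifetime data.
fn make_static(s: String) -> &'static str {
    Box::leak(s.into_boxed_str())
}

fn main() {
    let id = make_static(format!("worker-{}", 7));
    assert_eq!(id, "worker-7");
}
```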
+ /// + /// # Examples + /// + /// ``` + /// let x = Box::new(5); + /// let mut y = Box::new(10); + /// let yp: *const i32 = &*y; + /// + /// y.clone_from(&x); + /// + /// // The value is the same + /// assert_eq!(x, y); + /// + /// // And no allocation occurred + /// assert_eq!(yp, &*y); + /// ``` + #[inline] + fn clone_from(&mut self, source: &Self) { + (**self).clone_from(&(**source)); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl Clone for Box { + fn clone(&self) -> Self { + // this makes a copy of the data + let buf: Box<[u8]> = self.as_bytes().into(); + unsafe { from_boxed_utf8_unchecked(buf) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for Box { + #[inline] + fn eq(&self, other: &Self) -> bool { + PartialEq::eq(&**self, &**other) + } + #[inline] + fn ne(&self, other: &Self) -> bool { + PartialEq::ne(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Box { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } + #[inline] + fn lt(&self, other: &Self) -> bool { + PartialOrd::lt(&**self, &**other) + } + #[inline] + fn le(&self, other: &Self) -> bool { + PartialOrd::le(&**self, &**other) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + PartialOrd::ge(&**self, &**other) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + PartialOrd::gt(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Box { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Box {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Box { + fn hash(&self, state: &mut H) { + (**self).hash(state); + } +} + +#[stable(feature = "indirect_hasher_impl", since = "1.22.0")] +impl Hasher for Box { + fn finish(&self) -> u64 { + (**self).finish() + } + fn write(&mut self, bytes: &[u8]) { + (**self).write(bytes) + } + fn write_u8(&mut self, i: u8) { + (**self).write_u8(i) + } + fn write_u16(&mut self, i: u16) { + (**self).write_u16(i) + } + fn write_u32(&mut self, i: u32) { + (**self).write_u32(i) + } + fn write_u64(&mut self, i: u64) { + (**self).write_u64(i) + } + fn write_u128(&mut self, i: u128) { + (**self).write_u128(i) + } + fn write_usize(&mut self, i: usize) { + (**self).write_usize(i) + } + fn write_i8(&mut self, i: i8) { + (**self).write_i8(i) + } + fn write_i16(&mut self, i: i16) { + (**self).write_i16(i) + } + fn write_i32(&mut self, i: i32) { + (**self).write_i32(i) + } + fn write_i64(&mut self, i: i64) { + (**self).write_i64(i) + } + fn write_i128(&mut self, i: i128) { + (**self).write_i128(i) + } + fn write_isize(&mut self, i: isize) { + (**self).write_isize(i) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_for_ptrs", since = "1.6.0")] +impl From for Box { + /// Converts a `T` into a `Box` + /// + /// The conversion allocates on the heap and moves `t` + /// from the stack into it. + /// + /// # Examples + /// ```rust + /// let x = 5; + /// let boxed = Box::new(5); + /// + /// assert_eq!(Box::from(x), boxed); + /// ``` + fn from(t: T) -> Self { + Box::new(t) + } +} + +#[stable(feature = "pin", since = "1.33.0")] +impl From> for Pin> +where + A: 'static, +{ + /// Converts a `Box` into a `Pin>` + /// + /// This conversion does not allocate on the heap and happens in place. 
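This `From` impl is the usual way an `!Unpin` value gets pinned. A stable-Rust sketch using `PhantomPinned` (the `Addressed` type is invented for the illustration):

```
use std::marker::PhantomPinned;
use std::pin::Pin;

// `PhantomPinned` opts the type out of `Unpin`, so once pinned it can no
// longer be moved out of its box in safe code.
struct Addressed {
    value: u32,
    _pin: PhantomPinned,
}

fn main() {
    let boxed = Box::new(Addressed { value: 1, _pin: PhantomPinned });
    // In-place conversion via the `From` impl; no new allocation happens.
    let pinned: Pin<Box<Addressed>> = Pin::from(boxed);
    assert_eq!(pinned.value, 1);
    // `Pin::into_inner(pinned)` would not compile here: it requires `Unpin`.
}
```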
+ fn from(boxed: Box) -> Self { + Box::into_pin(boxed) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl From<&[T]> for Box<[T]> { + /// Converts a `&[T]` into a `Box<[T]>` + /// + /// This conversion allocates on the heap + /// and performs a copy of `slice`. + /// + /// # Examples + /// ```rust + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice: Box<[u8]> = Box::from(slice); + /// + /// println!("{:?}", boxed_slice); + /// ``` + fn from(slice: &[T]) -> Box<[T]> { + let len = slice.len(); + let buf = RawVec::with_capacity(len); + unsafe { + ptr::copy_nonoverlapping(slice.as_ptr(), buf.ptr(), len); + buf.into_box(slice.len()).assume_init() + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box<[T]> { + #[inline] + fn from(cow: Cow<'_, [T]>) -> Box<[T]> { + match cow { + Cow::Borrowed(slice) => Box::from(slice), + Cow::Owned(slice) => Box::from(slice), + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_slice", since = "1.17.0")] +impl From<&str> for Box { + /// Converts a `&str` into a `Box` + /// + /// This conversion allocates on the heap + /// and performs a copy of `s`. + /// + /// # Examples + /// ```rust + /// let boxed: Box = Box::from("hello"); + /// println!("{}", boxed); + /// ``` + #[inline] + fn from(s: &str) -> Box { + unsafe { from_boxed_utf8_unchecked(Box::from(s.as_bytes())) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box { + #[inline] + fn from(cow: Cow<'_, str>) -> Box { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[stable(feature = "boxed_str_conv", since = "1.19.0")] +impl From> for Box<[u8], A> { + /// Converts a `Box` into a `Box<[u8]>` + /// + /// This conversion does not allocate on the heap and happens in place. + /// + /// # Examples + /// ```rust + /// // create a Box which will be used to create a Box<[u8]> + /// let boxed: Box = Box::from("hello"); + /// let boxed_str: Box<[u8]> = Box::from(boxed); + /// + /// // create a &[u8] which will be used to create a Box<[u8]> + /// let slice: &[u8] = &[104, 101, 108, 108, 111]; + /// let boxed_slice = Box::from(slice); + /// + /// assert_eq!(boxed_slice, boxed_str); + /// ``` + #[inline] + fn from(s: Box) -> Self { + let (raw, alloc) = Box::into_raw_with_allocator(s); + unsafe { Box::from_raw_in(raw as *mut [u8], alloc) } + } +} + +#[stable(feature = "box_from_array", since = "1.45.0")] +impl From<[T; N]> for Box<[T]> { + /// Converts a `[T; N]` into a `Box<[T]>` + /// + /// This conversion moves the array to newly heap-allocated memory. + /// + /// # Examples + /// ```rust + /// let boxed: Box<[u8]> = Box::from([4, 2]); + /// println!("{:?}", boxed); + /// ``` + fn from(array: [T; N]) -> Box<[T]> { + box array + } +} + +#[stable(feature = "boxed_slice_try_from", since = "1.43.0")] +impl TryFrom> for Box<[T; N]> { + type Error = Box<[T]>; + + fn try_from(boxed_slice: Box<[T]>) -> Result { + if boxed_slice.len() == N { + Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) }) + } else { + Err(boxed_slice) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + /// Attempt to downcast the box to a concrete type. 
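The `TryFrom<Box<[T]>>` impl above is handy when a length is only known at runtime but an array type is wanted on success; the failing case hands the original box back. A quick sketch:

```
use std::convert::TryFrom;

fn main() {
    // Length matches: the boxed slice becomes a boxed array in place.
    let three: Box<[u8]> = vec![1, 2, 3].into_boxed_slice();
    let arr: Box<[u8; 3]> = Box::try_from(three).unwrap();
    assert_eq!(*arr, [1, 2, 3]);

    // Length mismatch: the original boxed slice comes back as the error.
    let two: Box<[u8]> = vec![1, 2].into_boxed_slice();
    assert!(Box::<[u8; 3]>::try_from(two).is_err());
}
```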
+ /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut dyn Any, _) = Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut (dyn Any + Send), _) = Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +impl Box { + #[inline] + #[stable(feature = "box_send_sync_any_downcast", since = "1.51.0")] + /// Attempt to downcast the box to a concrete type. + /// + /// # Examples + /// + /// ``` + /// use std::any::Any; + /// + /// fn print_if_string(value: Box) { + /// if let Ok(string) = value.downcast::() { + /// println!("String ({}): {}", string.len(), string); + /// } + /// } + /// + /// let my_string = "Hello World".to_string(); + /// print_if_string(Box::new(my_string)); + /// print_if_string(Box::new(0i8)); + /// ``` + pub fn downcast(self) -> Result, Self> { + if self.is::() { + unsafe { + let (raw, alloc): (*mut (dyn Any + Send + Sync), _) = + Box::into_raw_with_allocator(self); + Ok(Box::from_raw_in(raw as *mut T, alloc)) + } + } else { + Err(self) + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Pointer for Box { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // It's not possible to extract the inner Uniq directly from the Box, + // instead we cast it to a *const which aliases the Unique + let ptr: *const T = &**self; + fmt::Pointer::fmt(&ptr, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Deref for Box { + type Target = T; + + fn deref(&self) -> &T { + &**self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl DerefMut for Box { + fn deref_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[unstable(feature = "receiver_trait", issue = "none")] +impl Receiver for Box {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for Box { + type Item = I::Item; + fn next(&mut self) -> Option { + (**self).next() + } + fn size_hint(&self) -> (usize, Option) { + (**self).size_hint() + } + fn nth(&mut self, n: usize) -> Option { + (**self).nth(n) + } + fn last(self) -> Option { + BoxIter::last(self) + } +} + +trait BoxIter { + type Item; + fn 
last(self) -> Option; +} + +impl BoxIter for Box { + type Item = I::Item; + default fn last(self) -> Option { + #[inline] + fn some(_: Option, x: T) -> Option { + Some(x) + } + + self.fold(None, some) + } +} + +/// Specialization for sized `I`s that uses `I`s implementation of `last()` +/// instead of the default. +#[stable(feature = "rust1", since = "1.0.0")] +impl BoxIter for Box { + fn last(self) -> Option { + (*self).last() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl DoubleEndedIterator for Box { + fn next_back(&mut self) -> Option { + (**self).next_back() + } + fn nth_back(&mut self, n: usize) -> Option { + (**self).nth_back(n) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for Box { + fn len(&self) -> usize { + (**self).len() + } + fn is_empty(&self) -> bool { + (**self).is_empty() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Box {} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> FnOnce for Box { + type Output = >::Output; + + extern "rust-call" fn call_once(self, args: Args) -> Self::Output { + >::call_once(*self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> FnMut for Box { + extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output { + >::call_mut(self, args) + } +} + +#[stable(feature = "boxed_closure_impls", since = "1.35.0")] +impl + ?Sized, A: Allocator> Fn for Box { + extern "rust-call" fn call(&self, args: Args) -> Self::Output { + >::call(self, args) + } +} + +#[unstable(feature = "coerce_unsized", issue = "27732")] +impl, U: ?Sized, A: Allocator> CoerceUnsized> for Box {} + +#[unstable(feature = "dispatch_from_dyn", issue = "none")] +impl, U: ?Sized> DispatchFromDyn> for Box {} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "boxed_slice_from_iter", since = "1.32.0")] +impl FromIterator for Box<[I]> { + fn from_iter>(iter: T) -> Self { + iter.into_iter().collect::>().into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_slice_clone", since = "1.3.0")] +impl Clone for Box<[T], A> { + fn clone(&self) -> Self { + let alloc = Box::allocator(self).clone(); + self.to_vec_in(alloc).into_boxed_slice() + } + + fn clone_from(&mut self, other: &Self) { + if self.len() == other.len() { + self.clone_from_slice(&other); + } else { + *self = other.clone(); + } + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl borrow::Borrow for Box { + fn borrow(&self) -> &T { + &**self + } +} + +#[stable(feature = "box_borrow", since = "1.1.0")] +impl borrow::BorrowMut for Box { + fn borrow_mut(&mut self) -> &mut T { + &mut **self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsRef for Box { + fn as_ref(&self) -> &T { + &**self + } +} + +#[stable(since = "1.5.0", feature = "smart_ptr_as_ref")] +impl AsMut for Box { + fn as_mut(&mut self) -> &mut T { + &mut **self + } +} + +/* Nota bene + * + * We could have chosen not to add this impl, and instead have written a + * function of Pin> to Pin. Such a function would not be sound, + * because Box implements Unpin even when T does not, as a result of + * this impl. + * + * We chose this API instead of the alternative for a few reasons: + * - Logically, it is helpful to understand pinning in regard to the + * memory region being pointed to. 
For this reason none of the + * standard library pointer types support projecting through a pin + * (Box is the only pointer type in std for which this would be + * safe.) + * - It is in practice very useful to have Box be unconditionally + * Unpin because of trait objects, for which the structural auto + * trait functionality does not apply (e.g., Box would + * otherwise not be Unpin). + * + * Another type with the same semantics as Box but only a conditional + * implementation of `Unpin` (where `T: Unpin`) would be valid/safe, and + * could have a method to project a Pin from it. + */ +#[stable(feature = "pin", since = "1.33.0")] +impl Unpin for Box where A: 'static {} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl + Unpin, R, A: Allocator> Generator for Box +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + G::resume(Pin::new(&mut *self), arg) + } +} + +#[unstable(feature = "generator_trait", issue = "43122")] +impl, R, A: Allocator> Generator for Pin> +where + A: 'static, +{ + type Yield = G::Yield; + type Return = G::Return; + + fn resume(mut self: Pin<&mut Self>, arg: R) -> GeneratorState { + G::resume((*self).as_mut(), arg) + } +} + +#[stable(feature = "futures_api", since = "1.36.0")] +impl Future for Box +where + A: 'static, +{ + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + F::poll(Pin::new(&mut *self), cx) + } +} + +#[unstable(feature = "async_stream", issue = "79024")] +impl Stream for Box { + type Item = S::Item; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut **self).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + (**self).size_hint() + } +} diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs new file mode 100644 index 0000000000000..c320d360a8ffc --- /dev/null +++ b/rust/alloc/collections/mod.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Collection types. + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +pub mod binary_heap; +#[cfg(not(no_global_oom_handling))] +mod btree; +#[cfg(not(no_global_oom_handling))] +pub mod linked_list; +#[cfg(not(no_global_oom_handling))] +pub mod vec_deque; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_map { + //! A map based on a B-Tree. + #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::map::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod btree_set { + //! A set based on a B-Tree. 
+ #[stable(feature = "rust1", since = "1.0.0")] + pub use super::btree::set::*; +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use binary_heap::BinaryHeap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_map::BTreeMap; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use btree_set::BTreeSet; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use linked_list::LinkedList; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +#[doc(no_inline)] +pub use vec_deque::VecDeque; + +use crate::alloc::{Layout, LayoutError}; +use core::fmt::Display; + +/// The error type for `try_reserve` methods. +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +pub enum TryReserveError { + /// Error due to the computed capacity exceeding the collection's maximum + /// (usually `isize::MAX` bytes). + CapacityOverflow, + + /// The memory allocator returned an error + AllocError { + /// The layout of allocation request that failed + layout: Layout, + + #[doc(hidden)] + #[unstable( + feature = "container_error_extra", + issue = "none", + reason = "\ + Enable exposing the allocator’s custom error value \ + if an associated type is added in the future: \ + https://github.com/rust-lang/wg-allocators/issues/23" + )] + non_exhaustive: (), + }, +} + +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +impl From for TryReserveError { + /// Always evaluates to [`TryReserveError::CapacityOverflow`]. + #[inline] + fn from(_: LayoutError) -> Self { + TryReserveError::CapacityOverflow + } +} + +#[unstable(feature = "try_reserve", reason = "new API", issue = "48043")] +impl Display for TryReserveError { + fn fmt( + &self, + fmt: &mut core::fmt::Formatter<'_>, + ) -> core::result::Result<(), core::fmt::Error> { + fmt.write_str("memory allocation failed")?; + let reason = match &self { + TryReserveError::CapacityOverflow => { + " because the computed capacity exceeded the collection's maximum" + } + TryReserveError::AllocError { .. } => " because the memory allocator returned a error", + }; + fmt.write_str(reason) + } +} + +/// An intermediate trait for specialization of `Extend`. +#[doc(hidden)] +trait SpecExtend { + /// Extends `self` with the contents of the given iterator. + fn spec_extend(&mut self, iter: I); +} diff --git a/rust/alloc/fmt.rs b/rust/alloc/fmt.rs new file mode 100644 index 0000000000000..9c4e0b2f2111d --- /dev/null +++ b/rust/alloc/fmt.rs @@ -0,0 +1,587 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities for formatting and printing `String`s. +//! +//! This module contains the runtime support for the [`format!`] syntax extension. +//! This macro is implemented in the compiler to emit calls to this module in +//! order to format arguments at runtime into strings. +//! +//! # Usage +//! +//! The [`format!`] macro is intended to be familiar to those coming from C's +//! `printf`/`fprintf` functions or Python's `str.format` function. +//! +//! Some examples of the [`format!`] extension are: +//! +//! ``` +//! format!("Hello"); // => "Hello" +//! format!("Hello, {}!", "world"); // => "Hello, world!" +//! format!("The number is {}", 1); // => "The number is 1" +//! format!("{:?}", (3, 4)); // => "(3, 4)" +//! 
format!("{value}", value=4); // => "4" +//! format!("{} {}", 1, 2); // => "1 2" +//! format!("{:04}", 42); // => "0042" with leading zeros +//! format!("{:#?}", (100, 200)); // => "( +//! // 100, +//! // 200, +//! // )" +//! ``` +//! +//! From these, you can see that the first argument is a format string. It is +//! required by the compiler for this to be a string literal; it cannot be a +//! variable passed in (in order to perform validity checking). The compiler +//! will then parse the format string and determine if the list of arguments +//! provided is suitable to pass to this format string. +//! +//! To convert a single value to a string, use the [`to_string`] method. This +//! will use the [`Display`] formatting trait. +//! +//! ## Positional parameters +//! +//! Each formatting argument is allowed to specify which value argument it's +//! referencing, and if omitted it is assumed to be "the next argument". For +//! example, the format string `{} {} {}` would take three parameters, and they +//! would be formatted in the same order as they're given. The format string +//! `{2} {1} {0}`, however, would format arguments in reverse order. +//! +//! Things can get a little tricky once you start intermingling the two types of +//! positional specifiers. The "next argument" specifier can be thought of as an +//! iterator over the argument. Each time a "next argument" specifier is seen, +//! the iterator advances. This leads to behavior like this: +//! +//! ``` +//! format!("{1} {} {0} {}", 1, 2); // => "2 1 1 2" +//! ``` +//! +//! The internal iterator over the argument has not been advanced by the time +//! the first `{}` is seen, so it prints the first argument. Then upon reaching +//! the second `{}`, the iterator has advanced forward to the second argument. +//! Essentially, parameters that explicitly name their argument do not affect +//! parameters that do not name an argument in terms of positional specifiers. +//! +//! A format string is required to use all of its arguments, otherwise it is a +//! compile-time error. You may refer to the same argument more than once in the +//! format string. +//! +//! ## Named parameters +//! +//! Rust itself does not have a Python-like equivalent of named parameters to a +//! function, but the [`format!`] macro is a syntax extension that allows it to +//! leverage named parameters. Named parameters are listed at the end of the +//! argument list and have the syntax: +//! +//! ```text +//! identifier '=' expression +//! ``` +//! +//! For example, the following [`format!`] expressions all use named argument: +//! +//! ``` +//! format!("{argument}", argument = "test"); // => "test" +//! format!("{name} {}", 1, name = 2); // => "2 1" +//! format!("{a} {c} {b}", a="a", b='b', c=3); // => "a 3 b" +//! ``` +//! +//! It is not valid to put positional parameters (those without names) after +//! arguments that have names. Like with positional parameters, it is not +//! valid to provide named parameters that are unused by the format string. +//! +//! # Formatting Parameters +//! +//! Each argument being formatted can be transformed by a number of formatting +//! parameters (corresponding to `format_spec` in [the syntax](#syntax)). These +//! parameters affect the string representation of what's being formatted. +//! +//! ## Width +//! +//! ``` +//! // All of these print "Hello x !" +//! println!("Hello {:5}!", "x"); +//! println!("Hello {:1$}!", "x", 5); +//! println!("Hello {1:0$}!", 5, "x"); +//! println!("Hello {:width$}!", "x", width = 5); +//! 
``` +//! +//! This is a parameter for the "minimum width" that the format should take up. +//! If the value's string does not fill up this many characters, then the +//! padding specified by fill/alignment will be used to take up the required +//! space (see below). +//! +//! The value for the width can also be provided as a [`usize`] in the list of +//! parameters by adding a postfix `$`, indicating that the second argument is +//! a [`usize`] specifying the width. +//! +//! Referring to an argument with the dollar syntax does not affect the "next +//! argument" counter, so it's usually a good idea to refer to arguments by +//! position, or use named arguments. +//! +//! ## Fill/Alignment +//! +//! ``` +//! assert_eq!(format!("Hello {:<5}!", "x"), "Hello x !"); +//! assert_eq!(format!("Hello {:-<5}!", "x"), "Hello x----!"); +//! assert_eq!(format!("Hello {:^5}!", "x"), "Hello x !"); +//! assert_eq!(format!("Hello {:>5}!", "x"), "Hello x!"); +//! ``` +//! +//! The optional fill character and alignment is provided normally in conjunction with the +//! [`width`](#width) parameter. It must be defined before `width`, right after the `:`. +//! This indicates that if the value being formatted is smaller than +//! `width` some extra characters will be printed around it. +//! Filling comes in the following variants for different alignments: +//! +//! * `[fill]<` - the argument is left-aligned in `width` columns +//! * `[fill]^` - the argument is center-aligned in `width` columns +//! * `[fill]>` - the argument is right-aligned in `width` columns +//! +//! The default [fill/alignment](#fillalignment) for non-numerics is a space and +//! left-aligned. The +//! default for numeric formatters is also a space character but with right-alignment. If +//! the `0` flag (see below) is specified for numerics, then the implicit fill character is +//! `0`. +//! +//! Note that alignment may not be implemented by some types. In particular, it +//! is not generally implemented for the `Debug` trait. A good way to ensure +//! padding is applied is to format your input, then pad this resulting string +//! to obtain your output: +//! +//! ``` +//! println!("Hello {:^15}!", format!("{:?}", Some("hi"))); // => "Hello Some("hi") !" +//! ``` +//! +//! ## Sign/`#`/`0` +//! +//! ``` +//! assert_eq!(format!("Hello {:+}!", 5), "Hello +5!"); +//! assert_eq!(format!("{:#x}!", 27), "0x1b!"); +//! assert_eq!(format!("Hello {:05}!", 5), "Hello 00005!"); +//! assert_eq!(format!("Hello {:05}!", -5), "Hello -0005!"); +//! assert_eq!(format!("{:#010x}!", 27), "0x0000001b!"); +//! ``` +//! +//! These are all flags altering the behavior of the formatter. +//! +//! * `+` - This is intended for numeric types and indicates that the sign +//! should always be printed. Positive signs are never printed by +//! default, and the negative sign is only printed by default for signed values. +//! This flag indicates that the correct sign (`+` or `-`) should always be printed. +//! * `-` - Currently not used +//! * `#` - This flag indicates that the "alternate" form of printing should +//! be used. The alternate forms are: +//! * `#?` - pretty-print the [`Debug`] formatting (adds linebreaks and indentation) +//! * `#x` - precedes the argument with a `0x` +//! * `#X` - precedes the argument with a `0x` +//! * `#b` - precedes the argument with a `0b` +//! * `#o` - precedes the argument with a `0o` +//! * `0` - This is used to indicate for integer formats that the padding to `width` should +//! 
both be done with a `0` character as well as be sign-aware. A format +//! like `{:08}` would yield `00000001` for the integer `1`, while the +//! same format would yield `-0000001` for the integer `-1`. Notice that +//! the negative version has one fewer zero than the positive version. +//! Note that padding zeros are always placed after the sign (if any) +//! and before the digits. When used together with the `#` flag, a similar +//! rule applies: padding zeros are inserted after the prefix but before +//! the digits. The prefix is included in the total width. +//! +//! ## Precision +//! +//! For non-numeric types, this can be considered a "maximum width". If the resulting string is +//! longer than this width, then it is truncated down to this many characters and that truncated +//! value is emitted with proper `fill`, `alignment` and `width` if those parameters are set. +//! +//! For integral types, this is ignored. +//! +//! For floating-point types, this indicates how many digits after the decimal point should be +//! printed. +//! +//! There are three possible ways to specify the desired `precision`: +//! +//! 1. An integer `.N`: +//! +//! the integer `N` itself is the precision. +//! +//! 2. An integer or name followed by dollar sign `.N$`: +//! +//! use format *argument* `N` (which must be a `usize`) as the precision. +//! +//! 3. An asterisk `.*`: +//! +//! `.*` means that this `{...}` is associated with *two* format inputs rather than one: the +//! first input holds the `usize` precision, and the second holds the value to print. Note that +//! in this case, if one uses the format string `{:.*}`, then the `` part refers +//! to the *value* to print, and the `precision` must come in the input preceding ``. +//! +//! For example, the following calls all print the same thing `Hello x is 0.01000`: +//! +//! ``` +//! // Hello {arg 0 ("x")} is {arg 1 (0.01) with precision specified inline (5)} +//! println!("Hello {0} is {1:.5}", "x", 0.01); +//! +//! // Hello {arg 1 ("x")} is {arg 2 (0.01) with precision specified in arg 0 (5)} +//! println!("Hello {1} is {2:.0$}", 5, "x", 0.01); +//! +//! // Hello {arg 0 ("x")} is {arg 2 (0.01) with precision specified in arg 1 (5)} +//! println!("Hello {0} is {2:.1$}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {second of next two args (0.01) with precision +//! // specified in first of next two args (5)} +//! println!("Hello {} is {:.*}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {arg 2 (0.01) with precision +//! // specified in its predecessor (5)} +//! println!("Hello {} is {2:.*}", "x", 5, 0.01); +//! +//! // Hello {next arg ("x")} is {arg "number" (0.01) with precision specified +//! // in arg "prec" (5)} +//! println!("Hello {} is {number:.prec$}", "x", prec = 5, number = 0.01); +//! ``` +//! +//! While these: +//! +//! ``` +//! println!("{}, `{name:.*}` has 3 fractional digits", "Hello", 3, name=1234.56); +//! println!("{}, `{name:.*}` has 3 characters", "Hello", 3, name="1234.56"); +//! println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); +//! ``` +//! +//! print three significantly different things: +//! +//! ```text +//! Hello, `1234.560` has 3 fractional digits +//! Hello, `123` has 3 characters +//! Hello, ` 123` has 3 right-aligned characters +//! ``` +//! +//! ## Localization +//! +//! In some programming languages, the behavior of string formatting functions +//! depends on the operating system's locale setting. The format functions +//! 
provided by Rust's standard library do not have any concept of locale and +//! will produce the same results on all systems regardless of user +//! configuration. +//! +//! For example, the following code will always print `1.5` even if the system +//! locale uses a decimal separator other than a dot. +//! +//! ``` +//! println!("The value is {}", 1.5); +//! ``` +//! +//! # Escaping +//! +//! The literal characters `{` and `}` may be included in a string by preceding +//! them with the same character. For example, the `{` character is escaped with +//! `{{` and the `}` character is escaped with `}}`. +//! +//! ``` +//! assert_eq!(format!("Hello {{}}"), "Hello {}"); +//! assert_eq!(format!("{{ Hello"), "{ Hello"); +//! ``` +//! +//! # Syntax +//! +//! To summarize, here you can find the full grammar of format strings. +//! The syntax for the formatting language used is drawn from other languages, +//! so it should not be too alien. Arguments are formatted with Python-like +//! syntax, meaning that arguments are surrounded by `{}` instead of the C-like +//! `%`. The actual grammar for the formatting syntax is: +//! +//! ```text +//! format_string := text [ maybe_format text ] * +//! maybe_format := '{' '{' | '}' '}' | format +//! format := '{' [ argument ] [ ':' format_spec ] '}' +//! argument := integer | identifier +//! +//! format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type +//! fill := character +//! align := '<' | '^' | '>' +//! sign := '+' | '-' +//! width := count +//! precision := count | '*' +//! type := '' | '?' | 'x?' | 'X?' | identifier +//! count := parameter | integer +//! parameter := argument '$' +//! ``` +//! In the above grammar, `text` may not contain any `'{'` or `'}'` characters. +//! +//! # Formatting traits +//! +//! When requesting that an argument be formatted with a particular type, you +//! are actually requesting that an argument ascribes to a particular trait. +//! This allows multiple actual types to be formatted via `{:x}` (like [`i8`] as +//! well as [`isize`]). The current mapping of types to traits is: +//! +//! * *nothing* ⇒ [`Display`] +//! * `?` ⇒ [`Debug`] +//! * `x?` ⇒ [`Debug`] with lower-case hexadecimal integers +//! * `X?` ⇒ [`Debug`] with upper-case hexadecimal integers +//! * `o` ⇒ [`Octal`] +//! * `x` ⇒ [`LowerHex`] +//! * `X` ⇒ [`UpperHex`] +//! * `p` ⇒ [`Pointer`] +//! * `b` ⇒ [`Binary`] +//! * `e` ⇒ [`LowerExp`] +//! * `E` ⇒ [`UpperExp`] +//! +//! What this means is that any type of argument which implements the +//! [`fmt::Binary`][`Binary`] trait can then be formatted with `{:b}`. Implementations +//! are provided for these traits for a number of primitive types by the +//! standard library as well. If no format is specified (as in `{}` or `{:6}`), +//! then the format trait used is the [`Display`] trait. +//! +//! When implementing a format trait for your own type, you will have to +//! implement a method of the signature: +//! +//! ``` +//! # #![allow(dead_code)] +//! # use std::fmt; +//! # struct Foo; // our custom type +//! # impl fmt::Display for Foo { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! # write!(f, "testing, testing") +//! # } } +//! ``` +//! +//! Your type will be passed as `self` by-reference, and then the function +//! should emit output into the `f.buf` stream. It is up to each format trait +//! implementation to correctly adhere to the requested formatting parameters. +//! The values of these parameters will be listed in the fields of the +//! [`Formatter`] struct. 
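The type-to-trait mapping above can be checked directly: the same argument renders differently depending on which formatting trait the format type selects. For example:

```
fn main() {
    // Same argument, different trait selected by the format type:
    assert_eq!(format!("{}", 42), "42");        // Display
    assert_eq!(format!("{:?}", 42), "42");      // Debug
    assert_eq!(format!("{:b}", 42), "101010");  // Binary
    assert_eq!(format!("{:o}", 42), "52");      // Octal
    assert_eq!(format!("{:x}", 42), "2a");      // LowerHex
    assert_eq!(format!("{:X}", 42), "2A");      // UpperHex
    assert_eq!(format!("{:e}", 42.0), "4.2e1"); // LowerExp
}
```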
In order to help with this, the [`Formatter`] struct also +//! provides some helper methods. +//! +//! Additionally, the return value of this function is [`fmt::Result`] which is a +//! type alias of [`Result`]`<(), `[`std::fmt::Error`]`>`. Formatting implementations +//! should ensure that they propagate errors from the [`Formatter`] (e.g., when +//! calling [`write!`]). However, they should never return errors spuriously. That +//! is, a formatting implementation must and may only return an error if the +//! passed-in [`Formatter`] returns an error. This is because, contrary to what +//! the function signature might suggest, string formatting is an infallible +//! operation. This function only returns a result because writing to the +//! underlying stream might fail and it must provide a way to propagate the fact +//! that an error has occurred back up the stack. +//! +//! An example of implementing the formatting traits would look +//! like: +//! +//! ``` +//! use std::fmt; +//! +//! #[derive(Debug)] +//! struct Vector2D { +//! x: isize, +//! y: isize, +//! } +//! +//! impl fmt::Display for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! // The `f` value implements the `Write` trait, which is what the +//! // write! macro is expecting. Note that this formatting ignores the +//! // various flags provided to format strings. +//! write!(f, "({}, {})", self.x, self.y) +//! } +//! } +//! +//! // Different traits allow different forms of output of a type. The meaning +//! // of this format is to print the magnitude of a vector. +//! impl fmt::Binary for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! let magnitude = (self.x * self.x + self.y * self.y) as f64; +//! let magnitude = magnitude.sqrt(); +//! +//! // Respect the formatting flags by using the helper method +//! // `pad_integral` on the Formatter object. See the method +//! // documentation for details, and the function `pad` can be used +//! // to pad strings. +//! let decimals = f.precision().unwrap_or(3); +//! let string = format!("{:.*}", decimals, magnitude); +//! f.pad_integral(true, "", &string) +//! } +//! } +//! +//! fn main() { +//! let myvector = Vector2D { x: 3, y: 4 }; +//! +//! println!("{}", myvector); // => "(3, 4)" +//! println!("{:?}", myvector); // => "Vector2D {x: 3, y:4}" +//! println!("{:10.3b}", myvector); // => " 5.000" +//! } +//! ``` +//! +//! ### `fmt::Display` vs `fmt::Debug` +//! +//! These two formatting traits have distinct purposes: +//! +//! - [`fmt::Display`][`Display`] implementations assert that the type can be faithfully +//! represented as a UTF-8 string at all times. It is **not** expected that +//! all types implement the [`Display`] trait. +//! - [`fmt::Debug`][`Debug`] implementations should be implemented for **all** public types. +//! Output will typically represent the internal state as faithfully as possible. +//! The purpose of the [`Debug`] trait is to facilitate debugging Rust code. In +//! most cases, using `#[derive(Debug)]` is sufficient and recommended. +//! +//! Some examples of the output from both traits: +//! +//! ``` +//! assert_eq!(format!("{} {:?}", 3, 4), "3 4"); +//! assert_eq!(format!("{} {:?}", 'a', 'b'), "a 'b'"); +//! assert_eq!(format!("{} {:?}", "foo\n", "bar\n"), "foo\n \"bar\\n\""); +//! ``` +//! +//! # Related macros +//! +//! There are a number of related macros in the [`format!`] family. The ones that +//! are currently implemented are: +//! +//! ```ignore (only-for-syntax-highlight) +//! format! 
// described above +//! write! // first argument is a &mut io::Write, the destination +//! writeln! // same as write but appends a newline +//! print! // the format string is printed to the standard output +//! println! // same as print but appends a newline +//! eprint! // the format string is printed to the standard error +//! eprintln! // same as eprint but appends a newline +//! format_args! // described below. +//! ``` +//! +//! ### `write!` +//! +//! This and [`writeln!`] are two macros which are used to emit the format string +//! to a specified stream. This is used to prevent intermediate allocations of +//! format strings and instead directly write the output. Under the hood, this +//! function is actually invoking the [`write_fmt`] function defined on the +//! [`std::io::Write`] trait. Example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::io::Write; +//! let mut w = Vec::new(); +//! write!(&mut w, "Hello {}!", "world"); +//! ``` +//! +//! ### `print!` +//! +//! This and [`println!`] emit their output to stdout. Similarly to the [`write!`] +//! macro, the goal of these macros is to avoid intermediate allocations when +//! printing output. Example usage is: +//! +//! ``` +//! print!("Hello {}!", "world"); +//! println!("I have a newline {}", "character at the end"); +//! ``` +//! ### `eprint!` +//! +//! The [`eprint!`] and [`eprintln!`] macros are identical to +//! [`print!`] and [`println!`], respectively, except they emit their +//! output to stderr. +//! +//! ### `format_args!` +//! +//! This is a curious macro used to safely pass around +//! an opaque object describing the format string. This object +//! does not require any heap allocations to create, and it only +//! references information on the stack. Under the hood, all of +//! the related macros are implemented in terms of this. First +//! off, some example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::fmt; +//! use std::io::{self, Write}; +//! +//! let mut some_writer = io::stdout(); +//! write!(&mut some_writer, "{}", format_args!("print with a {}", "macro")); +//! +//! fn my_fmt_fn(args: fmt::Arguments) { +//! write!(&mut io::stdout(), "{}", args); +//! } +//! my_fmt_fn(format_args!(", or a {} too", "function")); +//! ``` +//! +//! The result of the [`format_args!`] macro is a value of type [`fmt::Arguments`]. +//! This structure can then be passed to the [`write`] and [`format`] functions +//! inside this module in order to process the format string. +//! The goal of this macro is to even further prevent intermediate allocations +//! when dealing with formatting strings. +//! +//! For example, a logging library could use the standard formatting syntax, but +//! it would internally pass around this structure until it has been determined +//! where output should go to. +//! +//! [`fmt::Result`]: Result +//! [`Result`]: core::result::Result +//! [`std::fmt::Error`]: Error +//! [`write!`]: core::write +//! [`write`]: core::write +//! [`format!`]: crate::format +//! [`to_string`]: crate::string::ToString +//! [`writeln!`]: core::writeln +//! [`write_fmt`]: ../../std/io/trait.Write.html#method.write_fmt +//! [`std::io::Write`]: ../../std/io/trait.Write.html +//! [`print!`]: ../../std/macro.print.html +//! [`println!`]: ../../std/macro.println.html +//! [`eprint!`]: ../../std/macro.eprint.html +//! [`eprintln!`]: ../../std/macro.eprintln.html +//! [`format_args!`]: core::format_args +//! [`fmt::Arguments`]: Arguments +//! 
[`format`]: crate::format + +#![stable(feature = "rust1", since = "1.0.0")] + +#[unstable(feature = "fmt_internals", issue = "none")] +pub use core::fmt::rt; +#[stable(feature = "fmt_flags_align", since = "1.28.0")] +pub use core::fmt::Alignment; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::Error; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{write, ArgumentV1, Arguments}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Binary, Octal}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Debug, Display}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Formatter, Result, Write}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerExp, UpperExp}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerHex, Pointer, UpperHex}; + +#[cfg(not(no_global_oom_handling))] +use crate::string; + +/// The `format` function takes an [`Arguments`] struct and returns the resulting +/// formatted string. +/// +/// The [`Arguments`] instance can be created with the [`format_args!`] macro. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::fmt; +/// +/// let s = fmt::format(format_args!("Hello, {}!", "world")); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// Please note that using [`format!`] might be preferable. +/// Example: +/// +/// ``` +/// let s = format!("Hello, {}!", "world"); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// [`format_args!`]: core::format_args +/// [`format!`]: crate::format +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn format(args: Arguments<'_>) -> string::String { + let capacity = args.estimated_capacity(); + let mut output = string::String::with_capacity(capacity); + output.write_fmt(args).expect("a formatting trait implementation returned an error"); + output +} diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs new file mode 100644 index 0000000000000..61d734ddc9490 --- /dev/null +++ b/rust/alloc/lib.rs @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! # The Rust core allocation and collections library +//! +//! This library provides smart pointers and collections for managing +//! heap-allocated values. +//! +//! This library, like libcore, normally doesn’t need to be used directly +//! since its contents are re-exported in the [`std` crate](../std/index.html). +//! Crates that use the `#![no_std]` attribute however will typically +//! not depend on `std`, so they’d use this crate instead. +//! +//! ## Boxed values +//! +//! The [`Box`] type is a smart pointer type. There can only be one owner of a +//! [`Box`], and the owner can decide to mutate the contents, which live on the +//! heap. +//! +//! This type can be sent among threads efficiently as the size of a `Box` value +//! is the same as that of a pointer. Tree-like data structures are often built +//! with boxes because each node often has only one owner, the parent. +//! +//! ## Reference counted pointers +//! +//! The [`Rc`] type is a non-threadsafe reference-counted pointer type intended +//! for sharing memory within a thread. An [`Rc`] pointer wraps a type, `T`, and +//! only allows access to `&T`, a shared reference. +//! +//! This type is useful when inherited mutability (such as using [`Box`]) is too +//! 
constraining for an application, and is often paired with the [`Cell`] or
+//! [`RefCell`] types in order to allow mutation.
+//!
+//! ## Atomically reference counted pointers
+//!
+//! The [`Arc`] type is the threadsafe equivalent of the [`Rc`] type. It
+//! provides all the same functionality of [`Rc`], except it requires that the
+//! contained type `T` is shareable. Additionally, [`Arc<T>`][`Arc`] is itself
+//! sendable while [`Rc<T>`][`Rc`] is not.
+//!
+//! This type allows for shared access to the contained data, and is often
+//! paired with synchronization primitives such as mutexes to allow mutation of
+//! shared resources.
+//!
+//! ## Collections
+//!
+//! Implementations of the most common general purpose data structures are
+//! defined in this library. They are re-exported through the
+//! [standard collections library](../std/collections/index.html).
+//!
+//! ## Heap interfaces
+//!
+//! The [`alloc`](alloc/index.html) module defines the low-level interface to the
+//! default global allocator. It is not compatible with the libc allocator API.
+//!
+//! [`Arc`]: sync
+//! [`Box`]: boxed
+//! [`Cell`]: core::cell
+//! [`Rc`]: rc
+//! [`RefCell`]: core::cell
+
+// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be
+// able to "empty" this crate. See .
+// rustc itself never sets the feature, so this line has no effect there.
+#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))]
+#![allow(unused_attributes)]
+#![stable(feature = "alloc", since = "1.36.0")]
+#![doc(
+    html_playground_url = "https://play.rust-lang.org/",
+    issue_tracker_base_url = "https://github.com/rust-lang/rust/issues/",
+    test(no_crate_inject, attr(allow(unused_variables), deny(warnings)))
+)]
+#![no_std]
+#![needs_allocator]
+#![warn(deprecated_in_future)]
+#![warn(missing_docs)]
+#![warn(missing_debug_implementations)]
+#![allow(explicit_outlives_requirements)]
+#![deny(unsafe_op_in_unsafe_fn)]
+#![feature(rustc_allow_const_fn_unstable)]
+#![cfg_attr(not(test), feature(generator_trait))]
+#![cfg_attr(test, feature(test))]
+#![cfg_attr(test, feature(new_uninit))]
+#![feature(allocator_api)]
+#![feature(array_chunks)]
+#![feature(array_methods)]
+#![feature(array_windows)]
+#![feature(allow_internal_unstable)]
+#![feature(arbitrary_self_types)]
+#![feature(async_stream)]
+#![feature(box_patterns)]
+#![feature(box_syntax)]
+#![feature(cfg_sanitize)]
+#![feature(cfg_target_has_atomic)]
+#![feature(coerce_unsized)]
+#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))]
+#![feature(const_fn_trait_bound)]
+#![feature(cow_is_borrowed)]
+#![feature(const_cow_is_borrowed)]
+#![feature(destructuring_assignment)]
+#![feature(dispatch_from_dyn)]
+#![feature(core_intrinsics)]
+#![feature(dropck_eyepatch)]
+#![feature(exact_size_is_empty)]
+#![feature(exclusive_range_pattern)]
+#![feature(extend_one)]
+#![feature(fmt_internals)]
+#![feature(fn_traits)]
+#![feature(fundamental)]
+#![feature(inplace_iteration)]
+// Technically, this is a bug in rustdoc: rustdoc sees that the documentation on `#[lang = slice_alloc]`
+// blocks is for `&[T]`, which also has documentation using this feature in `core`, and gets mad
+// that the feature-gate isn't enabled. Ideally, it wouldn't check for the feature gate for docs
+// from other crates, but since this can only appear for lang items, it doesn't seem worth fixing.
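+// (`intra_doc_pointers` below is the rustdoc feature gate that the comment
+// above refers to.)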
+#![feature(intra_doc_pointers)] +#![feature(iter_zip)] +#![feature(lang_items)] +#![feature(layout_for_ptr)] +#![feature(negative_impls)] +#![feature(never_type)] +#![feature(nll)] +#![feature(nonnull_slice_from_raw_parts)] +#![feature(auto_traits)] +#![feature(option_result_unwrap_unchecked)] +#![feature(pattern)] +#![feature(ptr_internals)] +#![feature(rustc_attrs)] +#![feature(receiver_trait)] +#![feature(min_specialization)] +#![feature(set_ptr_value)] +#![feature(slice_ptr_get)] +#![feature(slice_ptr_len)] +#![feature(slice_range)] +#![feature(staged_api)] +#![feature(str_internals)] +#![feature(trusted_len)] +#![feature(unboxed_closures)] +#![feature(unicode_internals)] +#![feature(unsize)] +#![feature(unsized_fn_params)] +#![feature(allocator_internals)] +#![feature(slice_partition_dedup)] +#![feature(maybe_uninit_extra, maybe_uninit_slice, maybe_uninit_uninit_array)] +#![feature(alloc_layout_extra)] +#![feature(trusted_random_access)] +#![feature(try_trait_v2)] +#![feature(min_type_alias_impl_trait)] +#![feature(associated_type_bounds)] +#![feature(slice_group_by)] +#![feature(decl_macro)] +#![feature(bindings_after_at)] +// Allow testing this library + +#[cfg(test)] +#[macro_use] +extern crate std; +#[cfg(test)] +extern crate test; + +// Module with internal macros used by other modules (needs to be included before other modules). +#[macro_use] +mod macros; + +// Heaps provided for low-level allocation strategies + +pub mod alloc; + +// Primitive types using the heaps above + +// Need to conditionally define the mod from `boxed.rs` to avoid +// duplicating the lang-items when building in test cfg; but also need +// to allow code to have `use boxed::Box;` declarations. +#[cfg(not(test))] +pub mod boxed; +#[cfg(test)] +mod boxed { + pub use std::boxed::Box; +} +pub mod borrow; +pub mod collections; +pub mod fmt; +pub mod prelude; +pub mod raw_vec; +#[cfg(not(no_rc))] +pub mod rc; +pub mod slice; +pub mod str; +pub mod string; +#[cfg(all(not(no_sync), target_has_atomic = "ptr"))] +pub mod sync; +#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))] +pub mod task; +#[cfg(test)] +mod tests; +pub mod vec; + +#[doc(hidden)] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +pub mod __export { + pub use core::format_args; +} diff --git a/rust/alloc/macros.rs b/rust/alloc/macros.rs new file mode 100644 index 0000000000000..c2e70b94b3600 --- /dev/null +++ b/rust/alloc/macros.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/// Creates a [`Vec`] containing the arguments. +/// +/// `vec!` allows `Vec`s to be defined with the same syntax as array expressions. +/// There are two forms of this macro: +/// +/// - Create a [`Vec`] containing a given list of elements: +/// +/// ``` +/// let v = vec![1, 2, 3]; +/// assert_eq!(v[0], 1); +/// assert_eq!(v[1], 2); +/// assert_eq!(v[2], 3); +/// ``` +/// +/// - Create a [`Vec`] from a given element and size: +/// +/// ``` +/// let v = vec![1; 3]; +/// assert_eq!(v, [1, 1, 1]); +/// ``` +/// +/// Note that unlike array expressions this syntax supports all elements +/// which implement [`Clone`] and the number of elements doesn't have to be +/// a constant. +/// +/// This will use `clone` to duplicate an expression, so one should be careful +/// using this with types having a nonstandard `Clone` implementation. 
For +/// example, `vec![Rc::new(1); 5]` will create a vector of five references +/// to the same boxed integer value, not five references pointing to independently +/// boxed integers. +/// +/// Also, note that `vec![expr; 0]` is allowed, and produces an empty vector. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. +/// +/// [`Vec`]: crate::vec::Vec +#[cfg(not(test))] +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[allow_internal_unstable(box_syntax, liballoc_internals)] +macro_rules! vec { + () => ( + $crate::__rust_force_expr!($crate::vec::Vec::new()) + ); + ($elem:expr; $n:expr) => ( + $crate::__rust_force_expr!($crate::vec::from_elem($elem, $n)) + ); + ($($x:expr),+ $(,)?) => ( + $crate::__rust_force_expr!(<[_]>::into_vec(box [$($x),+])) + ); +} + +// HACK(japaric): with cfg(test) the inherent `[T]::into_vec` method, which is +// required for this macro definition, is not available. Instead use the +// `slice::into_vec` function which is only available with cfg(test) +// NB see the slice::hack module in slice.rs for more information +#[cfg(test)] +macro_rules! vec { + () => ( + $crate::vec::Vec::new() + ); + ($elem:expr; $n:expr) => ( + $crate::vec::from_elem($elem, $n) + ); + ($($x:expr),*) => ( + $crate::slice::into_vec(box [$($x),*]) + ); + ($($x:expr,)*) => (vec![$($x),*]) +} + +/// Creates a `String` using interpolation of runtime expressions. +/// +/// The first argument `format!` receives is a format string. This must be a string +/// literal. The power of the formatting string is in the `{}`s contained. +/// +/// Additional parameters passed to `format!` replace the `{}`s within the +/// formatting string in the order given unless named or positional parameters +/// are used; see [`std::fmt`] for more information. +/// +/// A common use for `format!` is concatenation and interpolation of strings. +/// The same convention is used with [`print!`] and [`write!`] macros, +/// depending on the intended destination of the string. +/// +/// To convert a single value to a string, use the [`to_string`] method. This +/// will use the [`Display`] formatting trait. +/// +/// [`std::fmt`]: ../std/fmt/index.html +/// [`print!`]: ../std/macro.print.html +/// [`write!`]: core::write +/// [`to_string`]: crate::string::ToString +/// [`Display`]: core::fmt::Display +/// +/// # Panics +/// +/// `format!` panics if a formatting trait implementation returns an error. +/// This indicates an incorrect implementation +/// since `fmt::Write for String` never returns an error itself. +/// +/// # Examples +/// +/// ``` +/// format!("test"); +/// format!("hello {}", "world!"); +/// format!("x = {}, y = {y}", 10, y = 30); +/// ``` +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "format_macro")] +macro_rules! format { + ($($arg:tt)*) => {{ + let res = $crate::fmt::format($crate::__export::format_args!($($arg)*)); + res + }} +} + +/// Force AST node to an expression to improve diagnostics in pattern position. +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +macro_rules! __rust_force_expr { + ($e:expr) => { + $e + }; +} diff --git a/rust/alloc/prelude/mod.rs b/rust/alloc/prelude/mod.rs new file mode 100644 index 0000000000000..a64a1843760eb --- /dev/null +++ b/rust/alloc/prelude/mod.rs @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! 
The alloc Prelude
+//!
+//! The purpose of this module is to alleviate imports of commonly-used
+//! items of the `alloc` crate by adding a glob import to the top of modules:
+//!
+//! ```
+//! # #![allow(unused_imports)]
+//! #![feature(alloc_prelude)]
+//! extern crate alloc;
+//! use alloc::prelude::v1::*;
+//! ```
+
+#![unstable(feature = "alloc_prelude", issue = "58935")]
+
+pub mod v1;
diff --git a/rust/alloc/prelude/v1.rs b/rust/alloc/prelude/v1.rs
new file mode 100644
index 0000000000000..48d75431c0d13
--- /dev/null
+++ b/rust/alloc/prelude/v1.rs
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! The first version of the prelude of `alloc` crate.
+//!
+//! See the [module-level documentation](../index.html) for more.
+
+#![unstable(feature = "alloc_prelude", issue = "58935")]
+
+#[unstable(feature = "alloc_prelude", issue = "58935")]
+pub use crate::borrow::ToOwned;
+#[unstable(feature = "alloc_prelude", issue = "58935")]
+pub use crate::boxed::Box;
+#[unstable(feature = "alloc_prelude", issue = "58935")]
+pub use crate::string::{String, ToString};
+#[unstable(feature = "alloc_prelude", issue = "58935")]
+pub use crate::vec::Vec;
diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs
new file mode 100644
index 0000000000000..629dbd3927d12
--- /dev/null
+++ b/rust/alloc/raw_vec.rs
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![unstable(feature = "raw_vec_internals", reason = "implementation detail", issue = "none")]
+#![doc(hidden)]
+
+use core::alloc::LayoutError;
+use core::cmp;
+use core::intrinsics;
+use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::ops::Drop;
+use core::ptr::{self, NonNull, Unique};
+use core::slice;
+
+#[cfg(not(no_global_oom_handling))]
+use crate::alloc::handle_alloc_error;
+use crate::alloc::{Allocator, Global, Layout};
+use crate::boxed::Box;
+use crate::collections::TryReserveError::{self, *};
+
+#[cfg(test)]
+mod tests;
+
+#[allow(dead_code)]
+enum AllocInit {
+    /// The contents of the new memory are uninitialized.
+    Uninitialized,
+    /// The new memory is guaranteed to be zeroed.
+    Zeroed,
+}
+
+/// A low-level utility for more ergonomically allocating, reallocating, and deallocating
+/// a buffer of memory on the heap without having to worry about all the corner cases
+/// involved. This type is excellent for building your own data structures like Vec and VecDeque.
+/// In particular:
+///
+/// * Produces `Unique::dangling()` on zero-sized types.
+/// * Produces `Unique::dangling()` on zero-length allocations.
+/// * Avoids freeing `Unique::dangling()`.
+/// * Catches all overflows in capacity computations (promotes them to "capacity overflow" panics).
+/// * Guards against 32-bit systems allocating more than isize::MAX bytes.
+/// * Guards against overflowing your length.
+/// * Calls `handle_alloc_error` for fallible allocations.
+/// * Contains a `ptr::Unique` and thus endows the user with all related benefits.
+/// * Uses the excess returned from the allocator to use the largest available capacity.
+///
+/// This type does not in any way inspect the memory that it manages. When dropped it *will*
+/// free its memory, but it *won't* try to drop its contents. It is up to the user of `RawVec`
+/// to handle the actual things *stored* inside of a `RawVec`.
+///
+/// Note that the excess of zero-sized types is always infinite, so `capacity()` always returns
+/// `usize::MAX`. This means that you need to be careful when round-tripping this type with a
+/// `Box<[T]>`, since `capacity()` won't yield the length.
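+///
+/// For illustration only (this is an unstable internal API, so the sketch
+/// below is marked `ignore`; it assumes the `raw_vec_internals` feature):
+///
+/// ```ignore (internal-api)
+/// # #![feature(raw_vec_internals)]
+/// # extern crate alloc;
+/// # use alloc::raw_vec::RawVec;
+/// let ints: RawVec<u32> = RawVec::new();
+/// assert_eq!(ints.capacity(), 0); // sized element type: starts unallocated
+///
+/// let zsts: RawVec<()> = RawVec::new();
+/// assert_eq!(zsts.capacity(), usize::MAX); // zero-sized type: "infinite" capacity
+/// ```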
+#[allow(missing_debug_implementations)]
+pub struct RawVec<T, A: Allocator = Global> {
+    ptr: Unique<T>,
+    cap: usize,
+    alloc: A,
+}
+
+impl<T> RawVec<T, Global> {
+    /// HACK(Centril): This exists because stable `const fn` can only call stable `const fn`, so
+    /// they cannot call `Self::new()`.
+    ///
+    /// If you change `RawVec<T>::new` or dependencies, please take care to not introduce anything
+    /// that would truly const-call something unstable.
+    pub const NEW: Self = Self::new();
+
+    /// Creates the biggest possible `RawVec` (on the system heap)
+    /// without allocating. If `T` has positive size, then this makes a
+    /// `RawVec` with capacity `0`. If `T` is zero-sized, then it makes a
+    /// `RawVec` with capacity `usize::MAX`. Useful for implementing
+    /// delayed allocation.
+    pub const fn new() -> Self {
+        Self::new_in(Global)
+    }
+
+    /// Creates a `RawVec` (on the system heap) with exactly the
+    /// capacity and alignment requirements for a `[T; capacity]`. This is
+    /// equivalent to calling `RawVec::new` when `capacity` is `0` or `T` is
+    /// zero-sized. Note that if `T` is zero-sized this means you will
+    /// *not* get a `RawVec` with the requested capacity.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the requested capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Aborts
+    ///
+    /// Aborts on OOM.
+    #[cfg(not(no_global_oom_handling))]
+    #[inline]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity_in(capacity, Global)
+    }
+
+    /// Tries to create a `RawVec` (on the system heap) with exactly the
+    /// capacity and alignment requirements for a `[T; capacity]`. This is
+    /// equivalent to calling `RawVec::new` when `capacity` is `0` or `T` is
+    /// zero-sized. Note that if `T` is zero-sized this means you will
+    /// *not* get a `RawVec` with the requested capacity.
+    #[inline]
+    pub fn try_with_capacity(capacity: usize) -> Result<Self, TryReserveError> {
+        Self::try_with_capacity_in(capacity, Global)
+    }
+
+    /// Like `with_capacity`, but guarantees the buffer is zeroed.
+    #[cfg(not(no_global_oom_handling))]
+    #[inline]
+    pub fn with_capacity_zeroed(capacity: usize) -> Self {
+        Self::with_capacity_zeroed_in(capacity, Global)
+    }
+
+    /// Reconstitutes a `RawVec` from a pointer and capacity.
+    ///
+    /// # Safety
+    ///
+    /// The `ptr` must be allocated (on the system heap), and with the given `capacity`.
+    /// The `capacity` cannot exceed `isize::MAX` for sized types (only a concern on 32-bit
+    /// systems). ZST vectors may have a capacity up to `usize::MAX`.
+    /// If the `ptr` and `capacity` come from a `RawVec`, then this is guaranteed.
+    #[inline]
+    pub unsafe fn from_raw_parts(ptr: *mut T, capacity: usize) -> Self {
+        unsafe { Self::from_raw_parts_in(ptr, capacity, Global) }
+    }
+}
+
+impl<T, A: Allocator> RawVec<T, A> {
+    // Tiny Vecs are dumb. Skip to:
+    // - 8 if the element size is 1, because any heap allocator is likely
+    //   to round up a request of less than 8 bytes to at least 8 bytes.
+    // - 4 if elements are moderate-sized (<= 1 KiB).
+    // - 1 otherwise, to avoid wasting too much space for very short Vecs.
+    const MIN_NON_ZERO_CAP: usize = if mem::size_of::<T>() == 1 {
+        8
+    } else if mem::size_of::<T>() <= 1024 {
+        4
+    } else {
+        1
+    };
+
+    /// Like `new`, but parameterized over the choice of allocator for
+    /// the returned `RawVec`.
+    #[rustc_allow_const_fn_unstable(const_fn)]
+    pub const fn new_in(alloc: A) -> Self {
+        // `cap: 0` means "unallocated". zero-sized types are ignored.
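+        // A dangling `Unique` is a valid (aligned, non-null) placeholder:
+        // nothing has been allocated yet, and `current_memory()` below treats
+        // `cap == 0` as "no allocation to free".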
+ Self { ptr: Unique::dangling(), cap: 0, alloc } + } + + /// Like `with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + + /// Like `try_with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[inline] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { + Self::try_allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + + /// Like `with_capacity_zeroed`, but parameterized over the choice + /// of allocator for the returned `RawVec`. + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn with_capacity_zeroed_in(capacity: usize, alloc: A) -> Self { + Self::allocate_in(capacity, AllocInit::Zeroed, alloc) + } + + /// Converts a `Box<[T]>` into a `RawVec`. + pub fn from_box(slice: Box<[T], A>) -> Self { + unsafe { + let (slice, alloc) = Box::into_raw_with_allocator(slice); + RawVec::from_raw_parts_in(slice.as_mut_ptr(), slice.len(), alloc) + } + } + + /// Converts the entire buffer into `Box<[MaybeUninit]>` with the specified `len`. + /// + /// Note that this will correctly reconstitute any `cap` changes + /// that may have been performed. (See description of type for details.) + /// + /// # Safety + /// + /// * `len` must be greater than or equal to the most recently requested capacity, and + /// * `len` must be less than or equal to `self.capacity()`. + /// + /// Note, that the requested capacity and `self.capacity()` could differ, as + /// an allocator could overallocate and return a greater memory block than requested. + pub unsafe fn into_box(self, len: usize) -> Box<[MaybeUninit], A> { + // Sanity-check one half of the safety requirement (we cannot check the other half). + debug_assert!( + len <= self.capacity(), + "`len` must be smaller than or equal to `self.capacity()`" + ); + + let me = ManuallyDrop::new(self); + unsafe { + let slice = slice::from_raw_parts_mut(me.ptr() as *mut MaybeUninit, len); + Box::from_raw_in(slice, ptr::read(&me.alloc)) + } + } + + #[cfg(not(no_global_oom_handling))] + fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self { + if mem::size_of::() == 0 { + Self::new_in(alloc) + } else { + // We avoid `unwrap_or_else` here because it bloats the amount of + // LLVM IR generated. 
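+            // Note the error routing below: a failed `Layout::array` and an
+            // oversized layout both end in the shared `capacity_overflow()`
+            // panic, while a genuine allocation failure is reported through
+            // `handle_alloc_error` instead.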
+ let layout = match Layout::array::(capacity) { + Ok(layout) => layout, + Err(_) => capacity_overflow(), + }; + match alloc_guard(layout.size()) { + Ok(_) => {} + Err(_) => capacity_overflow(), + } + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = match result { + Ok(ptr) => ptr, + Err(_) => handle_alloc_error(layout), + }; + + Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: Self::capacity_from_bytes(ptr.len()), + alloc, + } + } + } + + fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result { + if mem::size_of::() == 0 { + return Ok(Self::new_in(alloc)); + } + + let layout = Layout::array::(capacity)?; + alloc_guard(layout.size())?; + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = match result { + Ok(ptr) => ptr, + Err(_) => return Err(TryReserveError::AllocError { layout, non_exhaustive: () }), + }; + + Ok(Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: Self::capacity_from_bytes(ptr.len()), + alloc, + }) + } + + /// Reconstitutes a `RawVec` from a pointer, capacity, and allocator. + /// + /// # Safety + /// + /// The `ptr` must be allocated (via the given allocator `alloc`), and with the given + /// `capacity`. + /// The `capacity` cannot exceed `isize::MAX` for sized types. (only a concern on 32-bit + /// systems). ZST vectors may have a capacity up to `usize::MAX`. + /// If the `ptr` and `capacity` come from a `RawVec` created via `alloc`, then this is + /// guaranteed. + #[inline] + pub unsafe fn from_raw_parts_in(ptr: *mut T, capacity: usize, alloc: A) -> Self { + Self { ptr: unsafe { Unique::new_unchecked(ptr) }, cap: capacity, alloc } + } + + /// Gets a raw pointer to the start of the allocation. Note that this is + /// `Unique::dangling()` if `capacity == 0` or `T` is zero-sized. In the former case, you must + /// be careful. + #[inline] + pub fn ptr(&self) -> *mut T { + self.ptr.as_ptr() + } + + /// Gets the capacity of the allocation. + /// + /// This will always be `usize::MAX` if `T` is zero-sized. + #[inline(always)] + pub fn capacity(&self) -> usize { + if mem::size_of::() == 0 { usize::MAX } else { self.cap } + } + + /// Returns a shared reference to the allocator backing this `RawVec`. + pub fn allocator(&self) -> &A { + &self.alloc + } + + fn current_memory(&self) -> Option<(NonNull, Layout)> { + if mem::size_of::() == 0 || self.cap == 0 { + None + } else { + // We have an allocated chunk of memory, so we can bypass runtime + // checks to get our current layout. + unsafe { + let align = mem::align_of::(); + let size = mem::size_of::() * self.cap; + let layout = Layout::from_size_align_unchecked(size, align); + Some((self.ptr.cast().into(), layout)) + } + } + } + + /// Ensures that the buffer contains at least enough space to hold `len + + /// additional` elements. If it doesn't already have enough capacity, will + /// reallocate enough space plus comfortable slack space to get amortized + /// *O*(1) behavior. Will limit this behavior if it would needlessly cause + /// itself to panic. + /// + /// If `len` exceeds `self.capacity()`, this may fail to actually allocate + /// the requested space. This is not really unsafe, but the unsafe + /// code *you* write that relies on the behavior of this function may break. + /// + /// This is ideal for implementing a bulk-push operation like `extend`. 
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Aborts
+    ///
+    /// Aborts on OOM.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(raw_vec_internals)]
+    /// # extern crate alloc;
+    /// # use std::ptr;
+    /// # use alloc::raw_vec::RawVec;
+    /// struct MyVec<T> {
+    ///     buf: RawVec<T>,
+    ///     len: usize,
+    /// }
+    ///
+    /// impl<T: Clone> MyVec<T> {
+    ///     pub fn push_all(&mut self, elems: &[T]) {
+    ///         self.buf.reserve(self.len, elems.len());
+    ///         // reserve would have aborted or panicked if the len exceeded
+    ///         // `isize::MAX` so this is safe to do unchecked now.
+    ///         for x in elems {
+    ///             unsafe {
+    ///                 ptr::write(self.buf.ptr().add(self.len), x.clone());
+    ///             }
+    ///             self.len += 1;
+    ///         }
+    ///     }
+    /// }
+    /// # fn main() {
+    /// # let mut vector = MyVec { buf: RawVec::new(), len: 0 };
+    /// # vector.push_all(&[1, 3, 5, 7, 9]);
+    /// # }
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[inline]
+    pub fn reserve(&mut self, len: usize, additional: usize) {
+        // Callers expect this function to be very cheap when there is already sufficient
+        // capacity. Therefore, we move all the resizing and error-handling logic from
+        // grow_amortized and handle_reserve behind a call, while making sure that this
+        // function is likely to be inlined as just a comparison and a call if the
+        // comparison fails.
+        #[cold]
+        fn do_reserve_and_handle<T, A: Allocator>(
+            slf: &mut RawVec<T, A>,
+            len: usize,
+            additional: usize,
+        ) {
+            handle_reserve(slf.grow_amortized(len, additional));
+        }
+
+        if self.needs_to_grow(len, additional) {
+            do_reserve_and_handle(self, len, additional);
+        }
+    }
+
+    /// The same as `reserve`, but returns on errors instead of panicking or aborting.
+    pub fn try_reserve(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> {
+        if self.needs_to_grow(len, additional) {
+            self.grow_amortized(len, additional)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Ensures that the buffer contains at least enough space to hold `len +
+    /// additional` elements. If it doesn't already, will reallocate the
+    /// minimum possible amount of memory necessary. Generally this will be
+    /// exactly the amount of memory necessary, but in principle the allocator
+    /// is free to give back more than we asked for.
+    ///
+    /// If `len` exceeds `self.capacity()`, this may fail to actually allocate
+    /// the requested space. This is not really unsafe, but the unsafe code
+    /// *you* write that relies on the behavior of this function may break.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Aborts
+    ///
+    /// Aborts on OOM.
+    #[cfg(not(no_global_oom_handling))]
+    pub fn reserve_exact(&mut self, len: usize, additional: usize) {
+        handle_reserve(self.try_reserve_exact(len, additional));
+    }
+
+    /// The same as `reserve_exact`, but returns on errors instead of panicking or aborting.
+    pub fn try_reserve_exact(
+        &mut self,
+        len: usize,
+        additional: usize,
+    ) -> Result<(), TryReserveError> {
+        if self.needs_to_grow(len, additional) { self.grow_exact(len, additional) } else { Ok(()) }
+    }
+
+    /// Shrinks the allocation down to the specified amount. If the given amount
+    /// is 0, actually completely deallocates.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the given amount is *larger* than the current capacity.
+    ///
+    /// # Aborts
+    ///
+    /// Aborts on OOM.
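+    ///
+    /// A sketch of typical use (internal API, so marked `ignore`; assumes the
+    /// `raw_vec_internals` feature):
+    ///
+    /// ```ignore (internal-api)
+    /// # #![feature(raw_vec_internals)]
+    /// # extern crate alloc;
+    /// # use alloc::raw_vec::RawVec;
+    /// let mut buf: RawVec<u8> = RawVec::with_capacity(1024);
+    /// buf.shrink_to_fit(16); // keep room for only 16 elements
+    /// assert!(buf.capacity() >= 16);
+    /// ```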
+ #[cfg(not(no_global_oom_handling))] + pub fn shrink_to_fit(&mut self, amount: usize) { + handle_reserve(self.shrink(amount)); + } + + /// Tries to shrink the allocation down to the specified amount. If the given amount + /// is 0, actually completely deallocates. + /// + /// # Panics + /// + /// Panics if the given amount is *larger* than the current capacity. + pub fn try_shrink_to_fit(&mut self, amount: usize) -> Result<(), TryReserveError> { + self.shrink(amount) + } +} + +impl RawVec { + /// Returns if the buffer needs to grow to fulfill the needed extra capacity. + /// Mainly used to make inlining reserve-calls possible without inlining `grow`. + fn needs_to_grow(&self, len: usize, additional: usize) -> bool { + additional > self.capacity().wrapping_sub(len) + } + + fn capacity_from_bytes(excess: usize) -> usize { + debug_assert_ne!(mem::size_of::(), 0); + excess / mem::size_of::() + } + + fn set_ptr(&mut self, ptr: NonNull<[u8]>) { + self.ptr = unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }; + self.cap = Self::capacity_from_bytes(ptr.len()); + } + + // This method is usually instantiated many times. So we want it to be as + // small as possible, to improve compile times. But we also want as much of + // its contents to be statically computable as possible, to make the + // generated code run faster. Therefore, this method is carefully written + // so that all of the code that depends on `T` is within it, while as much + // of the code that doesn't depend on `T` as possible is in functions that + // are non-generic over `T`. + fn grow_amortized(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + // This is ensured by the calling contexts. + debug_assert!(additional > 0); + + if mem::size_of::() == 0 { + // Since we return a capacity of `usize::MAX` when `elem_size` is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow); + } + + // Nothing we can really do about these checks, sadly. + let required_cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + + // This guarantees exponential growth. The doubling cannot overflow + // because `cap <= isize::MAX` and the type of `cap` is `usize`. + let cap = cmp::max(self.cap * 2, required_cap); + let cap = cmp::max(Self::MIN_NON_ZERO_CAP, cap); + + let new_layout = Layout::array::(cap); + + // `finish_grow` is non-generic over `T`. + let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr(ptr); + Ok(()) + } + + // The constraints on this method are much the same as those on + // `grow_amortized`, but this method is usually instantiated less often so + // it's less critical. + fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { + if mem::size_of::() == 0 { + // Since we return a capacity of `usize::MAX` when the type size is + // 0, getting to here necessarily means the `RawVec` is overfull. + return Err(CapacityOverflow); + } + + let cap = len.checked_add(additional).ok_or(CapacityOverflow)?; + let new_layout = Layout::array::(cap); + + // `finish_grow` is non-generic over `T`. 
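+        // As in `grow_amortized` above: `finish_grow` maps a `LayoutError` to
+        // `CapacityOverflow` and re-checks the byte size via `alloc_guard`,
+        // so all error paths surface as a `TryReserveError` here.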
+ let ptr = finish_grow(new_layout, self.current_memory(), &mut self.alloc)?; + self.set_ptr(ptr); + Ok(()) + } + + fn shrink(&mut self, amount: usize) -> Result<(), TryReserveError> { + assert!(amount <= self.capacity(), "Tried to shrink to a larger capacity"); + + let (ptr, layout) = if let Some(mem) = self.current_memory() { mem } else { return Ok(()) }; + let new_size = amount * mem::size_of::(); + + let ptr = unsafe { + let new_layout = Layout::from_size_align_unchecked(new_size, layout.align()); + self.alloc.shrink(ptr, layout, new_layout).map_err(|_| TryReserveError::AllocError { + layout: new_layout, + non_exhaustive: (), + })? + }; + self.set_ptr(ptr); + Ok(()) + } +} + +// This function is outside `RawVec` to minimize compile times. See the comment +// above `RawVec::grow_amortized` for details. (The `A` parameter isn't +// significant, because the number of different `A` types seen in practice is +// much smaller than the number of `T` types.) +#[inline(never)] +fn finish_grow( + new_layout: Result, + current_memory: Option<(NonNull, Layout)>, + alloc: &mut A, +) -> Result, TryReserveError> +where + A: Allocator, +{ + // Check for the error here to minimize the size of `RawVec::grow_*`. + let new_layout = new_layout.map_err(|_| CapacityOverflow)?; + + alloc_guard(new_layout.size())?; + + let memory = if let Some((ptr, old_layout)) = current_memory { + debug_assert_eq!(old_layout.align(), new_layout.align()); + unsafe { + // The allocator checks for alignment equality + intrinsics::assume(old_layout.align() == new_layout.align()); + alloc.grow(ptr, old_layout, new_layout) + } + } else { + alloc.allocate(new_layout) + }; + + memory.map_err(|_| AllocError { layout: new_layout, non_exhaustive: () }) +} + +unsafe impl<#[may_dangle] T, A: Allocator> Drop for RawVec { + /// Frees the memory owned by the `RawVec` *without* trying to drop its contents. + fn drop(&mut self) { + if let Some((ptr, layout)) = self.current_memory() { + unsafe { self.alloc.deallocate(ptr, layout) } + } + } +} + +// Central function for reserve error handling. +#[cfg(not(no_global_oom_handling))] +#[inline] +fn handle_reserve(result: Result<(), TryReserveError>) { + match result { + Err(CapacityOverflow) => capacity_overflow(), + Err(AllocError { layout, .. }) => handle_alloc_error(layout), + Ok(()) => { /* yay */ } + } +} + +// We need to guarantee the following: +// * We don't ever allocate `> isize::MAX` byte-size objects. +// * We don't overflow `usize::MAX` and actually allocate too little. +// +// On 64-bit we just need to check for overflow since trying to allocate +// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add +// an extra guard for this in case we're running on a platform which can use +// all 4GB in user-space, e.g., PAE or x32. + +#[inline] +fn alloc_guard(alloc_size: usize) -> Result<(), TryReserveError> { + if usize::BITS < 64 && alloc_size > isize::MAX as usize { + Err(CapacityOverflow) + } else { + Ok(()) + } +} + +// One central function responsible for reporting capacity overflows. This'll +// ensure that the code generation related to these panics is minimal as there's +// only one location which panics rather than a bunch throughout the module. +#[cfg(not(no_global_oom_handling))] +fn capacity_overflow() -> ! 
{ + panic!("capacity overflow"); +} diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs new file mode 100644 index 0000000000000..9afb8f3caf11f --- /dev/null +++ b/rust/alloc/slice.rs @@ -0,0 +1,1271 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A dynamically-sized view into a contiguous sequence, `[T]`. +//! +//! *[See also the slice primitive type](slice).* +//! +//! Slices are a view into a block of memory represented as a pointer and a +//! length. +//! +//! ``` +//! // slicing a Vec +//! let vec = vec![1, 2, 3]; +//! let int_slice = &vec[..]; +//! // coercing an array to a slice +//! let str_slice: &[&str] = &["one", "two", "three"]; +//! ``` +//! +//! Slices are either mutable or shared. The shared slice type is `&[T]`, +//! while the mutable slice type is `&mut [T]`, where `T` represents the element +//! type. For example, you can mutate the block of memory that a mutable slice +//! points to: +//! +//! ``` +//! let x = &mut [1, 2, 3]; +//! x[1] = 7; +//! assert_eq!(x, &[1, 7, 3]); +//! ``` +//! +//! Here are some of the things this module contains: +//! +//! ## Structs +//! +//! There are several structs that are useful for slices, such as [`Iter`], which +//! represents iteration over a slice. +//! +//! ## Trait Implementations +//! +//! There are several implementations of common traits for slices. Some examples +//! include: +//! +//! * [`Clone`] +//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`]. +//! * [`Hash`] - for slices whose element type is [`Hash`]. +//! +//! ## Iteration +//! +//! The slices implement `IntoIterator`. The iterator yields references to the +//! slice elements. +//! +//! ``` +//! let numbers = &[0, 1, 2]; +//! for n in numbers { +//! println!("{} is a number!", n); +//! } +//! ``` +//! +//! The mutable slice yields mutable references to the elements: +//! +//! ``` +//! let mut scores = [7, 8, 9]; +//! for score in &mut scores[..] { +//! *score += 1; +//! } +//! ``` +//! +//! This iterator yields mutable references to the slice's elements, so while +//! the element type of the slice is `i32`, the element type of the iterator is +//! `&mut i32`. +//! +//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default +//! iterators. +//! * Further methods that return iterators are [`.split`], [`.splitn`], +//! [`.chunks`], [`.windows`] and more. +//! +//! [`Hash`]: core::hash::Hash +//! [`.iter`]: slice::iter +//! [`.iter_mut`]: slice::iter_mut +//! [`.split`]: slice::split +//! [`.splitn`]: slice::splitn +//! [`.chunks`]: slice::chunks +//! [`.windows`]: slice::windows +#![stable(feature = "rust1", since = "1.0.0")] +// Many of the usings in this module are only used in the test configuration. +// It's cleaner to just turn off the unused_imports warning than to fix them. 
+#![cfg_attr(test, allow(unused_imports, dead_code))] + +use core::borrow::{Borrow, BorrowMut}; +#[cfg(not(no_global_oom_handling))] +use core::cmp::Ordering::{self, Less}; +#[cfg(not(no_global_oom_handling))] +use core::mem; +#[cfg(not(no_global_oom_handling))] +use core::mem::size_of; +#[cfg(not(no_global_oom_handling))] +use core::ptr; + +use crate::alloc::Allocator; +use crate::alloc::Global; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::ToOwned; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::vec::Vec; + +#[unstable(feature = "slice_range", issue = "76393")] +pub use core::slice::range; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunks; +#[unstable(feature = "array_chunks", issue = "74985")] +pub use core::slice::ArrayChunksMut; +#[unstable(feature = "array_windows", issue = "75027")] +pub use core::slice::ArrayWindows; +#[stable(feature = "slice_get_slice", since = "1.28.0")] +pub use core::slice::SliceIndex; +#[stable(feature = "from_ref", since = "1.28.0")] +pub use core::slice::{from_mut, from_ref}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{from_raw_parts, from_raw_parts_mut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Chunks, Windows}; +#[stable(feature = "chunks_exact", since = "1.31.0")] +pub use core::slice::{ChunksExact, ChunksExactMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{ChunksMut, Split, SplitMut}; +#[unstable(feature = "slice_group_by", issue = "80552")] +pub use core::slice::{GroupBy, GroupByMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{Iter, IterMut}; +#[stable(feature = "rchunks", since = "1.31.0")] +pub use core::slice::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; +#[stable(feature = "slice_rsplit", since = "1.27.0")] +pub use core::slice::{RSplit, RSplitMut}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::slice::{RSplitN, RSplitNMut, SplitN, SplitNMut}; + +//////////////////////////////////////////////////////////////////////////////// +// Basic slice extension methods +//////////////////////////////////////////////////////////////////////////////// + +// HACK(japaric) needed for the implementation of `vec!` macro during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::into_vec; + +// HACK(japaric) needed for the implementation of `Vec::clone` during testing +// N.B., see the `hack` module in this file for more details. +#[cfg(test)] +pub use hack::to_vec; + +// HACK(japaric): With cfg(test) `impl [T]` is not available, these three +// functions are actually methods that are in `impl [T]` but not in +// `core::slice::SliceExt` - we need to supply these functions for the +// `test_permutations` test +mod hack { + use core::alloc::Allocator; + + use crate::boxed::Box; + use crate::collections::TryReserveError; + use crate::vec::Vec; + + // We shouldn't add inline attribute to this since this is used in + // `vec!` macro mostly and causes perf regression. See #71204 for + // discussion and perf results. 
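+    // Reuses the `Box`'s existing allocation: only the pointer, length, and
+    // allocator move into the returned `Vec`; no elements are copied.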
+ pub fn into_vec(b: Box<[T], A>) -> Vec { + unsafe { + let len = b.len(); + let (b, alloc) = Box::into_raw_with_allocator(b); + Vec::from_raw_parts_in(b as *mut T, len, len, alloc) + } + } + + #[cfg(not(no_global_oom_handling))] + #[inline] + pub fn to_vec(s: &[T], alloc: A) -> Vec { + T::to_vec(s, alloc) + } + + #[inline] + pub fn try_to_vec(s: &[T], alloc: A) -> Result, TryReserveError> { + T::try_to_vec(s, alloc) + } + + #[cfg(not(no_global_oom_handling))] + pub trait ConvertVec { + fn to_vec(s: &[Self], alloc: A) -> Vec + where + Self: Sized; + } + + pub trait TryConvertVec { + fn try_to_vec(s: &[Self], alloc: A) -> Result, TryReserveError> + where + Self: Sized; + } + + #[cfg(not(no_global_oom_handling))] + impl ConvertVec for T { + #[inline] + default fn to_vec(s: &[Self], alloc: A) -> Vec { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::with_capacity_in(s.len(), alloc); + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + vec + } + } + + #[cfg(not(no_global_oom_handling))] + impl ConvertVec for T { + #[inline] + fn to_vec(s: &[Self], alloc: A) -> Vec { + let mut v = Vec::with_capacity_in(s.len(), alloc); + // SAFETY: + // allocated above with the capacity of `s`, and initialize to `s.len()` in + // ptr::copy_to_non_overlapping below. + unsafe { + s.as_ptr().copy_to_nonoverlapping(v.as_mut_ptr(), s.len()); + v.set_len(s.len()); + } + v + } + } + + impl TryConvertVec for T { + #[inline] + default fn try_to_vec(s: &[Self], alloc: A) -> Result, TryReserveError> { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::try_with_capacity_in(s.len(), alloc)?; + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + Ok(vec) + } + } +} + +#[lang = "slice_alloc"] +#[cfg(not(test))] +impl [T] { + /// Sorts the slice. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable`](slice::sort_unstable). 
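+    ///
+    /// As an illustration of stability: sorting
+    /// `[(1, 'b'), (0, 'a'), (1, 'a')]` with `sort_by_key(|p| p.0)` keeps
+    /// `(1, 'b')` ahead of `(1, 'a')`, since elements that compare equal are
+    /// never reordered.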
+ /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5, 4, 1, -3, 2]; + /// + /// v.sort(); + /// assert!(v == [-5, -3, 1, 2, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort(&mut self) + where + T: Ord, + { + merge_sort(self, |a, b| a.lt(b)); + } + + /// Sorts the slice with a comparator function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case. + /// + /// The comparator function must define a total ordering for the elements in the slice. If + /// the ordering is not total, the order of the elements is unspecified. An order is a + /// total order if it is (for all `a`, `b` and `c`): + /// + /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and + /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`. + /// + /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use + /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`. + /// + /// ``` + /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0]; + /// floats.sort_by(|a, b| a.partial_cmp(b).unwrap()); + /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]); + /// ``` + /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by`](slice::sort_unstable_by). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [5, 4, 1, 3, 2]; + /// v.sort_by(|a, b| a.cmp(b)); + /// assert!(v == [1, 2, 3, 4, 5]); + /// + /// // reverse sorting + /// v.sort_by(|a, b| b.cmp(a)); + /// assert!(v == [5, 4, 3, 2, 1]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn sort_by(&mut self, mut compare: F) + where + F: FnMut(&T, &T) -> Ordering, + { + merge_sort(self, |a, b| compare(a, b) == Less); + } + + /// Sorts the slice with a key extraction function. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For expensive key functions (e.g. functions that are not simple property accesses or + /// basic operations), [`sort_by_cached_key`](slice::sort_by_cached_key) is likely to be + /// significantly faster, as it does not recompute element keys. 
+ /// + /// When applicable, unstable sorting is preferred because it is generally faster than stable + /// sorting and it doesn't allocate auxiliary memory. + /// See [`sort_unstable_by_key`](slice::sort_unstable_by_key). + /// + /// # Current implementation + /// + /// The current algorithm is an adaptive, iterative merge sort inspired by + /// [timsort](https://en.wikipedia.org/wiki/Timsort). + /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of + /// two or more sorted sequences concatenated one after another. + /// + /// Also, it allocates temporary storage half the size of `self`, but for short slices a + /// non-allocating insertion sort is used instead. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 1, -3, 2]; + /// + /// v.sort_by_key(|k| k.abs()); + /// assert!(v == [1, 2, -3, 4, -5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "slice_sort_by_key", since = "1.7.0")] + #[inline] + pub fn sort_by_key(&mut self, mut f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + merge_sort(self, |a, b| f(a).lt(&f(b))); + } + + /// Sorts the slice with a key extraction function. + /// + /// During sorting, the key function is called only once per element. + /// + /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*)) + /// worst-case, where the key function is *O*(*m*). + /// + /// For simple key functions (e.g., functions that are property accesses or + /// basic operations), [`sort_by_key`](slice::sort_by_key) is likely to be + /// faster. + /// + /// # Current implementation + /// + /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, + /// which combines the fast average case of randomized quicksort with the fast worst case of + /// heapsort, while achieving linear time on slices with certain patterns. It uses some + /// randomization to avoid degenerate cases, but with a fixed seed to always provide + /// deterministic behavior. + /// + /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the + /// length of the slice. + /// + /// # Examples + /// + /// ``` + /// let mut v = [-5i32, 4, 32, -3, 2]; + /// + /// v.sort_by_cached_key(|k| k.to_string()); + /// assert!(v == [-3, -5, 2, 32, 4]); + /// ``` + /// + /// [pdqsort]: https://github.com/orlp/pdqsort + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "slice_sort_by_cached_key", since = "1.34.0")] + #[inline] + pub fn sort_by_cached_key(&mut self, f: F) + where + F: FnMut(&T) -> K, + K: Ord, + { + // Helper macro for indexing our vector by the smallest possible type, to reduce allocation. + macro_rules! sort_by_key { + ($t:ty, $slice:ident, $f:ident) => {{ + let mut indices: Vec<_> = + $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect(); + // The elements of `indices` are unique, as they are indexed, so any sort will be + // stable with respect to the original slice. We use `sort_unstable` here because + // it requires less memory allocation. 
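+                // After the sort, `indices[i].1` is the original position of
+                // the key that ranks `i`-th; the loop below applies that
+                // permutation in place, chasing already-moved slots through
+                // each cycle before swapping.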
+ indices.sort_unstable(); + for i in 0..$slice.len() { + let mut index = indices[i].1; + while (index as usize) < i { + index = indices[index as usize].1; + } + indices[i].1 = index; + $slice.swap(i, index as usize); + } + }}; + } + + let sz_u8 = mem::size_of::<(K, u8)>(); + let sz_u16 = mem::size_of::<(K, u16)>(); + let sz_u32 = mem::size_of::<(K, u32)>(); + let sz_usize = mem::size_of::<(K, usize)>(); + + let len = self.len(); + if len < 2 { + return; + } + if sz_u8 < sz_u16 && len <= (u8::MAX as usize) { + return sort_by_key!(u8, self, f); + } + if sz_u16 < sz_u32 && len <= (u16::MAX as usize) { + return sort_by_key!(u16, self, f); + } + if sz_u32 < sz_usize && len <= (u32::MAX as usize) { + return sort_by_key!(u32, self, f); + } + sort_by_key!(usize, self, f) + } + + /// Copies `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.to_vec(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_conversion_suggestion] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn to_vec(&self) -> Vec + where + T: Clone, + { + self.to_vec_in(Global) + } + + /// Tries to copy `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec().unwrap(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec(&self) -> Result, TryReserveError> + where + T: Clone, + { + self.try_to_vec_in(Global) + } + + /// Copies `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.to_vec_in(System); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[unstable(feature = "allocator_api", issue = "32838")] + pub fn to_vec_in(&self, alloc: A) -> Vec + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::to_vec(self, alloc) + } + + /// Tries to copy `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec_in(System).unwrap(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec_in(&self, alloc: A) -> Result, TryReserveError> + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::try_to_vec(self, alloc) + } + + /// Converts `self` into a vector without clones or allocation. + /// + /// The resulting vector can be converted back into a box via + /// `Vec`'s `into_boxed_slice` method. + /// + /// # Examples + /// + /// ``` + /// let s: Box<[i32]> = Box::new([10, 40, 30]); + /// let x = s.into_vec(); + /// // `s` cannot be used anymore because it has been converted into `x`. + /// + /// assert_eq!(x, vec![10, 40, 30]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn into_vec(self: Box) -> Vec { + // N.B., see the `hack` module in this file for more details. + hack::into_vec(self) + } + + /// Creates a vector by repeating a slice `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. 
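+    ///
+    /// # Current implementation
+    ///
+    /// The buffer is filled by repeated doubling (about `log2(n)` bulk
+    /// copies) plus one final partial copy, rather than `n` element-wise
+    /// passes.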
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!([1, 2].repeat(3), vec![1, 2, 1, 2, 1, 2]); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// b"0123456789abcdef".repeat(usize::MAX); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "repeat_generic_slice", since = "1.40.0")] + pub fn repeat(&self, n: usize) -> Vec + where + T: Copy, + { + if n == 0 { + return Vec::new(); + } + + // If `n` is larger than zero, it can be split as + // `n = 2^expn + rem (2^expn > rem, expn >= 0, rem >= 0)`. + // `2^expn` is the number represented by the leftmost '1' bit of `n`, + // and `rem` is the remaining part of `n`. + + // Using `Vec` to access `set_len()`. + let capacity = self.len().checked_mul(n).expect("capacity overflow"); + let mut buf = Vec::with_capacity(capacity); + + // `2^expn` repetition is done by doubling `buf` `expn`-times. + buf.extend(self); + { + let mut m = n >> 1; + // If `m > 0`, there are remaining bits up to the leftmost '1'. + while m > 0 { + // `buf.extend(buf)`: + unsafe { + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + buf.len(), + ); + // `buf` has capacity of `self.len() * n`. + let buf_len = buf.len(); + buf.set_len(buf_len * 2); + } + + m >>= 1; + } + } + + // `rem` (`= n - 2^expn`) repetition is done by copying + // first `rem` repetitions from `buf` itself. + let rem_len = capacity - buf.len(); // `self.len() * rem` + if rem_len > 0 { + // `buf.extend(buf[0 .. rem_len])`: + unsafe { + // This is non-overlapping since `2^expn > rem`. + ptr::copy_nonoverlapping( + buf.as_ptr(), + (buf.as_mut_ptr() as *mut T).add(buf.len()), + rem_len, + ); + // `buf.len() + rem_len` equals to `buf.capacity()` (`= self.len() * n`). + buf.set_len(capacity); + } + } + buf + } + + /// Flattens a slice of `T` into a single value `Self::Output`. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].concat(), "helloworld"); + /// assert_eq!([[1, 2], [3, 4]].concat(), [1, 2, 3, 4]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn concat(&self) -> >::Output + where + Self: Concat, + { + Concat::concat(self) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(["hello", "world"].join(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].join(&0), [1, 2, 0, 3, 4]); + /// assert_eq!([[1, 2], [3, 4]].join(&[0, 0][..]), [1, 2, 0, 0, 3, 4]); + /// ``` + #[stable(feature = "rename_connect_to_join", since = "1.3.0")] + pub fn join(&self, sep: Separator) -> >::Output + where + Self: Join, + { + Join::join(self, sep) + } + + /// Flattens a slice of `T` into a single value `Self::Output`, placing a + /// given separator between each. + /// + /// # Examples + /// + /// ``` + /// # #![allow(deprecated)] + /// assert_eq!(["hello", "world"].connect(" "), "hello world"); + /// assert_eq!([[1, 2], [3, 4]].connect(&0), [1, 2, 0, 3, 4]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_deprecated(since = "1.3.0", reason = "renamed to join")] + pub fn connect(&self, sep: Separator) -> >::Output + where + Self: Join, + { + Join::join(self, sep) + } +} + +#[lang = "slice_u8_alloc"] +#[cfg(not(test))] +impl [u8] { + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII upper case equivalent. 
+ /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// [`make_ascii_uppercase`]: slice::make_ascii_uppercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_uppercase(); + me + } + + /// Returns a vector containing a copy of this slice where each byte + /// is mapped to its ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// [`make_ascii_lowercase`]: slice::make_ascii_lowercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> Vec { + let mut me = self.to_vec(); + me.make_ascii_lowercase(); + me + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Extension traits for slices over specific kinds of data +//////////////////////////////////////////////////////////////////////////////// + +/// Helper trait for [`[T]::concat`](slice::concat). +/// +/// Note: the `Item` type parameter is not used in this trait, +/// but it allows impls to be more generic. +/// Without it, we get this error: +/// +/// ```error +/// error[E0207]: the type parameter `T` is not constrained by the impl trait, self type, or predica +/// --> src/liballoc/slice.rs:608:6 +/// | +/// 608 | impl> Concat for [V] { +/// | ^ unconstrained type parameter +/// ``` +/// +/// This is because there could exist `V` types with multiple `Borrow<[_]>` impls, +/// such that multiple `T` types would apply: +/// +/// ``` +/// # #[allow(dead_code)] +/// pub struct Foo(Vec, Vec); +/// +/// impl std::borrow::Borrow<[u32]> for Foo { +/// fn borrow(&self) -> &[u32] { &self.0 } +/// } +/// +/// impl std::borrow::Borrow<[String]> for Foo { +/// fn borrow(&self) -> &[String] { &self.1 } +/// } +/// ``` +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Concat { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::concat`](slice::concat) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn concat(slice: &Self) -> Self::Output; +} + +/// Helper trait for [`[T]::join`](slice::join) +#[unstable(feature = "slice_concat_trait", issue = "27747")] +pub trait Join { + #[unstable(feature = "slice_concat_trait", issue = "27747")] + /// The resulting type after concatenation + type Output; + + /// Implementation of [`[T]::join`](slice::join) + #[unstable(feature = "slice_concat_trait", issue = "27747")] + fn join(slice: &Self, sep: Separator) -> Self::Output; +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Concat for [V] { + type Output = Vec; + + fn concat(slice: &Self) -> Vec { + let size = slice.iter().map(|slice| slice.borrow().len()).sum(); + let mut result = Vec::with_capacity(size); + for v in slice { + result.extend_from_slice(v.borrow()) + } + result + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> Join<&T> for [V] { + type Output = Vec; + + fn join(slice: &Self, sep: &T) -> Vec 
+        let mut iter = slice.iter();
+        let first = match iter.next() {
+            Some(first) => first,
+            None => return vec![],
+        };
+        let size = slice.iter().map(|v| v.borrow().len()).sum::<usize>() + slice.len() - 1;
+        let mut result = Vec::with_capacity(size);
+        result.extend_from_slice(first.borrow());
+
+        for v in iter {
+            result.push(sep.clone());
+            result.extend_from_slice(v.borrow())
+        }
+        result
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[unstable(feature = "slice_concat_ext", issue = "27747")]
+impl<T: Clone, V: Borrow<[T]>> Join<&[T]> for [V] {
+    type Output = Vec<T>;
+
+    fn join(slice: &Self, sep: &[T]) -> Vec<T> {
+        let mut iter = slice.iter();
+        let first = match iter.next() {
+            Some(first) => first,
+            None => return vec![],
+        };
+        let size =
+            slice.iter().map(|v| v.borrow().len()).sum::<usize>() + sep.len() * (slice.len() - 1);
+        let mut result = Vec::with_capacity(size);
+        result.extend_from_slice(first.borrow());
+
+        for v in iter {
+            result.extend_from_slice(sep);
+            result.extend_from_slice(v.borrow())
+        }
+        result
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Standard trait implementations for slices
+////////////////////////////////////////////////////////////////////////////////
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T> Borrow<[T]> for Vec<T> {
+    fn borrow(&self) -> &[T] {
+        &self[..]
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T> BorrowMut<[T]> for Vec<T> {
+    fn borrow_mut(&mut self) -> &mut [T] {
+        &mut self[..]
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T: Clone> ToOwned for [T] {
+    type Owned = Vec<T>;
+    #[cfg(not(test))]
+    fn to_owned(&self) -> Vec<T> {
+        self.to_vec()
+    }
+
+    #[cfg(test)]
+    fn to_owned(&self) -> Vec<T> {
+        hack::to_vec(self, Global)
+    }
+
+    fn clone_into(&self, target: &mut Vec<T>) {
+        // drop anything in target that will not be overwritten
+        target.truncate(self.len());
+
+        // target.len <= self.len due to the truncate above, so the
+        // slices here are always in-bounds.
+        let (init, tail) = self.split_at(target.len());
+
+        // reuse the contained values' allocations/resources.
+        target.clone_from_slice(init);
+        target.extend_from_slice(tail);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Sorting
+////////////////////////////////////////////////////////////////////////////////
+
+/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
+///
+/// This is the integral subroutine of insertion sort.
+#[cfg(not(no_global_oom_handling))]
+fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    if v.len() >= 2 && is_less(&v[1], &v[0]) {
+        unsafe {
+            // There are three ways to implement insertion here:
+            //
+            // 1. Swap adjacent elements until the first one gets to its final destination.
+            //    However, this way we copy data around more than is necessary. If elements are big
+            //    structures (costly to copy), this method will be slow.
+            //
+            // 2. Iterate until the right place for the first element is found. Then shift the
+            //    elements succeeding it to make room for it and finally place it into the
+            //    remaining hole. This is a good method.
+            //
+            // 3. Copy the first element into a temporary variable. Iterate until the right place
+            //    for it is found. As we go along, copy every traversed element into the slot
+            //    preceding it. Finally, copy data from the temporary variable into the remaining
+            //    hole. This method is very good.
+            //    Benchmarks demonstrated slightly better performance than with the 2nd method.
+            //
+            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
+            let mut tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
+
+            // Intermediate state of the insertion process is always tracked by `hole`, which
+            // serves two purposes:
+            // 1. Protects integrity of `v` from panics in `is_less`.
+            // 2. Fills the remaining hole in `v` in the end.
+            //
+            // Panic safety:
+            //
+            // If `is_less` panics at any point during the process, `hole` will get dropped and
+            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
+            // initially held exactly once.
+            let mut hole = InsertionHole { src: &mut *tmp, dest: &mut v[1] };
+            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
+
+            for i in 2..v.len() {
+                if !is_less(&v[i], &*tmp) {
+                    break;
+                }
+                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
+                hole.dest = &mut v[i];
+            }
+            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
+        }
+    }
+
+    // When dropped, copies from `src` into `dest`.
+    struct InsertionHole<T> {
+        src: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for InsertionHole<T> {
+        fn drop(&mut self) {
+            unsafe {
+                ptr::copy_nonoverlapping(self.src, self.dest, 1);
+            }
+        }
+    }
+}
+
+/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
+/// stores the result into `v[..]`.
+///
+/// # Safety
+///
+/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
+/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
+#[cfg(not(no_global_oom_handling))]
+unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    let len = v.len();
+    let v = v.as_mut_ptr();
+    let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
+
+    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
+    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
+    // copying the lesser (or greater) one into `v`.
+    //
+    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
+    // consumed first, then we must copy whatever is left of the shorter run into the remaining
+    // hole in `v`.
+    //
+    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
+    // 1. Protects integrity of `v` from panics in `is_less`.
+    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
+    //
+    // Panic safety:
+    //
+    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
+    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
+    // object it initially held exactly once.
+    let mut hole;
+
+    if mid <= len - mid {
+        // The left run is shorter.
+        unsafe {
+            ptr::copy_nonoverlapping(v, buf, mid);
+            hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
+        }
+
+        // Initially, these pointers point to the beginnings of their arrays.
+        let left = &mut hole.start;
+        let mut right = v_mid;
+        let out = &mut hole.dest;
+
+        while *left < hole.end && right < v_end {
+            // Consume the lesser side.
+            // If equal, prefer the left run to maintain stability.
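+            // Stability note: equal elements must keep their original relative order. Every
+            // element of the left run precedes every element of the right run in the original
+            // slice, so on a tie the left element has to be emitted first; that is why the
+            // comparison below is the strict `is_less(&*right, &**left)`.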
+            unsafe {
+                let to_copy = if is_less(&*right, &**left) {
+                    get_and_increment(&mut right)
+                } else {
+                    get_and_increment(left)
+                };
+                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
+            }
+        }
+    } else {
+        // The right run is shorter.
+        unsafe {
+            ptr::copy_nonoverlapping(v_mid, buf, len - mid);
+            hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
+        }
+
+        // Initially, these pointers point past the ends of their arrays.
+        let left = &mut hole.dest;
+        let right = &mut hole.end;
+        let mut out = v_end;
+
+        while v < *left && buf < *right {
+            // Consume the greater side.
+            // If equal, prefer the right run to maintain stability.
+            unsafe {
+                let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
+                    decrement_and_get(left)
+                } else {
+                    decrement_and_get(right)
+                };
+                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
+            }
+        }
+    }
+    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
+    // it will now be copied into the hole in `v`.
+
+    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
+        let old = *ptr;
+        *ptr = unsafe { ptr.offset(1) };
+        old
+    }
+
+    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
+        *ptr = unsafe { ptr.offset(-1) };
+        *ptr
+    }
+
+    // When dropped, copies the range `start..end` into `dest..`.
+    struct MergeHole<T> {
+        start: *mut T,
+        end: *mut T,
+        dest: *mut T,
+    }
+
+    impl<T> Drop for MergeHole<T> {
+        fn drop(&mut self) {
+            // `T` is not a zero-sized type, so it's okay to divide by its size.
+            let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
+            unsafe {
+                ptr::copy_nonoverlapping(self.start, self.dest, len);
+            }
+        }
+    }
+}
+
+/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
+/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
+///
+/// The algorithm identifies strictly descending and non-descending subsequences, which are called
+/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
+/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
+/// satisfied:
+///
+/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
+/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
+///
+/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
+#[cfg(not(no_global_oom_handling))]
+fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
+where
+    F: FnMut(&T, &T) -> bool,
+{
+    // Slices of up to this length get sorted using insertion sort.
+    const MAX_INSERTION: usize = 20;
+    // Very short runs are extended using insertion sort to span at least this many elements.
+    const MIN_RUN: usize = 10;
+
+    // Sorting has no meaningful behavior on zero-sized types.
+    if size_of::<T>() == 0 {
+        return;
+    }
+
+    let len = v.len();
+
+    // Short arrays get sorted in-place via insertion sort to avoid allocations.
+    if len <= MAX_INSERTION {
+        if len >= 2 {
+            for i in (0..len - 1).rev() {
+                insert_head(&mut v[i..], &mut is_less);
+            }
+        }
+        return;
+    }
+
+    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
+    // shallow copies of the contents of `v` without risking the dtors running on copies if
+    // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
+    // which will always have length at most `len / 2`.
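+    // Why `len / 2` suffices: `merge` only ever copies the shorter of the two runs being
+    // merged into the buffer, and the shorter run is at most half of the combined length
+    // of the pair, which in turn never exceeds `len`.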
+    let mut buf = Vec::with_capacity(len / 2);
+
+    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
+    // strange decision, but consider the fact that merges more often go in the opposite direction
+    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
+    // backwards. To conclude, identifying runs by traversing backwards improves performance.
+    let mut runs = vec![];
+    let mut end = len;
+    while end > 0 {
+        // Find the next natural run, and reverse it if it's strictly descending.
+        let mut start = end - 1;
+        if start > 0 {
+            start -= 1;
+            unsafe {
+                if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
+                    while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
+                        start -= 1;
+                    }
+                    v[start..end].reverse();
+                } else {
+                    while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
+                    {
+                        start -= 1;
+                    }
+                }
+            }
+        }
+
+        // Insert some more elements into the run if it's too short. Insertion sort is faster than
+        // merge sort on short sequences, so this significantly improves performance.
+        while start > 0 && end - start < MIN_RUN {
+            start -= 1;
+            insert_head(&mut v[start..end], &mut is_less);
+        }
+
+        // Push this run onto the stack.
+        runs.push(Run { start, len: end - start });
+        end = start;
+
+        // Merge some pairs of adjacent runs to satisfy the invariants.
+        while let Some(r) = collapse(&runs) {
+            let left = runs[r + 1];
+            let right = runs[r];
+            unsafe {
+                merge(
+                    &mut v[left.start..right.start + right.len],
+                    left.len,
+                    buf.as_mut_ptr(),
+                    &mut is_less,
+                );
+            }
+            runs[r] = Run { start: left.start, len: left.len + right.len };
+            runs.remove(r + 1);
+        }
+    }
+
+    // Finally, exactly one run must remain in the stack.
+    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+
+    // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
+    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
+    // algorithm should continue building a new run instead, `None` is returned.
+    //
+    // TimSort is infamous for its buggy implementations, as described here:
+    // http://envisage-project.eu/timsort-specification-and-verification/
+    //
+    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
+    // Enforcing them on just top three is not sufficient to ensure that the invariants will still
+    // hold for *all* runs in the stack.
+    //
+    // This function correctly checks invariants for the top four runs. Additionally, if the top
+    // run starts at index 0, it will always demand a merge operation until the stack is fully
+    // collapsed, in order to complete the sort.
+    #[inline]
+    fn collapse(runs: &[Run]) -> Option<usize> {
+        let n = runs.len();
+        if n >= 2
+            && (runs[n - 1].start == 0
+                || runs[n - 2].len <= runs[n - 1].len
+                || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
+                || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
+        {
+            if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
+        } else {
+            None
+        }
+    }
+
+    #[derive(Clone, Copy)]
+    struct Run {
+        start: usize,
+        len: usize,
+    }
+}
diff --git a/rust/alloc/str.rs b/rust/alloc/str.rs
new file mode 100644
index 0000000000000..fa5e3b55ad209
--- /dev/null
+++ b/rust/alloc/str.rs
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! Unicode string slices.
+//!
+//! *[See also the `str` primitive type](str).*
+//!
+//! The `&str` type is one of the two main string types, the other being `String`.
+//! Unlike its `String` counterpart, its contents are borrowed.
+//!
+//! # Basic Usage
+//!
+//! A basic string declaration of `&str` type:
+//!
+//! ```
+//! let hello_world = "Hello, World!";
+//! ```
+//!
+//! Here we have declared a string literal, also known as a string slice.
+//! String literals have a static lifetime, which means the string `hello_world`
+//! is guaranteed to be valid for the duration of the entire program.
+//! We can explicitly specify `hello_world`'s lifetime as well:
+//!
+//! ```
+//! let hello_world: &'static str = "Hello, world!";
+//! ```
+
+#![stable(feature = "rust1", since = "1.0.0")]
+// Many of the usings in this module are only used in the test configuration.
+// It's cleaner to just turn off the unused_imports warning than to fix them.
+#![allow(unused_imports)]
+
+use core::borrow::{Borrow, BorrowMut};
+use core::iter::FusedIterator;
+use core::mem;
+use core::ptr;
+use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
+use core::unicode::conversions;
+
+use crate::borrow::ToOwned;
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::slice::{Concat, Join, SliceIndex};
+use crate::string::String;
+use crate::vec::Vec;
+
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::pattern;
+#[stable(feature = "encode_utf16", since = "1.8.0")]
+pub use core::str::EncodeUtf16;
+#[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
+pub use core::str::SplitAsciiWhitespace;
+#[stable(feature = "split_inclusive", since = "1.53.0")]
+pub use core::str::SplitInclusive;
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::SplitWhitespace;
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{from_utf8, from_utf8_mut, Bytes, CharIndices, Chars};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError};
+#[stable(feature = "str_escape", since = "1.34.0")]
+pub use core::str::{EscapeDebug, EscapeDefault, EscapeUnicode};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{FromStr, Utf8Error};
+#[allow(deprecated)]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{Lines, LinesAny};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{MatchIndices, RMatchIndices};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{Matches, RMatches};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{RSplit, Split};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{RSplitN, SplitN};
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use core::str::{RSplitTerminator, SplitTerminator};
+
+/// Note: `str` in `Concat<str>` is not meaningful here.
+/// This type parameter of the trait only exists to enable another impl.
+#[cfg(not(no_global_oom_handling))]
+#[unstable(feature = "slice_concat_ext", issue = "27747")]
+impl<S: Borrow<str>> Concat<str> for [S] {
+    type Output = String;
+
+    fn concat(slice: &Self) -> String {
+        Join::join(slice, "")
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[unstable(feature = "slice_concat_ext", issue = "27747")]
+impl<S: Borrow<str>> Join<&str> for [S] {
+    type Output = String;
+
+    fn join(slice: &Self, sep: &str) -> String {
+        unsafe { String::from_utf8_unchecked(join_generic_copy(slice, sep.as_bytes())) }
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+macro_rules! specialize_for_lengths {
+    ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{
+        let mut target = $target;
+        let iter = $iter;
+        let sep_bytes = $separator;
+        match $separator.len() {
+            $(
+                // loops with hardcoded sizes run much faster
+                // specialize the cases with small separator lengths
+                $num => {
+                    for s in iter {
+                        copy_slice_and_advance!(target, sep_bytes);
+                        let content_bytes = s.borrow().as_ref();
+                        copy_slice_and_advance!(target, content_bytes);
+                    }
+                },
+            )*
+            _ => {
+                // arbitrary non-zero size fallback
+                for s in iter {
+                    copy_slice_and_advance!(target, sep_bytes);
+                    let content_bytes = s.borrow().as_ref();
+                    copy_slice_and_advance!(target, content_bytes);
+                }
+            }
+        }
+        target
+    }}
+}
+
+#[cfg(not(no_global_oom_handling))]
+macro_rules! copy_slice_and_advance {
+    ($target:expr, $bytes:expr) => {
+        let len = $bytes.len();
+        let (head, tail) = { $target }.split_at_mut(len);
+        head.copy_from_slice($bytes);
+        $target = tail;
+    };
+}
+
+// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec
+// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262)
+// For this reason SliceConcat<T> is not specialized for T: Copy and SliceConcat<str> is the
+// only user of this function. It is left in place for the time when that is fixed.
+//
+// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]>
+// [T] and str both impl AsRef<[T]> for some T
+// => s.borrow().as_ref() and we always have slices
+#[cfg(not(no_global_oom_handling))]
+fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T>
+where
+    T: Copy,
+    B: AsRef<[T]> + ?Sized,
+    S: Borrow<B>,
+{
+    let sep_len = sep.len();
+    let mut iter = slice.iter();
+
+    // the first slice is the only one without a separator preceding it
+    let first = match iter.next() {
+        Some(first) => first,
+        None => return vec![],
+    };
+
+    // compute the exact total length of the joined Vec
+    // if the `len` calculation overflows, we'll panic
+    // we would have run out of memory anyway and the rest of the function requires
+    // the entire Vec pre-allocated for safety
+    let reserved_len = sep_len
+        .checked_mul(iter.len())
+        .and_then(|n| {
+            slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add)
+        })
+        .expect("attempt to join into collection with len > usize::MAX");
+
+    // prepare an uninitialized buffer
+    let mut result = Vec::with_capacity(reserved_len);
+    debug_assert!(result.capacity() >= reserved_len);
+
+    result.extend_from_slice(first.borrow().as_ref());
+
+    unsafe {
+        let pos = result.len();
+        let target = result.get_unchecked_mut(pos..reserved_len);
+
+        // copy separator and slices over without bounds checks
+        // generate loops with hardcoded offsets for small separators
+        // massive improvements possible (~ x2)
+        let remain = specialize_for_lengths!(sep, target, iter; 0, 1, 2, 3, 4);
+
+        // A weird borrow implementation may return different
+        // slices for the length calculation and the actual copy.
+        // Make sure we don't expose uninitialized bytes to the caller.
+        let result_len = reserved_len - remain.len();
+        result.set_len(result_len);
+    }
+    result
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Borrow<str> for String {
+    #[inline]
+    fn borrow(&self) -> &str {
+        &self[..]
+    }
+}
+
+#[stable(feature = "string_borrow_mut", since = "1.36.0")]
+impl BorrowMut<str> for String {
+    #[inline]
+    fn borrow_mut(&mut self) -> &mut str {
+        &mut self[..]
+ } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for str { + type Owned = String; + #[inline] + fn to_owned(&self) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().to_owned()) } + } + + fn clone_into(&self, target: &mut String) { + let mut b = mem::take(target).into_bytes(); + self.as_bytes().clone_into(&mut b); + *target = unsafe { String::from_utf8_unchecked(b) } + } +} + +/// Methods for string slices. +#[lang = "str_alloc"] +#[cfg(not(test))] +impl str { + /// Converts a `Box` into a `Box<[u8]>` without copying or allocating. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is a string"; + /// let boxed_str = s.to_owned().into_boxed_str(); + /// let boxed_bytes = boxed_str.into_boxed_bytes(); + /// assert_eq!(*boxed_bytes, *s.as_bytes()); + /// ``` + #[stable(feature = "str_box_extras", since = "1.20.0")] + #[inline] + pub fn into_boxed_bytes(self: Box) -> Box<[u8]> { + self.into() + } + + /// Replaces all matches of a pattern with another string. + /// + /// `replace` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is old"; + /// + /// assert_eq!("this is new", s.replace("old", "new")); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replace("cookie monster", "little lamb")); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn replace<'a, P: Pattern<'a>>(&'a self, from: P, to: &str) -> String { + let mut result = String::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Replaces first N matches of a pattern with another string. + /// + /// `replacen` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice at most `count` times. 
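+    /// As a small illustrative sketch (an editorial addition, not upstream text): with
+    /// `count == 0` nothing is replaced, so the result is simply a copy of the original:
+    ///
+    /// ```
+    /// assert_eq!("aaa", "aaa".replacen('a', "b", 0));
+    /// ```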
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "foo foo 123 foo"; + /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2)); + /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3)); + /// assert_eq!("foo foo new23 foo", s.replacen(char::is_numeric, "new", 1)); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replacen("cookie monster", "little lamb", 10)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "str_replacen", since = "1.16.0")] + pub fn replacen<'a, P: Pattern<'a>>(&'a self, pat: P, to: &str, count: usize) -> String { + // Hope to reduce the times of re-allocation + let mut result = String::with_capacity(32); + let mut last_end = 0; + for (start, part) in self.match_indices(pat).take(count) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Returns the lowercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property + /// `Lowercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "HELLO"; + /// + /// assert_eq!("hello", s.to_lowercase()); + /// ``` + /// + /// A tricky example, with sigma: + /// + /// ``` + /// let sigma = "Σ"; + /// + /// assert_eq!("σ", sigma.to_lowercase()); + /// + /// // but at the end of a word, it's ς, not σ: + /// let odysseus = "ὈΔΥΣΣΕΎΣ"; + /// + /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); + /// ``` + /// + /// Languages without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_lowercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_lowercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for (i, c) in self[..].char_indices() { + if c == 'Σ' { + // Σ maps to σ, except at the end of a word where it maps to ς. + // This is the only conditional (contextual) but language-independent mapping + // in `SpecialCasing.txt`, + // so hard-code it rather than have a generic "condition" mechanism. + // See https://github.com/rust-lang/rust/issues/26035 + map_uppercase_sigma(self, i, &mut s) + } else { + match conversions::to_lower(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + } + return s; + + fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { + // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 + // for the definition of `Final_Sigma`. 
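+        // Final_Sigma, informally: the sigma is preceded by a cased letter (skipping any
+        // case-ignorable characters) and is not followed by one. The `i + 2` below skips
+        // the two UTF-8 bytes of 'Σ' itself, which the `debug_assert!` double-checks.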
+ debug_assert!('Σ'.len_utf8() == 2); + let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev()) + && !case_ignoreable_then_cased(from[i + 2..].chars()); + to.push_str(if is_word_final { "ς" } else { "σ" }); + } + + fn case_ignoreable_then_cased>(iter: I) -> bool { + use core::unicode::{Case_Ignorable, Cased}; + match iter.skip_while(|&c| Case_Ignorable(c)).next() { + Some(c) => Cased(c), + None => false, + } + } + } + + /// Returns the uppercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property + /// `Uppercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "hello"; + /// + /// assert_eq!("HELLO", s.to_uppercase()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_uppercase()); + /// ``` + /// + /// One character can become multiple: + /// ``` + /// let s = "tschüß"; + /// + /// assert_eq!("TSCHÜSS", s.to_uppercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_uppercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for c in self[..].chars() { + match conversions::to_upper(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + s + } + + /// Converts a [`Box`] into a [`String`] without copying or allocating. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let string = String::from("birthday gift"); + /// let boxed_str = string.clone().into_boxed_str(); + /// + /// assert_eq!(boxed_str.into_string(), string); + /// ``` + #[stable(feature = "box_str", since = "1.4.0")] + #[inline] + pub fn into_string(self: Box) -> String { + let slice = Box::<[u8]>::from(self); + unsafe { String::from_utf8_unchecked(slice.into_vec()) } + } + + /// Creates a new [`String`] by repeating a string `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!("abc".repeat(4), String::from("abcabcabcabc")); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// "0123456789abcdef".repeat(usize::MAX); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "repeat_str", since = "1.16.0")] + pub fn repeat(&self, n: usize) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().repeat(n)) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. 
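+    /// As a brief supplementary sketch contrasting the two methods on non-ASCII input:
+    ///
+    /// ```
+    /// assert_eq!("Aü", "aü".to_ascii_uppercase());
+    /// assert_eq!("AÜ", "aü".to_uppercase());
+    /// ```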
+ /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: str::make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_uppercase(); + // make_ascii_uppercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: str::make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_lowercase(); + // make_ascii_lowercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Tries to create a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s: &str = "a"; + /// let ss: String = s.try_to_owned().unwrap(); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_owned(&self) -> Result { + unsafe { Ok(String::from_utf8_unchecked(self.as_bytes().try_to_vec()?)) } + } +} + +/// Converts a boxed slice of bytes to a boxed string slice without checking +/// that the string contains valid UTF-8. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// let smile_utf8 = Box::new([226, 152, 186]); +/// let smile = unsafe { std::str::from_boxed_utf8_unchecked(smile_utf8) }; +/// +/// assert_eq!("☺", &*smile); +/// ``` +#[stable(feature = "str_box_extras", since = "1.20.0")] +#[inline] +pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box { + unsafe { Box::from_raw(Box::into_raw(v) as *mut str) } +} diff --git a/rust/alloc/string.rs b/rust/alloc/string.rs new file mode 100644 index 0000000000000..f37618893b2a6 --- /dev/null +++ b/rust/alloc/string.rs @@ -0,0 +1,2842 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A UTF-8–encoded, growable string. +//! +//! This module contains the [`String`] type, the [`ToString`] trait for +//! converting to strings, and several error types that may result from +//! working with [`String`]s. +//! +//! # Examples +//! +//! There are multiple ways to create a new [`String`] from a string literal: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let s = String::from("world"); +//! let s: String = "also this".into(); +//! ``` +//! +//! You can create a new [`String`] from an existing one by concatenating with +//! `+`: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let message = s + " world!"; +//! ``` +//! +//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of +//! it. You can do the reverse too. +//! +//! 
``` +//! let sparkle_heart = vec![240, 159, 146, 150]; +//! +//! // We know these bytes are valid, so we'll use `unwrap()`. +//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +//! +//! assert_eq!("💖", sparkle_heart); +//! +//! let bytes = sparkle_heart.into_bytes(); +//! +//! assert_eq!(bytes, [240, 159, 146, 150]); +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; +use core::fmt; +use core::hash; +#[cfg(not(no_global_oom_handling))] +use core::iter::{from_fn, FromIterator}; +use core::iter::FusedIterator; +#[cfg(not(no_global_oom_handling))] +use core::ops::Add; +#[cfg(not(no_global_oom_handling))] +use core::ops::AddAssign; +#[cfg(not(no_global_oom_handling))] +use core::ops::Bound::{Excluded, Included, Unbounded}; +use core::ops::{self, Index, IndexMut, Range, RangeBounds}; +use core::ptr; +use core::slice; +#[cfg(not(no_global_oom_handling))] +use core::str::lossy; +use core::str::pattern::Pattern; + +#[cfg(not(no_global_oom_handling))] +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::str::{self, Chars, Utf8Error}; +#[cfg(not(no_global_oom_handling))] +use crate::str::{from_boxed_utf8_unchecked, FromStr}; +use crate::vec::Vec; + +/// A UTF-8–encoded, growable string. +/// +/// The `String` type is the most common string type that has ownership over the +/// contents of the string. It has a close relationship with its borrowed +/// counterpart, the primitive [`str`]. +/// +/// # Examples +/// +/// You can create a `String` from [a literal string][`str`] with [`String::from`]: +/// +/// [`String::from`]: From::from +/// +/// ``` +/// let hello = String::from("Hello, world!"); +/// ``` +/// +/// You can append a [`char`] to a `String` with the [`push`] method, and +/// append a [`&str`] with the [`push_str`] method: +/// +/// ``` +/// let mut hello = String::from("Hello, "); +/// +/// hello.push('w'); +/// hello.push_str("orld!"); +/// ``` +/// +/// [`push`]: String::push +/// [`push_str`]: String::push_str +/// +/// If you have a vector of UTF-8 bytes, you can create a `String` from it with +/// the [`from_utf8`] method: +/// +/// ``` +/// // some bytes, in a vector +/// let sparkle_heart = vec![240, 159, 146, 150]; +/// +/// // We know these bytes are valid, so we'll use `unwrap()`. +/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +/// +/// assert_eq!("💖", sparkle_heart); +/// ``` +/// +/// [`from_utf8`]: String::from_utf8 +/// +/// # UTF-8 +/// +/// `String`s are always valid UTF-8. This has a few implications, the first of +/// which is that if you need a non-UTF-8 string, consider [`OsString`]. It is +/// similar, but without the UTF-8 constraint. The second implication is that +/// you cannot index into a `String`: +/// +/// ```compile_fail,E0277 +/// let s = "hello"; +/// +/// println!("The first letter of s is {}", s[0]); // ERROR!!! +/// ``` +/// +/// [`OsString`]: ../../std/ffi/struct.OsString.html +/// +/// Indexing is intended to be a constant-time operation, but UTF-8 encoding +/// does not allow us to do this. Furthermore, it's not clear what sort of +/// thing the index should return: a byte, a codepoint, or a grapheme cluster. +/// The [`bytes`] and [`chars`] methods return iterators over the first +/// two, respectively. 
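+///
+/// As a small illustrative sketch of those alternatives to indexing:
+///
+/// ```
+/// let s = String::from("hello");
+/// assert_eq!(Some(104), s.bytes().next()); // first byte, b'h'
+/// assert_eq!(Some('h'), s.chars().next()); // first char
+/// ```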
+/// +/// [`bytes`]: str::bytes +/// [`chars`]: str::chars +/// +/// # Deref +/// +/// `String`s implement [`Deref`]``, and so inherit all of [`str`]'s +/// methods. In addition, this means that you can pass a `String` to a +/// function which takes a [`&str`] by using an ampersand (`&`): +/// +/// ``` +/// fn takes_str(s: &str) { } +/// +/// let s = String::from("Hello"); +/// +/// takes_str(&s); +/// ``` +/// +/// This will create a [`&str`] from the `String` and pass it in. This +/// conversion is very inexpensive, and so generally, functions will accept +/// [`&str`]s as arguments unless they need a `String` for some specific +/// reason. +/// +/// In certain cases Rust doesn't have enough information to make this +/// conversion, known as [`Deref`] coercion. In the following example a string +/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function +/// `example_func` takes anything that implements the trait. In this case Rust +/// would need to make two implicit conversions, which Rust doesn't have the +/// means to do. For that reason, the following example will not compile. +/// +/// ```compile_fail,E0277 +/// trait TraitExample {} +/// +/// impl<'a> TraitExample for &'a str {} +/// +/// fn example_func(example_arg: A) {} +/// +/// let example_string = String::from("example_string"); +/// example_func(&example_string); +/// ``` +/// +/// There are two options that would work instead. The first would be to +/// change the line `example_func(&example_string);` to +/// `example_func(example_string.as_str());`, using the method [`as_str()`] +/// to explicitly extract the string slice containing the string. The second +/// way changes `example_func(&example_string);` to +/// `example_func(&*example_string);`. In this case we are dereferencing a +/// `String` to a [`str`][`&str`], then referencing the [`str`][`&str`] back to +/// [`&str`]. The second way is more idiomatic, however both work to do the +/// conversion explicitly rather than relying on the implicit conversion. +/// +/// # Representation +/// +/// A `String` is made up of three components: a pointer to some bytes, a +/// length, and a capacity. The pointer points to an internal buffer `String` +/// uses to store its data. The length is the number of bytes currently stored +/// in the buffer, and the capacity is the size of the buffer in bytes. As such, +/// the length will always be less than or equal to the capacity. +/// +/// This buffer is always stored on the heap. +/// +/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`] +/// methods: +/// +/// ``` +/// use std::mem; +/// +/// let story = String::from("Once upon a time..."); +/// +// FIXME Update this when vec_into_raw_parts is stabilized +/// // Prevent automatically dropping the String's data +/// let mut story = mem::ManuallyDrop::new(story); +/// +/// let ptr = story.as_mut_ptr(); +/// let len = story.len(); +/// let capacity = story.capacity(); +/// +/// // story has nineteen bytes +/// assert_eq!(19, len); +/// +/// // We can re-build a String out of ptr, len, and capacity. This is all +/// // unsafe because we are responsible for making sure the components are +/// // valid: +/// let s = unsafe { String::from_raw_parts(ptr, len, capacity) } ; +/// +/// assert_eq!(String::from("Once upon a time..."), s); +/// ``` +/// +/// [`as_ptr`]: str::as_ptr +/// [`len`]: String::len +/// [`capacity`]: String::capacity +/// +/// If a `String` has enough capacity, adding elements to it will not +/// re-allocate. 
For example, consider this program: +/// +/// ``` +/// let mut s = String::new(); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// This will output the following: +/// +/// ```text +/// 0 +/// 5 +/// 10 +/// 20 +/// 20 +/// 40 +/// ``` +/// +/// At first, we have no memory allocated at all, but as we append to the +/// string, it increases its capacity appropriately. If we instead use the +/// [`with_capacity`] method to allocate the correct capacity initially: +/// +/// ``` +/// let mut s = String::with_capacity(25); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// [`with_capacity`]: String::with_capacity +/// +/// We end up with a different output: +/// +/// ```text +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// ``` +/// +/// Here, there's no need to allocate more memory inside the loop. +/// +/// [`str`]: prim@str +/// [`&str`]: prim@str +/// [`Deref`]: core::ops::Deref +/// [`as_str()`]: String::as_str +#[derive(PartialOrd, Eq, Ord)] +#[cfg_attr(not(test), rustc_diagnostic_item = "string_type")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct String { + vec: Vec, +} + +/// A possible error value when converting a `String` from a UTF-8 byte vector. +/// +/// This type is the error type for the [`from_utf8`] method on [`String`]. It +/// is designed in such a way to carefully avoid reallocations: the +/// [`into_bytes`] method will give back the byte vector that was used in the +/// conversion attempt. +/// +/// [`from_utf8`]: String::from_utf8 +/// [`into_bytes`]: FromUtf8Error::into_bytes +/// +/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may +/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's +/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error` +/// through the [`utf8_error`] method. +/// +/// [`Utf8Error`]: core::str::Utf8Error +/// [`std::str`]: core::str +/// [`&str`]: prim@str +/// [`utf8_error`]: Self::utf8_error +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // some invalid bytes, in a vector +/// let bytes = vec![0, 159]; +/// +/// let value = String::from_utf8(bytes); +/// +/// assert!(value.is_err()); +/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(no_global_oom_handling), derive(Clone))] +#[derive(Debug, PartialEq, Eq)] +pub struct FromUtf8Error { + bytes: Vec, + error: Utf8Error, +} + +/// A possible error value when converting a `String` from a UTF-16 byte slice. +/// +/// This type is the error type for the [`from_utf16`] method on [`String`]. +/// +/// [`from_utf16`]: String::from_utf16 +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // 𝄞muic +/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0xD800, 0x0069, 0x0063]; +/// +/// assert!(String::from_utf16(v).is_err()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +pub struct FromUtf16Error(()); + +impl String { + /// Creates a new empty `String`. + /// + /// Given that the `String` is empty, this will not allocate any initial + /// buffer. While that means that this initial operation is very + /// inexpensive, it may cause excessive allocation later when you add + /// data. 
If you have an idea of how much data the `String` will hold, + /// consider the [`with_capacity`] method to prevent excessive + /// re-allocation. + /// + /// [`with_capacity`]: String::with_capacity + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::new(); + /// ``` + #[inline] + #[rustc_const_stable(feature = "const_string_new", since = "1.39.0")] + #[stable(feature = "rust1", since = "1.0.0")] + pub const fn new() -> String { + String { vec: Vec::new() } + } + + /// Creates a new empty `String` with a particular capacity. + /// + /// `String`s have an internal buffer to hold their data. The capacity is + /// the length of that buffer, and can be queried with the [`capacity`] + /// method. This method creates an empty `String`, but one with an initial + /// buffer that can hold `capacity` bytes. This is useful when you may be + /// appending a bunch of data to the `String`, reducing the number of + /// reallocations it needs to do. + /// + /// [`capacity`]: String::capacity + /// + /// If the given capacity is `0`, no allocation will occur, and this method + /// is identical to the [`new`] method. + /// + /// [`new`]: String::new + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// + /// // The String contains no chars, even though it has capacity for more + /// assert_eq!(s.len(), 0); + /// + /// // These are all done without reallocating... + /// let cap = s.capacity(); + /// for _ in 0..10 { + /// s.push('a'); + /// } + /// + /// assert_eq!(s.capacity(), cap); + /// + /// // ...but this may make the string reallocate + /// s.push('a'); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize) -> String { + String { vec: Vec::with_capacity(capacity) } + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Since we don't + // require this method for testing purposes, I'll just stub it + // NB see the slice::hack module in slice.rs for more information + #[inline] + #[cfg(test)] + pub fn from_str(_: &str) -> String { + panic!("not available with cfg(test)"); + } + + /// Converts a vector of bytes to a `String`. + /// + /// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes + /// ([`Vec`]) is made of bytes, so this function converts between the + /// two. Not all byte slices are valid `String`s, however: `String` + /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that + /// the bytes are valid UTF-8, and then does the conversion. + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the validity check, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the check. + /// + /// This method will take care to not copy the vector, for efficiency's + /// sake. + /// + /// If you need a [`&str`] instead of a `String`, consider + /// [`str::from_utf8`]. + /// + /// The inverse of this method is [`into_bytes`]. + /// + /// # Errors + /// + /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the + /// provided bytes are not UTF-8. The vector you moved in is also included. 
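+    /// An extra sketch of recovering the original allocation after a failed conversion
+    /// (the same pattern as in the [`FromUtf8Error`] docs above):
+    ///
+    /// ```
+    /// let err = String::from_utf8(vec![0, 159]).unwrap_err();
+    /// // The vector we moved in comes back unchanged.
+    /// assert_eq!(vec![0, 159], err.into_bytes());
+    /// ```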
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// // We know these bytes are valid, so we'll use `unwrap()`. + /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// assert!(String::from_utf8(sparkle_heart).is_err()); + /// ``` + /// + /// See the docs for [`FromUtf8Error`] for more details on what you can do + /// with this error. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// [`Vec`]: crate::vec::Vec + /// [`&str`]: prim@str + /// [`into_bytes`]: String::into_bytes + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8(vec: Vec) -> Result { + match str::from_utf8(&vec) { + Ok(..) => Ok(String { vec }), + Err(e) => Err(FromUtf8Error { bytes: vec, error: e }), + } + } + + /// Converts a slice of bytes to a string, including invalid characters. + /// + /// Strings are made of bytes ([`u8`]), and a slice of bytes + /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts + /// between the two. Not all byte slices are valid strings, however: strings + /// are required to be valid UTF-8. During this conversion, + /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: � + /// + /// [byteslice]: prim@slice + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the conversion, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the checks. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// + /// This function returns a [`Cow<'a, str>`]. If our byte slice is invalid + /// UTF-8, then we need to insert the replacement characters, which will + /// change the size of the string, and hence, require a `String`. But if + /// it's already valid UTF-8, we don't need a new allocation. This return + /// type allows us to handle both cases. 
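+    /// A minimal sketch of that distinction (assuming the usual `Cow` variants):
+    ///
+    /// ```
+    /// use std::borrow::Cow;
+    ///
+    /// // Valid UTF-8 can be borrowed as-is; invalid UTF-8 forces an owned, repaired copy.
+    /// assert!(matches!(String::from_utf8_lossy(b"ok"), Cow::Borrowed(_)));
+    /// assert!(matches!(String::from_utf8_lossy(b"\xF0ok"), Cow::Owned(_)));
+    /// ```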
+ /// + /// [`Cow<'a, str>`]: crate::borrow::Cow + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes + /// let input = b"Hello \xF0\x90\x80World"; + /// let output = String::from_utf8_lossy(input); + /// + /// assert_eq!("Hello �World", output); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> { + let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks(); + + let (first_valid, first_broken) = if let Some(chunk) = iter.next() { + let lossy::Utf8LossyChunk { valid, broken } = chunk; + if valid.len() == v.len() { + debug_assert!(broken.is_empty()); + return Cow::Borrowed(valid); + } + (valid, broken) + } else { + return Cow::Borrowed(""); + }; + + const REPLACEMENT: &str = "\u{FFFD}"; + + let mut res = String::with_capacity(v.len()); + res.push_str(first_valid); + if !first_broken.is_empty() { + res.push_str(REPLACEMENT); + } + + for lossy::Utf8LossyChunk { valid, broken } in iter { + res.push_str(valid); + if !broken.is_empty() { + res.push_str(REPLACEMENT); + } + } + + Cow::Owned(res) + } + + /// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`] + /// if `v` contains any invalid data. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0x0069, 0x0063]; + /// assert_eq!(String::from("𝄞music"), + /// String::from_utf16(v).unwrap()); + /// + /// // 𝄞muic + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0xD800, 0x0069, 0x0063]; + /// assert!(String::from_utf16(v).is_err()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16(v: &[u16]) -> Result { + // This isn't done via collect::>() for performance reasons. + // FIXME: the function can be simplified again when #48994 is closed. + let mut ret = String::with_capacity(v.len()); + for c in decode_utf16(v.iter().cloned()) { + if let Ok(c) = c { + ret.push(c); + } else { + return Err(FromUtf16Error(())); + } + } + Ok(ret) + } + + /// Decode a UTF-16–encoded slice `v` into a `String`, replacing + /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD]. + /// + /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`], + /// `from_utf16_lossy` returns a `String` since the UTF-16 to UTF-8 + /// conversion requires a memory allocation. + /// + /// [`from_utf8_lossy`]: String::from_utf8_lossy + /// [`Cow<'a, str>`]: crate::borrow::Cow + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0xDD1E, 0x0069, 0x0063, + /// 0xD834]; + /// + /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"), + /// String::from_utf16_lossy(v)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16_lossy(v: &[u16]) -> String { + decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect() + } + + /// Decomposes a `String` into its raw components. 
+ /// + /// Returns the raw pointer to the underlying data, the length of + /// the string (in bytes), and the allocated capacity of the data + /// (in bytes). These are the same arguments in the same order as + /// the arguments to [`from_raw_parts`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `String`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `String` with the [`from_raw_parts`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts`]: String::from_raw_parts + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_into_raw_parts)] + /// let s = String::from("hello"); + /// + /// let (ptr, len, cap) = s.into_raw_parts(); + /// + /// let rebuilt = unsafe { String::from_raw_parts(ptr, len, cap) }; + /// assert_eq!(rebuilt, "hello"); + /// ``` + #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts(self) -> (*mut u8, usize, usize) { + self.vec.into_raw_parts() + } + + /// Creates a new `String` from a length, capacity, and pointer. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * The memory at `buf` needs to have been previously allocated by the + /// same allocator the standard library uses, with a required alignment of exactly 1. + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the correct value. + /// * The first `length` bytes at `buf` need to be valid UTF-8. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. + /// + /// The ownership of `buf` is effectively transferred to the + /// `String` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::mem; + /// + /// unsafe { + /// let s = String::from("hello"); + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent automatically dropping the String's data + /// let mut s = mem::ManuallyDrop::new(s); + /// + /// let ptr = s.as_mut_ptr(); + /// let len = s.len(); + /// let capacity = s.capacity(); + /// + /// let s = String::from_raw_parts(ptr, len, capacity); + /// + /// assert_eq!(String::from("hello"), s); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String { + unsafe { String { vec: Vec::from_raw_parts(buf, length, capacity) } } + } + + /// Converts a vector of bytes to a `String` without checking that the + /// string contains valid UTF-8. + /// + /// See the safe version, [`from_utf8`], for more details. + /// + /// [`from_utf8`]: String::from_utf8 + /// + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. 
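+    /// As a hedged sketch, a debug-checked wrapper one might layer on top
+    /// (`checked_into_string` is a hypothetical helper, not a standard API):
+    ///
+    /// ```
+    /// fn checked_into_string(bytes: Vec<u8>) -> String {
+    ///     // Hypothetical helper: verify the invariant in debug builds only.
+    ///     debug_assert!(std::str::from_utf8(&bytes).is_ok());
+    ///     // SAFETY: the caller guarantees the bytes are valid UTF-8.
+    ///     unsafe { String::from_utf8_unchecked(bytes) }
+    /// }
+    ///
+    /// assert_eq!("hi", checked_into_string(vec![104, 105]));
+    /// ```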
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = unsafe { + /// String::from_utf8_unchecked(sparkle_heart) + /// }; + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_utf8_unchecked(bytes: Vec) -> String { + String { vec: bytes } + } + + /// Converts a `String` into a byte vector. + /// + /// This consumes the `String`, so we do not need to copy its contents. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// let bytes = s.into_bytes(); + /// + /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec { + self.vec + } + + /// Extracts a string slice containing the entire `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("foo"); + /// + /// assert_eq!("foo", s.as_str()); + /// ``` + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_str(&self) -> &str { + self + } + + /// Converts a `String` into a mutable string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foobar"); + /// let s_mut_str = s.as_mut_str(); + /// + /// s_mut_str.make_ascii_uppercase(); + /// + /// assert_eq!("FOOBAR", s_mut_str); + /// ``` + #[inline] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_mut_str(&mut self) -> &mut str { + self + } + + /// Appends a given string slice onto the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.push_str("bar"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push_str(&mut self, string: &str) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// Copies elements from `src` range to the end of the string. + /// + /// ## Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// ## Examples + /// + /// ``` + /// #![feature(string_extend_from_within)] + /// let mut string = String::from("abcde"); + /// + /// string.extend_from_within(2..); + /// assert_eq!(string, "abcdecde"); + /// + /// string.extend_from_within(..2); + /// assert_eq!(string, "abcdecdeab"); + /// + /// string.extend_from_within(4..8); + /// assert_eq!(string, "abcdecdeabecde"); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_extend_from_within", issue = "none")] + pub fn extend_from_within(&mut self, src: R) + where + R: RangeBounds, + { + let src @ Range { start, end } = slice::range(src, ..self.len()); + + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + self.vec.extend_from_within(src); + } + + /// Returns this `String`'s capacity, in bytes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::with_capacity(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// Ensures that this `String`'s capacity is at least `additional` bytes + /// larger than its length. 
+ /// + /// The capacity may be increased by more than `additional` bytes if it + /// chooses, to prevent frequent reallocations. + /// + /// If you do not want this "at least" behavior, see the [`reserve_exact`] + /// method. + /// + /// # Panics + /// + /// Panics if the new capacity overflows [`usize`]. + /// + /// [`reserve_exact`]: String::reserve_exact + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.vec.reserve(additional) + } + + /// Ensures that this `String`'s capacity is `additional` bytes + /// larger than its length. + /// + /// Consider using the [`reserve`] method unless you absolutely know + /// better than the allocator. + /// + /// [`reserve`]: String::reserve + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve_exact(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This may not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve_exact(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.vec.reserve_exact(additional) + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `String`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. 
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(try_reserve)]
+ /// use std::collections::TryReserveError;
+ ///
+ /// fn process_data(data: &str) -> Result<String, TryReserveError> {
+ /// let mut output = String::new();
+ ///
+ /// // Pre-reserve the memory, exiting if we can't
+ /// output.try_reserve(data.len())?;
+ ///
+ /// // Now we know this can't OOM in the middle of our complex work
+ /// output.push_str(data);
+ ///
+ /// Ok(output)
+ /// }
+ /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?");
+ /// ```
+ #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")]
+ pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.vec.try_reserve(additional)
+ }
+
+ /// Tries to reserve the minimum capacity for exactly `additional` more elements to
+ /// be inserted in the given `String`. After calling `reserve_exact`,
+ /// capacity will be greater than or equal to `self.len() + additional`.
+ /// Does nothing if the capacity is already sufficient.
+ ///
+ /// Note that the allocator may give the collection more space than it
+ /// requests. Therefore, capacity can not be relied upon to be precisely
+ /// minimal. Prefer `reserve` if future insertions are expected.
+ ///
+ /// # Errors
+ ///
+ /// If the capacity overflows, or the allocator reports a failure, then an error
+ /// is returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(try_reserve)]
+ /// use std::collections::TryReserveError;
+ ///
+ /// fn process_data(data: &str) -> Result<String, TryReserveError> {
+ /// let mut output = String::new();
+ ///
+ /// // Pre-reserve the memory, exiting if we can't
+ /// output.try_reserve(data.len())?;
+ ///
+ /// // Now we know this can't OOM in the middle of our complex work
+ /// output.push_str(data);
+ ///
+ /// Ok(output)
+ /// }
+ /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?");
+ /// ```
+ #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")]
+ pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+ self.vec.try_reserve_exact(additional)
+ }
+
+ /// Shrinks the capacity of this `String` to match its length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("foo");
+ ///
+ /// s.reserve(100);
+ /// assert!(s.capacity() >= 100);
+ ///
+ /// s.shrink_to_fit();
+ /// assert_eq!(3, s.capacity());
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn shrink_to_fit(&mut self) {
+ self.vec.shrink_to_fit()
+ }
+
+ /// Shrinks the capacity of this `String` with a lower bound.
+ ///
+ /// The capacity will remain at least as large as both the length
+ /// and the supplied value.
+ ///
+ /// If the current capacity is less than the lower limit, this is a no-op.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(shrink_to)]
+ /// let mut s = String::from("foo");
+ ///
+ /// s.reserve(100);
+ /// assert!(s.capacity() >= 100);
+ ///
+ /// s.shrink_to(10);
+ /// assert!(s.capacity() >= 10);
+ /// s.shrink_to(0);
+ /// assert!(s.capacity() >= 3);
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[unstable(feature = "shrink_to", reason = "new API", issue = "56431")]
+ pub fn shrink_to(&mut self, min_capacity: usize) {
+ self.vec.shrink_to(min_capacity)
+ }
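[Editor's note: the two `try_reserve*` doc-tests above show the fallible-allocation pattern that motivates carrying these APIs in a kernel tree, where aborting on out-of-memory is not acceptable. A minimal sketch of the same idea with an `Option` return; the function name is illustrative, not part of the patch:]

```rust
#![feature(try_reserve)]

fn dup_or_none(data: &str) -> Option<String> {
    let mut out = String::new();
    // Allocation failure surfaces as `None` instead of aborting.
    out.try_reserve(data.len()).ok()?;
    out.push_str(data);
    Some(out)
}
```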
+
+ /// Appends the given [`char`] to the end of this `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("abc");
+ ///
+ /// s.push('1');
+ /// s.push('2');
+ /// s.push('3');
+ ///
+ /// assert_eq!("abc123", s);
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn push(&mut self, ch: char) {
+ match ch.len_utf8() {
+ 1 => self.vec.push(ch as u8),
+ _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
+ }
+ }
+
+ /// Returns a byte slice of this `String`'s contents.
+ ///
+ /// The inverse of this method is [`from_utf8`].
+ ///
+ /// [`from_utf8`]: String::from_utf8
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let s = String::from("hello");
+ ///
+ /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.vec
+ }
+
+ /// Shortens this `String` to the specified length.
+ ///
+ /// If `new_len` is greater than the string's current length, this has no
+ /// effect.
+ ///
+ /// Note that this method has no effect on the allocated capacity
+ /// of the string.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `new_len` does not lie on a [`char`] boundary.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("hello");
+ ///
+ /// s.truncate(2);
+ ///
+ /// assert_eq!("he", s);
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn truncate(&mut self, new_len: usize) {
+ if new_len <= self.len() {
+ assert!(self.is_char_boundary(new_len));
+ self.vec.truncate(new_len)
+ }
+ }
+
+ /// Removes the last character from the string buffer and returns it.
+ ///
+ /// Returns [`None`] if this `String` is empty.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("foo");
+ ///
+ /// assert_eq!(s.pop(), Some('o'));
+ /// assert_eq!(s.pop(), Some('o'));
+ /// assert_eq!(s.pop(), Some('f'));
+ ///
+ /// assert_eq!(s.pop(), None);
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn pop(&mut self) -> Option<char> {
+ let ch = self.chars().rev().next()?;
+ let newlen = self.len() - ch.len_utf8();
+ unsafe {
+ self.vec.set_len(newlen);
+ }
+ Some(ch)
+ }
+
+ /// Removes a [`char`] from this `String` at a byte position and returns it.
+ ///
+ /// This is an *O*(*n*) operation, as it requires copying every element in the
+ /// buffer.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `idx` is larger than or equal to the `String`'s length,
+ /// or if it does not lie on a [`char`] boundary.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("foo");
+ ///
+ /// assert_eq!(s.remove(0), 'f');
+ /// assert_eq!(s.remove(1), 'o');
+ /// assert_eq!(s.remove(0), 'o');
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn remove(&mut self, idx: usize) -> char {
+ let ch = match self[idx..].chars().next() {
+ Some(ch) => ch,
+ None => panic!("cannot remove a char from the end of a string"),
+ };
+
+ let next = idx + ch.len_utf8();
+ let len = self.len();
+ unsafe {
+ ptr::copy(self.vec.as_ptr().add(next), self.vec.as_mut_ptr().add(idx), len - next);
+ self.vec.set_len(len - (next - idx));
+ }
+ ch
+ }
+
+ /// Remove all matches of pattern `pat` in the `String`.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("Trees are not green, the sky is not blue."); + /// s.remove_matches("not "); + /// assert_eq!("Trees are green, the sky is blue.", s); + /// ``` + /// + /// Matches will be detected and removed iteratively, so in cases where + /// patterns overlap, only the first pattern will be removed: + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("banana"); + /// s.remove_matches("ana"); + /// assert_eq!("bna", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_remove_matches", reason = "new API", issue = "72826")] + pub fn remove_matches<'a, P>(&'a mut self, pat: P) + where + P: for<'x> Pattern<'x>, + { + use core::str::pattern::Searcher; + + let rejections = { + let mut searcher = pat.into_searcher(self); + // Per Searcher::next: + // + // A Match result needs to contain the whole matched pattern, + // however Reject results may be split up into arbitrary many + // adjacent fragments. Both ranges may have zero length. + // + // In practice the implementation of Searcher::next_match tends to + // be more efficient, so we use it here and do some work to invert + // matches into rejections since that's what we want to copy below. + let mut front = 0; + let rejections: Vec<_> = from_fn(|| { + let (start, end) = searcher.next_match()?; + let prev_front = front; + front = end; + Some((prev_front, start)) + }) + .collect(); + rejections.into_iter().chain(core::iter::once((front, self.len()))) + }; + + let mut len = 0; + let ptr = self.vec.as_mut_ptr(); + + for (start, end) in rejections { + let count = end - start; + if start != len { + // SAFETY: per Searcher::next: + // + // The stream of Match and Reject values up to a Done will + // contain index ranges that are adjacent, non-overlapping, + // covering the whole haystack, and laying on utf8 + // boundaries. + unsafe { + ptr::copy(ptr.add(start), ptr.add(len), count); + } + } + len += count; + } + + unsafe { + self.vec.set_len(len); + } + } + + /// Retains only the characters specified by the predicate. + /// + /// In other words, remove all characters `c` such that `f(c)` returns `false`. + /// This method operates in place, visiting each character exactly once in the + /// original order, and preserves the order of the retained characters. + /// + /// # Examples + /// + /// ``` + /// let mut s = String::from("f_o_ob_ar"); + /// + /// s.retain(|c| c != '_'); + /// + /// assert_eq!(s, "foobar"); + /// ``` + /// + /// The exact order may be useful for tracking external state, like an index. 
+ ///
+ /// ```
+ /// let mut s = String::from("abcde");
+ /// let keep = [false, true, true, false, true];
+ /// let mut i = 0;
+ /// s.retain(|_| (keep[i], i += 1).0);
+ /// assert_eq!(s, "bce");
+ /// ```
+ #[inline]
+ #[stable(feature = "string_retain", since = "1.26.0")]
+ pub fn retain<F>(&mut self, mut f: F)
+ where
+ F: FnMut(char) -> bool,
+ {
+ struct SetLenOnDrop<'a> {
+ s: &'a mut String,
+ idx: usize,
+ del_bytes: usize,
+ }
+
+ impl<'a> Drop for SetLenOnDrop<'a> {
+ fn drop(&mut self) {
+ let new_len = self.idx - self.del_bytes;
+ debug_assert!(new_len <= self.s.len());
+ unsafe { self.s.vec.set_len(new_len) };
+ }
+ }
+
+ let len = self.len();
+ let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 };
+
+ while guard.idx < len {
+ let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() };
+ let ch_len = ch.len_utf8();
+
+ if !f(ch) {
+ guard.del_bytes += ch_len;
+ } else if guard.del_bytes > 0 {
+ unsafe {
+ ptr::copy(
+ guard.s.vec.as_ptr().add(guard.idx),
+ guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes),
+ ch_len,
+ );
+ }
+ }
+
+ // Point idx to the next char
+ guard.idx += ch_len;
+ }
+
+ drop(guard);
+ }
+
+ /// Inserts a character into this `String` at a byte position.
+ ///
+ /// This is an *O*(*n*) operation as it requires copying every element in the
+ /// buffer.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `idx` is larger than the `String`'s length, or if it does not
+ /// lie on a [`char`] boundary.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::with_capacity(3);
+ ///
+ /// s.insert(0, 'f');
+ /// s.insert(1, 'o');
+ /// s.insert(2, 'o');
+ ///
+ /// assert_eq!("foo", s);
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn insert(&mut self, idx: usize, ch: char) {
+ assert!(self.is_char_boundary(idx));
+ let mut bits = [0; 4];
+ let bits = ch.encode_utf8(&mut bits).as_bytes();
+
+ unsafe {
+ self.insert_bytes(idx, bits);
+ }
+ }
+
+ #[cfg(not(no_global_oom_handling))]
+ unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
+ let len = self.len();
+ let amt = bytes.len();
+ self.vec.reserve(amt);
+
+ unsafe {
+ ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx);
+ ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
+ self.vec.set_len(len + amt);
+ }
+ }
+
+ /// Inserts a string slice into this `String` at a byte position.
+ ///
+ /// This is an *O*(*n*) operation as it requires copying every element in the
+ /// buffer.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `idx` is larger than the `String`'s length, or if it does not
+ /// lie on a [`char`] boundary.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("bar");
+ ///
+ /// s.insert_str(0, "foo");
+ ///
+ /// assert_eq!("foobar", s);
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[stable(feature = "insert_str", since = "1.16.0")]
+ pub fn insert_str(&mut self, idx: usize, string: &str) {
+ assert!(self.is_char_boundary(idx));
+
+ unsafe {
+ self.insert_bytes(idx, string.as_bytes());
+ }
+ }
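[Editor's note: both `insert` and `insert_str` above shift the entire tail of the buffer right before copying the new bytes in, which is where the documented *O*(*n*) cost comes from. A short illustration:]

```rust
fn main() {
    let mut s = String::from("bar");
    s.insert_str(0, "foo"); // shifts "bar" right by three bytes: O(n)
    s.insert(3, '-');       // the char variant has the same cost model
    assert_eq!(s, "foo-bar");
}
```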
+
+ /// Returns a mutable reference to the contents of this `String`.
+ ///
+ /// # Safety
+ ///
+ /// This function is unsafe because it does not check that the bytes passed
+ /// to it are valid UTF-8. If this constraint is violated, it may cause
+ /// memory unsafety issues with future users of the `String`, as the rest of
+ /// the standard library assumes that `String`s are valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("hello");
+ ///
+ /// unsafe {
+ /// let vec = s.as_mut_vec();
+ /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]);
+ ///
+ /// vec.reverse();
+ /// }
+ /// assert_eq!(s, "olleh");
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
+ &mut self.vec
+ }
+
+ /// Returns the length of this `String`, in bytes, not [`char`]s or
+ /// graphemes. In other words, it may not be what a human considers the
+ /// length of the string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let a = String::from("foo");
+ /// assert_eq!(a.len(), 3);
+ ///
+ /// let fancy_f = String::from("ƒoo");
+ /// assert_eq!(fancy_f.len(), 4);
+ /// assert_eq!(fancy_f.chars().count(), 3);
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn len(&self) -> usize {
+ self.vec.len()
+ }
+
+ /// Returns `true` if this `String` has a length of zero, and `false` otherwise.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut v = String::new();
+ /// assert!(v.is_empty());
+ ///
+ /// v.push('a');
+ /// assert!(!v.is_empty());
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+ /// Splits the string into two at the given byte index.
+ ///
+ /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and
+ /// the returned `String` contains bytes `[at, len)`. `at` must be on the
+ /// boundary of a UTF-8 code point.
+ ///
+ /// Note that the capacity of `self` does not change.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last
+ /// code point of the string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// # fn main() {
+ /// let mut hello = String::from("Hello, World!");
+ /// let world = hello.split_off(7);
+ /// assert_eq!(hello, "Hello, ");
+ /// assert_eq!(world, "World!");
+ /// # }
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[inline]
+ #[stable(feature = "string_split_off", since = "1.16.0")]
+ #[must_use = "use `.truncate()` if you don't need the other half"]
+ pub fn split_off(&mut self, at: usize) -> String {
+ assert!(self.is_char_boundary(at));
+ let other = self.vec.split_off(at);
+ unsafe { String::from_utf8_unchecked(other) }
+ }
+
+ /// Truncates this `String`, removing all contents.
+ ///
+ /// While this means the `String` will have a length of zero, it does not
+ /// touch its capacity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("foo");
+ ///
+ /// s.clear();
+ ///
+ /// assert!(s.is_empty());
+ /// assert_eq!(0, s.len());
+ /// assert_eq!(3, s.capacity());
+ /// ```
+ #[inline]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn clear(&mut self) {
+ self.vec.clear()
+ }
+
+ /// Creates a draining iterator that removes the specified range in the `String`
+ /// and yields the removed `chars`.
+ ///
+ /// Note: The element range is removed even if the iterator is not
+ /// consumed until the end.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the starting point or end point do not lie on a [`char`]
+ /// boundary, or if they're out of bounds.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("α is alpha, β is beta");
+ /// let beta_offset = s.find('β').unwrap_or(s.len());
+ ///
+ /// // Remove the range up until the β from the string
+ /// let t: String = s.drain(..beta_offset).collect();
+ /// assert_eq!(t, "α is alpha, ");
+ /// assert_eq!(s, "β is beta");
+ ///
+ /// // A full range clears the string
+ /// s.drain(..);
+ /// assert_eq!(s, "");
+ /// ```
+ #[stable(feature = "drain", since = "1.6.0")]
+ pub fn drain<R>(&mut self, range: R) -> Drain<'_>
+ where
+ R: RangeBounds<usize>,
+ {
+ // Memory safety
+ //
+ // The String version of Drain does not have the memory safety issues
+ // of the vector version. The data is just plain bytes.
+ // Because the range removal happens in Drop, if the Drain iterator is leaked,
+ // the removal will not happen.
+ let Range { start, end } = slice::range(range, ..self.len());
+ assert!(self.is_char_boundary(start));
+ assert!(self.is_char_boundary(end));
+
+ // Take out two simultaneous borrows. The &mut String won't be accessed
+ // until iteration is over, in Drop.
+ let self_ptr = self as *mut _;
+ // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks.
+ let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
+
+ Drain { start, end, iter: chars_iter, string: self_ptr }
+ }
+
+ /// Removes the specified range in the string,
+ /// and replaces it with the given string.
+ /// The given string doesn't need to be the same length as the range.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the starting point or end point do not lie on a [`char`]
+ /// boundary, or if they're out of bounds.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let mut s = String::from("α is alpha, β is beta");
+ /// let beta_offset = s.find('β').unwrap_or(s.len());
+ ///
+ /// // Replace the range up until the β from the string
+ /// s.replace_range(..beta_offset, "Α is capital alpha; ");
+ /// assert_eq!(s, "Α is capital alpha; β is beta");
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[stable(feature = "splice", since = "1.27.0")]
+ pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
+ where
+ R: RangeBounds<usize>,
+ {
+ // Memory safety
+ //
+ // Replace_range does not have the memory safety issues of a vector
+ // Splice. The data is just plain bytes.
+
+ // WARNING: Inlining this variable would be unsound (#81138)
+ let start = range.start_bound();
+ match start {
+ Included(&n) => assert!(self.is_char_boundary(n)),
+ Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
+ Unbounded => {}
+ };
+ // WARNING: Inlining this variable would be unsound (#81138)
+ let end = range.end_bound();
+ match end {
+ Included(&n) => assert!(self.is_char_boundary(n + 1)),
+ Excluded(&n) => assert!(self.is_char_boundary(n)),
+ Unbounded => {}
+ };
+
+ // Using `range` again would be unsound (#81138)
+ // We assume the bounds reported by `range` remain the same, but
+ // an adversarial implementation could change between calls
+ unsafe { self.as_mut_vec() }.splice((start, end), replace_with.bytes());
+ }
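[Editor's note: the #81138 warnings in `replace_range` above exist because `RangeBounds` is a trait, so nothing forces an implementation to answer consistently. A hypothetical adversarial implementation, written only to show what querying each bound exactly once defends against:]

```rust
use std::cell::Cell;
use std::ops::{Bound, RangeBounds};

// A range whose end bound changes between calls: first "empty", then
// "everything". Snapshotting each bound once keeps code like
// `replace_range` sound against this.
struct Flicker(Cell<bool>);

impl RangeBounds<usize> for Flicker {
    fn start_bound(&self) -> Bound<&usize> {
        Bound::Unbounded
    }
    fn end_bound(&self) -> Bound<&usize> {
        if self.0.replace(false) { Bound::Excluded(&0) } else { Bound::Unbounded }
    }
}
```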
+
+ /// Converts this `String` into a [`Box`]`<`[`str`]`>`.
+ ///
+ /// This will drop any excess capacity.
+ ///
+ /// [`str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let s = String::from("hello");
+ ///
+ /// let b = s.into_boxed_str();
+ /// ```
+ #[cfg(not(no_global_oom_handling))]
+ #[stable(feature = "box_str", since = "1.4.0")]
+ #[inline]
+ pub fn into_boxed_str(self) -> Box<str> {
+ let slice = self.vec.into_boxed_slice();
+ unsafe { from_boxed_utf8_unchecked(slice) }
+ }
+}
+
+impl FromUtf8Error {
+ /// Returns a slice of [`u8`]s bytes that were attempted to convert to a `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// // some invalid bytes, in a vector
+ /// let bytes = vec![0, 159];
+ ///
+ /// let value = String::from_utf8(bytes);
+ ///
+ /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes());
+ /// ```
+ #[stable(feature = "from_utf8_error_as_bytes", since = "1.26.0")]
+ pub fn as_bytes(&self) -> &[u8] {
+ &self.bytes[..]
+ }
+
+ /// Returns the bytes that were attempted to convert to a `String`.
+ ///
+ /// This method is carefully constructed to avoid allocation. It will
+ /// consume the error, moving out the bytes, so that a copy of the bytes
+ /// does not need to be made.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// // some invalid bytes, in a vector
+ /// let bytes = vec![0, 159];
+ ///
+ /// let value = String::from_utf8(bytes);
+ ///
+ /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
+ /// ```
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn into_bytes(self) -> Vec<u8> {
+ self.bytes
+ }
+
+ /// Fetch a `Utf8Error` to get more details about the conversion failure.
+ ///
+ /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
+ /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's
+ /// an analogue to `FromUtf8Error`. See its documentation for more details
+ /// on using it.
+ ///
+ /// [`std::str`]: core::str
+ /// [`&str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// // some invalid bytes, in a vector
+ /// let bytes = vec![0, 159];
+ ///
+ /// let error = String::from_utf8(bytes).unwrap_err().utf8_error();
+ ///
+ /// // the first byte is invalid here
+ /// assert_eq!(1, error.valid_up_to());
+ /// ```
+ #[stable(feature = "rust1", since = "1.0.0")]
+ pub fn utf8_error(&self) -> Utf8Error {
+ self.error
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Display for FromUtf8Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&self.error, f)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Display for FromUtf16Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Clone for String {
+ fn clone(&self) -> Self {
+ String { vec: self.vec.clone() }
+ }
+
+ fn clone_from(&mut self, source: &Self) {
+ self.vec.clone_from(&source.vec);
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl FromIterator<char> for String {
+ fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> String {
+ let mut buf = String::new();
+ buf.extend(iter);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "string_from_iter_by_ref", since = "1.17.0")]
+impl<'a> FromIterator<&'a char> for String {
+ fn from_iter<I: IntoIterator<Item = &'a char>>(iter: I) -> String {
+ let mut buf = String::new();
+ buf.extend(iter);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> FromIterator<&'a str> for String {
+ fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> String {
+ let mut buf = String::new();
+ buf.extend(iter);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "extend_string", since = "1.4.0")]
+impl FromIterator<String> for String {
+ fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> String {
+ let mut iterator = iter.into_iter();
+
+ // Because we're iterating over `String`s, we can avoid at least
+ // one allocation by getting the first string from the iterator
+ // and appending to it all the subsequent strings.
+ match iterator.next() {
+ None => String::new(),
+ Some(mut buf) => {
+ buf.extend(iterator);
+ buf
+ }
+ }
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "box_str2", since = "1.45.0")]
+impl FromIterator<Box<str>> for String {
+ fn from_iter<I: IntoIterator<Item = Box<str>>>(iter: I) -> String {
+ let mut buf = String::new();
+ buf.extend(iter);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "herd_cows", since = "1.19.0")]
+impl<'a> FromIterator<Cow<'a, str>> for String {
+ fn from_iter<I: IntoIterator<Item = Cow<'a, str>>>(iter: I) -> String {
+ let mut iterator = iter.into_iter();
+
+ // Because we're iterating over CoWs, we can (potentially) avoid at least
+ // one allocation by getting the first item and appending to it all the
+ // subsequent items.
+ match iterator.next() {
+ None => String::new(),
+ Some(cow) => {
+ let mut buf = cow.into_owned();
+ buf.extend(iterator);
+ buf
+ }
+ }
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Extend<char> for String {
+ fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
+ let iterator = iter.into_iter();
+ let (lower_bound, _) = iterator.size_hint();
+ self.reserve(lower_bound);
+ iterator.for_each(move |c| self.push(c));
+ }
+
+ #[inline]
+ fn extend_one(&mut self, c: char) {
+ self.push(c);
+ }
+
+ #[inline]
+ fn extend_reserve(&mut self, additional: usize) {
+ self.reserve(additional);
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "extend_ref", since = "1.2.0")]
+impl<'a> Extend<&'a char> for String {
+ fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) {
+ self.extend(iter.into_iter().cloned());
+ }
+
+ #[inline]
+ fn extend_one(&mut self, &c: &'a char) {
+ self.push(c);
+ }
+
+ #[inline]
+ fn extend_reserve(&mut self, additional: usize) {
+ self.reserve(additional);
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> Extend<&'a str> for String {
+ fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
+ iter.into_iter().for_each(move |s| self.push_str(s));
+ }
+
+ #[inline]
+ fn extend_one(&mut self, s: &'a str) {
+ self.push_str(s);
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "box_str2", since = "1.45.0")]
+impl Extend<Box<str>> for String {
+ fn extend<I: IntoIterator<Item = Box<str>>>(&mut self, iter: I) {
+ iter.into_iter().for_each(move |s| self.push_str(&s));
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "extend_string", since = "1.4.0")]
+impl Extend<String> for String {
+ fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) {
+ iter.into_iter().for_each(move |s| self.push_str(&s));
+ }
+
+ #[inline]
+ fn extend_one(&mut self, s: String) {
+ self.push_str(&s);
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "herd_cows", since = "1.19.0")]
+impl<'a> Extend<Cow<'a, str>> for String {
+ fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) {
+ iter.into_iter().for_each(move |s| self.push_str(&s));
+ }
+
+ #[inline]
+ fn extend_one(&mut self, s: Cow<'a, str>) {
+ self.push_str(&s);
+ }
+}
+
+/// A convenience impl that delegates to the impl for `&str`.
+///
+/// # Examples
+///
+/// ```
+/// assert_eq!(String::from("Hello world").find("world"), Some(6));
+/// ```
+#[unstable(
+ feature = "pattern",
+ reason = "API not fully fleshed out and ready to be stabilized",
+ issue = "27721"
+)]
+impl<'a, 'b> Pattern<'a> for &'b String {
+ type Searcher = <&'b str as Pattern<'a>>::Searcher;
+
+ fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher {
+ self[..].into_searcher(haystack)
+ }
+
+ #[inline]
+ fn is_contained_in(self, haystack: &'a str) -> bool {
+ self[..].is_contained_in(haystack)
+ }
+
+ #[inline]
+ fn is_prefix_of(self, haystack: &'a str) -> bool {
+ self[..].is_prefix_of(haystack)
+ }
+
+ #[inline]
+ fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> {
+ self[..].strip_prefix_of(haystack)
+ }
+
+ #[inline]
+ fn is_suffix_of(self, haystack: &'a str) -> bool {
+ self[..].is_suffix_of(haystack)
+ }
+
+ #[inline]
+ fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> {
+ self[..].strip_suffix_of(haystack)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl PartialEq for String {
+ #[inline]
+ fn eq(&self, other: &String) -> bool {
+ PartialEq::eq(&self[..], &other[..])
+ }
+ #[inline]
+ fn ne(&self, other: &String) -> bool {
+ PartialEq::ne(&self[..], &other[..])
+ }
+}
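[Editor's note: the `FromIterator` and `Extend` impls above are what make `collect` and `extend` work for strings; note how `Extend<char>` pre-reserves using the iterator's `size_hint`. A small usage sketch:]

```rust
fn main() {
    // FromIterator: `collect` builds a String from string-like items.
    let s: String = ["foo", "bar"].iter().copied().collect();
    assert_eq!(s, "foobar");

    // Extend: appends everything an iterator yields.
    let mut t = String::from("abc");
    t.extend("def".chars());
    assert_eq!(t, "abcdef");
}
```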
+
+macro_rules! impl_eq {
+ ($lhs:ty, $rhs: ty) => {
+ #[stable(feature = "rust1", since = "1.0.0")]
+ #[allow(unused_lifetimes)]
+ impl<'a, 'b> PartialEq<$rhs> for $lhs {
+ #[inline]
+ fn eq(&self, other: &$rhs) -> bool {
+ PartialEq::eq(&self[..], &other[..])
+ }
+ #[inline]
+ fn ne(&self, other: &$rhs) -> bool {
+ PartialEq::ne(&self[..], &other[..])
+ }
+ }
+
+ #[stable(feature = "rust1", since = "1.0.0")]
+ #[allow(unused_lifetimes)]
+ impl<'a, 'b> PartialEq<$lhs> for $rhs {
+ #[inline]
+ fn eq(&self, other: &$lhs) -> bool {
+ PartialEq::eq(&self[..], &other[..])
+ }
+ #[inline]
+ fn ne(&self, other: &$lhs) -> bool {
+ PartialEq::ne(&self[..], &other[..])
+ }
+ }
+ };
+}
+
+impl_eq! { String, str }
+impl_eq! { String, &'a str }
+#[cfg(not(no_global_oom_handling))]
+impl_eq! { Cow<'a, str>, str }
+#[cfg(not(no_global_oom_handling))]
+impl_eq! { Cow<'a, str>, &'b str }
+#[cfg(not(no_global_oom_handling))]
+impl_eq! { Cow<'a, str>, String }
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Default for String {
+ /// Creates an empty `String`.
+ #[inline]
+ fn default() -> String {
+ String::new()
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Display for String {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&**self, f)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Debug for String {
+ #[inline]
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&**self, f)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl hash::Hash for String {
+ #[inline]
+ fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
+ (**self).hash(hasher)
+ }
+}
+
+/// Implements the `+` operator for concatenating two strings.
+///
+/// This consumes the `String` on the left-hand side and re-uses its buffer (growing it if
+/// necessary). This is done to avoid allocating a new `String` and copying the entire contents on
+/// every operation, which would lead to *O*(*n*^2) running time when building an *n*-byte string by
+/// repeated concatenation.
+///
+/// The string on the right-hand side is only borrowed; its contents are copied into the returned
+/// `String`.
+///
+/// # Examples
+///
+/// Concatenating two `String`s takes the first by value and borrows the second:
+///
+/// ```
+/// let a = String::from("hello");
+/// let b = String::from(" world");
+/// let c = a + &b;
+/// // `a` is moved and can no longer be used here.
+/// ```
+///
+/// If you want to keep using the first `String`, you can clone it and append to the clone instead:
+///
+/// ```
+/// let a = String::from("hello");
+/// let b = String::from(" world");
+/// let c = a.clone() + &b;
+/// // `a` is still valid here.
+/// ```
+///
+/// Concatenating `&str` slices can be done by converting the first to a `String`:
+///
+/// ```
+/// let a = "hello";
+/// let b = " world";
+/// let c = a.to_string() + b;
+/// ```
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Add<&str> for String {
+ type Output = String;
+
+ #[inline]
+ fn add(mut self, other: &str) -> String {
+ self.push_str(other);
+ self
+ }
+}
+
+/// Implements the `+=` operator for appending to a `String`.
+///
+/// This has the same behavior as the [`push_str`][String::push_str] method.
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "stringaddassign", since = "1.12.0")]
+impl AddAssign<&str> for String {
+ #[inline]
+ fn add_assign(&mut self, other: &str) {
+ self.push_str(other);
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Index<ops::Range<usize>> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: ops::Range<usize>) -> &str {
+ &self[..][index]
+ }
+}
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Index<ops::RangeTo<usize>> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: ops::RangeTo<usize>) -> &str {
+ &self[..][index]
+ }
+}
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Index<ops::RangeFrom<usize>> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: ops::RangeFrom<usize>) -> &str {
+ &self[..][index]
+ }
+}
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Index<ops::RangeFull> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, _index: ops::RangeFull) -> &str {
+ unsafe { str::from_utf8_unchecked(&self.vec) }
+ }
+}
+#[stable(feature = "inclusive_range", since = "1.26.0")]
+impl ops::Index<ops::RangeInclusive<usize>> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
+ Index::index(&**self, index)
+ }
+}
+#[stable(feature = "inclusive_range", since = "1.26.0")]
+impl ops::Index<ops::RangeToInclusive<usize>> for String {
+ type Output = str;
+
+ #[inline]
+ fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
+ Index::index(&**self, index)
+ }
+}
+
+#[stable(feature = "derefmut_for_string", since = "1.3.0")]
+impl ops::IndexMut<ops::Range<usize>> for String {
+ #[inline]
+ fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
+ &mut self[..][index]
+ }
+}
+#[stable(feature = "derefmut_for_string", since = "1.3.0")]
+impl ops::IndexMut<ops::RangeTo<usize>> for String {
+ #[inline]
+ fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
+ &mut self[..][index]
+ }
+}
+#[stable(feature = "derefmut_for_string", since = "1.3.0")]
+impl ops::IndexMut<ops::RangeFrom<usize>> for String {
+ #[inline]
+ fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
+ &mut self[..][index]
+ }
+}
+#[stable(feature = "derefmut_for_string", since = "1.3.0")]
+impl ops::IndexMut<ops::RangeFull> for String {
+ #[inline]
+ fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
+ unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
+ }
+}
+#[stable(feature = "inclusive_range", since = "1.26.0")]
+impl ops::IndexMut<ops::RangeInclusive<usize>> for String {
+ #[inline]
+ fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
+ IndexMut::index_mut(&mut **self, index)
+ }
+}
+#[stable(feature = "inclusive_range", since = "1.26.0")]
+impl ops::IndexMut<ops::RangeToInclusive<usize>> for String {
+ #[inline]
+ fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
+ IndexMut::index_mut(&mut **self, index)
+ }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ops::Deref for String {
+ type Target = str;
+
+ #[inline]
+ fn deref(&self) -> &str {
+ unsafe { str::from_utf8_unchecked(&self.vec) }
+ }
+}
+
+#[stable(feature = "derefmut_for_string", since = "1.3.0")]
+impl ops::DerefMut for String {
+ #[inline]
+ fn deref_mut(&mut self) -> &mut str {
+ unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) }
+ }
+}
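[Editor's note: every range form of indexing above bottoms out in the `RangeFull` impl, which reinterprets the byte buffer as `str`; `Deref<Target = str>` is what makes `str` methods callable on a `String` directly. A quick demonstration:]

```rust
fn main() {
    let s = String::from("hello world");
    // Each range form routes through one of the `Index` impls above,
    // which defer to `str` indexing via `&self[..]`.
    assert_eq!(&s[0..5], "hello");
    assert_eq!(&s[6..], "world");
    assert_eq!(&s[..], "hello world");
    // `Deref<Target = str>` makes `str` methods available:
    assert!(s.starts_with("hell"));
}
```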
+
+/// A type alias for [`Infallible`].
+///
+/// This alias exists for backwards compatibility, and may be eventually deprecated.
+///
+/// [`Infallible`]: core::convert::Infallible
+#[stable(feature = "str_parse_error", since = "1.5.0")]
+pub type ParseError = core::convert::Infallible;
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl FromStr for String {
+ type Err = core::convert::Infallible;
+ #[inline]
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ Ok(String::from(s))
+ }
+}
+
+/// A trait for converting a value to a `String`.
+///
+/// This trait is automatically implemented for any type which implements the
+/// [`Display`] trait. As such, `ToString` shouldn't be implemented directly:
+/// [`Display`] should be implemented instead, and you get the `ToString`
+/// implementation for free.
+///
+/// [`Display`]: fmt::Display
+#[cfg_attr(not(test), rustc_diagnostic_item = "ToString")]
+#[stable(feature = "rust1", since = "1.0.0")]
+pub trait ToString {
+ /// Converts the given value to a `String`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let i = 5;
+ /// let five = String::from("5");
+ ///
+ /// assert_eq!(five, i.to_string());
+ /// ```
+ #[rustc_conversion_suggestion]
+ #[stable(feature = "rust1", since = "1.0.0")]
+ fn to_string(&self) -> String;
+}
+
+/// # Panics
+///
+/// In this implementation, the `to_string` method panics
+/// if the `Display` implementation returns an error.
+/// This indicates an incorrect `Display` implementation
+/// since `fmt::Write for String` never returns an error itself.
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T: fmt::Display + ?Sized> ToString for T {
+ // A common guideline is to not inline generic functions. However,
+ // removing `#[inline]` from this method causes non-negligible regressions.
+ // See <https://github.com/rust-lang/rust/pull/74852>, the last attempt
+ // to try to remove it.
+ #[inline]
+ default fn to_string(&self) -> String {
+ let mut buf = String::new();
+ let mut formatter = core::fmt::Formatter::new(&mut buf);
+ // Bypass format_args!() to avoid write_str with zero-length strs
+ fmt::Display::fmt(self, &mut formatter)
+ .expect("a Display implementation returned an error unexpectedly");
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "char_to_string_specialization", since = "1.46.0")]
+impl ToString for char {
+ #[inline]
+ fn to_string(&self) -> String {
+ String::from(self.encode_utf8(&mut [0; 4]))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "u8_to_string_specialization", since = "1.54.0")]
+impl ToString for u8 {
+ #[inline]
+ fn to_string(&self) -> String {
+ let mut buf = String::with_capacity(3);
+ let mut n = *self;
+ if n >= 10 {
+ if n >= 100 {
+ buf.push((b'0' + n / 100) as char);
+ n %= 100;
+ }
+ buf.push((b'0' + n / 10) as char);
+ n %= 10;
+ }
+ buf.push((b'0' + n) as char);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "i8_to_string_specialization", since = "1.54.0")]
+impl ToString for i8 {
+ #[inline]
+ fn to_string(&self) -> String {
+ let mut buf = String::with_capacity(4);
+ if self.is_negative() {
+ buf.push('-');
+ }
+ let mut n = self.unsigned_abs();
+ if n >= 10 {
+ if n >= 100 {
+ buf.push('1');
+ n -= 100;
+ }
+ buf.push((b'0' + n / 10) as char);
+ n %= 10;
+ }
+ buf.push((b'0' + n) as char);
+ buf
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "str_to_string_specialization", since = "1.9.0")]
+impl ToString for str {
+ #[inline]
+ fn to_string(&self) -> String {
+ String::from(self)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_str_to_string_specialization", since = "1.17.0")]
"1.17.0")] +impl ToString for Cow<'_, str> { + #[inline] + fn to_string(&self) -> String { + self[..].to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_to_string_specialization", since = "1.17.0")] +impl ToString for String { + #[inline] + fn to_string(&self) -> String { + self.to_owned() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +#[stable(feature = "string_as_mut", since = "1.43.0")] +impl AsMut for String { + #[inline] + fn as_mut(&mut self) -> &mut str { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[u8]> for String { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for String { + /// Converts a `&str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_mut_str_for_string", since = "1.44.0")] +impl From<&mut str> for String { + /// Converts a `&mut str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &mut str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_ref_string", since = "1.35.0")] +impl From<&String> for String { + /// Converts a `&String` into a [`String`]. + /// + /// This clones `s` and returns the clone. + #[inline] + fn from(s: &String) -> String { + s.clone() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "string_from_box", since = "1.18.0")] +impl From> for String { + /// Converts the given boxed `str` slice to a [`String`]. + /// It is notable that the `str` slice is owned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box = s1.into_boxed_str(); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: Box) -> String { + s.into_string() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_str", since = "1.20.0")] +impl From for Box { + /// Converts the given [`String`] to a boxed `str` slice that is owned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box = Box::from(s1); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: String) -> Box { + s.into_boxed_str() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_from_cow_str", since = "1.14.0")] +impl<'a> From> for String { + /// Converts a clone-on-write string to an owned + /// instance of [`String`]. + /// + /// This extracts the owned string, + /// clones the string if it is not already owned. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// // If the string is not owned... + /// let cow: Cow = Cow::Borrowed("eggplant"); + /// // It will allocate on the heap and copy the string. 
+ /// let owned: String = String::from(cow);
+ /// assert_eq!(&owned[..], "eggplant");
+ /// ```
+ fn from(s: Cow<'a, str>) -> String {
+ s.into_owned()
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> From<&'a str> for Cow<'a, str> {
+ /// Converts a string slice into a [`Borrowed`] variant.
+ /// No heap allocation is performed, and the string
+ /// is not copied.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::borrow::Cow;
+ /// assert_eq!(Cow::from("eggplant"), Cow::Borrowed("eggplant"));
+ /// ```
+ ///
+ /// [`Borrowed`]: crate::borrow::Cow::Borrowed
+ #[inline]
+ fn from(s: &'a str) -> Cow<'a, str> {
+ Cow::Borrowed(s)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> From<String> for Cow<'a, str> {
+ /// Converts a [`String`] into an [`Owned`] variant.
+ /// No heap allocation is performed, and the string
+ /// is not copied.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::borrow::Cow;
+ /// let s = "eggplant".to_string();
+ /// let s2 = "eggplant".to_string();
+ /// assert_eq!(Cow::from(s), Cow::<'static, str>::Owned(s2));
+ /// ```
+ ///
+ /// [`Owned`]: crate::borrow::Cow::Owned
+ #[inline]
+ fn from(s: String) -> Cow<'a, str> {
+ Cow::Owned(s)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_from_string_ref", since = "1.28.0")]
+impl<'a> From<&'a String> for Cow<'a, str> {
+ /// Converts a [`String`] reference into a [`Borrowed`] variant.
+ /// No heap allocation is performed, and the string
+ /// is not copied.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// # use std::borrow::Cow;
+ /// let s = "eggplant".to_string();
+ /// assert_eq!(Cow::from(&s), Cow::Borrowed("eggplant"));
+ /// ```
+ ///
+ /// [`Borrowed`]: crate::borrow::Cow::Borrowed
+ #[inline]
+ fn from(s: &'a String) -> Cow<'a, str> {
+ Cow::Borrowed(s.as_str())
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_str_from_iter", since = "1.12.0")]
+impl<'a> FromIterator<char> for Cow<'a, str> {
+ fn from_iter<I: IntoIterator<Item = char>>(it: I) -> Cow<'a, str> {
+ Cow::Owned(FromIterator::from_iter(it))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_str_from_iter", since = "1.12.0")]
+impl<'a, 'b> FromIterator<&'b str> for Cow<'a, str> {
+ fn from_iter<I: IntoIterator<Item = &'b str>>(it: I) -> Cow<'a, str> {
+ Cow::Owned(FromIterator::from_iter(it))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_str_from_iter", since = "1.12.0")]
+impl<'a> FromIterator<String> for Cow<'a, str> {
+ fn from_iter<I: IntoIterator<Item = String>>(it: I) -> Cow<'a, str> {
+ Cow::Owned(FromIterator::from_iter(it))
+ }
+}
+
+#[stable(feature = "from_string_for_vec_u8", since = "1.14.0")]
+impl From<String> for Vec<u8> {
+ /// Converts the given [`String`] to a vector [`Vec`] that holds values of type [`u8`].
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// let s1 = String::from("hello world");
+ /// let v1 = Vec::from(s1);
+ ///
+ /// for b in v1 {
+ /// println!("{}", b);
+ /// }
+ /// ```
+ fn from(string: String) -> Vec<u8> {
+ string.into_bytes()
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl fmt::Write for String {
+ #[inline]
+ fn write_str(&mut self, s: &str) -> fmt::Result {
+ self.push_str(s);
+ Ok(())
+ }
+
+ #[inline]
+ fn write_char(&mut self, c: char) -> fmt::Result {
+ self.push(c);
+ Ok(())
+ }
+}
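[Editor's note: the `fmt::Write` impl just above is what lets formatting macros target a `String` directly, and it is also what the blanket `ToString` impl earlier relies on. A short usage sketch:]

```rust
use std::fmt::Write;

fn main() {
    let mut s = String::new();
    // `write!` works on any `fmt::Write`; for String it cannot fail,
    // so unwrapping the Result is conventional.
    write!(s, "{} + {} = {}", 1, 2, 1 + 2).unwrap();
    assert_eq!(s, "1 + 2 = 3");
}
```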
+
+/// A draining iterator for `String`.
+///
+/// This struct is created by the [`drain`] method on [`String`]. See its
+/// documentation for more.
+///
+/// [`drain`]: String::drain
+#[stable(feature = "drain", since = "1.6.0")]
+pub struct Drain<'a> {
+ /// Will be used as &'a mut String in the destructor
+ string: *mut String,
+ /// Start of part to remove
+ start: usize,
+ /// End of part to remove
+ end: usize,
+ /// Current remaining range to remove
+ iter: Chars<'a>,
+}
+
+#[stable(feature = "collection_debug", since = "1.17.0")]
+impl fmt::Debug for Drain<'_> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_tuple("Drain").field(&self.as_str()).finish()
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+unsafe impl Sync for Drain<'_> {}
+#[stable(feature = "drain", since = "1.6.0")]
+unsafe impl Send for Drain<'_> {}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl Drop for Drain<'_> {
+ fn drop(&mut self) {
+ unsafe {
+ // Use Vec::drain. "Reaffirm" the bounds checks to avoid
+ // panic code being inserted again.
+ let self_vec = (*self.string).as_mut_vec();
+ if self.start <= self.end && self.end <= self_vec.len() {
+ self_vec.drain(self.start..self.end);
+ }
+ }
+ }
+}
+
+impl<'a> Drain<'a> {
+ /// Returns the remaining (sub)string of this iterator as a slice.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut s = String::from("abc");
+ /// let mut drain = s.drain(..);
+ /// assert_eq!(drain.as_str(), "abc");
+ /// let _ = drain.next().unwrap();
+ /// assert_eq!(drain.as_str(), "bc");
+ /// ```
+ #[stable(feature = "string_drain_as_str", since = "1.55.0")]
+ pub fn as_str(&self) -> &str {
+ self.iter.as_str()
+ }
+}
+
+#[stable(feature = "string_drain_as_str", since = "1.55.0")]
+impl<'a> AsRef<str> for Drain<'a> {
+ fn as_ref(&self) -> &str {
+ self.as_str()
+ }
+}
+
+#[stable(feature = "string_drain_as_str", since = "1.55.0")]
+impl<'a> AsRef<[u8]> for Drain<'a> {
+ fn as_ref(&self) -> &[u8] {
+ self.as_str().as_bytes()
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl Iterator for Drain<'_> {
+ type Item = char;
+
+ #[inline]
+ fn next(&mut self) -> Option<char> {
+ self.iter.next()
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.iter.size_hint()
+ }
+
+ #[inline]
+ fn last(mut self) -> Option<char> {
+ self.next_back()
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl DoubleEndedIterator for Drain<'_> {
+ #[inline]
+ fn next_back(&mut self) -> Option<char> {
+ self.iter.next_back()
+ }
+}
+
+#[stable(feature = "fused", since = "1.26.0")]
+impl FusedIterator for Drain<'_> {}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "from_char_for_string", since = "1.46.0")]
+impl From<char> for String {
+ /// Allocates an owned [`String`] from a single character.
+ ///
+ /// # Example
+ /// ```rust
+ /// let c: char = 'a';
+ /// let s: String = String::from(c);
+ /// assert_eq!("a", &s[..]);
+ /// ```
+ #[inline]
+ fn from(c: char) -> Self {
+ c.to_string()
+ }
+}
diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs
new file mode 100644
index 0000000000000..65f7e32e95fd7
--- /dev/null
+++ b/rust/alloc/vec/drain.rs
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::alloc::{Allocator, Global};
+use core::fmt;
+use core::iter::{FusedIterator, TrustedLen};
+use core::mem::{self};
+use core::ptr::{self, NonNull};
+use core::slice::{self};
+
+use super::Vec;
+
+/// A draining iterator for `Vec<T>`.
+///
+/// This `struct` is created by [`Vec::drain`].
+/// See its documentation for more.
+///
+/// # Example
+///
+/// ```
+/// let mut v = vec![0, 1, 2];
+/// let iter: std::vec::Drain<_> = v.drain(..);
+/// ```
+#[stable(feature = "drain", since = "1.6.0")]
+pub struct Drain<
+ 'a,
+ T: 'a,
+ #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator + 'a = Global,
+> {
+ /// Index of tail to preserve
+ pub(super) tail_start: usize,
+ /// Length of tail
+ pub(super) tail_len: usize,
+ /// Current remaining range to remove
+ pub(super) iter: slice::Iter<'a, T>,
+ pub(super) vec: NonNull<Vec<T, A>>,
+}
+
+#[stable(feature = "collection_debug", since = "1.17.0")]
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for Drain<'_, T, A> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_tuple("Drain").field(&self.iter.as_slice()).finish()
+ }
+}
+
+impl<'a, T, A: Allocator> Drain<'a, T, A> {
+ /// Returns the remaining items of this iterator as a slice.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut vec = vec!['a', 'b', 'c'];
+ /// let mut drain = vec.drain(..);
+ /// assert_eq!(drain.as_slice(), &['a', 'b', 'c']);
+ /// let _ = drain.next().unwrap();
+ /// assert_eq!(drain.as_slice(), &['b', 'c']);
+ /// ```
+ #[stable(feature = "vec_drain_as_slice", since = "1.46.0")]
+ pub fn as_slice(&self) -> &[T] {
+ self.iter.as_slice()
+ }
+
+ /// Returns a reference to the underlying allocator.
+ #[unstable(feature = "allocator_api", issue = "32838")]
+ #[inline]
+ pub fn allocator(&self) -> &A {
+ unsafe { self.vec.as_ref().allocator() }
+ }
+}
+
+#[stable(feature = "vec_drain_as_slice", since = "1.46.0")]
+impl<'a, T, A: Allocator> AsRef<[T]> for Drain<'a, T, A> {
+ fn as_ref(&self) -> &[T] {
+ self.as_slice()
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+unsafe impl<T: Sync, A: Sync + Allocator> Sync for Drain<'_, T, A> {}
+#[stable(feature = "drain", since = "1.6.0")]
+unsafe impl<T: Send, A: Send + Allocator> Send for Drain<'_, T, A> {}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl<T, A: Allocator> Iterator for Drain<'_, T, A> {
+ type Item = T;
+
+ #[inline]
+ fn next(&mut self) -> Option<T> {
+ self.iter.next().map(|elt| unsafe { ptr::read(elt as *const _) })
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.iter.size_hint()
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl<T, A: Allocator> DoubleEndedIterator for Drain<'_, T, A> {
+ #[inline]
+ fn next_back(&mut self) -> Option<T> {
+ self.iter.next_back().map(|elt| unsafe { ptr::read(elt as *const _) })
+ }
+}
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl<T, A: Allocator> Drop for Drain<'_, T, A> {
+ fn drop(&mut self) {
+ /// Continues dropping the remaining elements in the `Drain`, then moves back the
+ /// un-`Drain`ed elements to restore the original `Vec`.
+ struct DropGuard<'r, 'a, T, A: Allocator>(&'r mut Drain<'a, T, A>);
+
+ impl<'r, 'a, T, A: Allocator> Drop for DropGuard<'r, 'a, T, A> {
+ fn drop(&mut self) {
+ // Continue the same loop we have below. If the loop already finished, this does
+ // nothing.
+ self.0.for_each(drop);
+
+ if self.0.tail_len > 0 {
+ unsafe {
+ let source_vec = self.0.vec.as_mut();
+ // memmove back untouched tail, update to new length
+ let start = source_vec.len();
+ let tail = self.0.tail_start;
+ if tail != start {
+ let src = source_vec.as_ptr().add(tail);
+ let dst = source_vec.as_mut_ptr().add(start);
+ ptr::copy(src, dst, self.0.tail_len);
+ }
+ source_vec.set_len(start + self.0.tail_len);
+ }
+ }
+ }
+ }
+
+ // exhaust self first
+ while let Some(item) = self.next() {
+ let guard = DropGuard(self);
+ drop(item);
+ mem::forget(guard);
+ }
+
+ // Drop a `DropGuard` to move back the non-drained tail of `self`.
+ DropGuard(self);
+ }
+}
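[Editor's note: the `DropGuard`/`mem::forget` dance in `Drain::drop` above is a general panic-safety idiom: the guard's destructor repairs the `Vec` even if an element's `Drop` panics mid-loop. A skeletal, standalone version of the same shape; the names are illustrative only:]

```rust
use std::mem;

// Cleanup that must run if, and only if, `process` panics.
struct OnUnwind<'a>(&'a mut Vec<u32>);

impl Drop for OnUnwind<'_> {
    fn drop(&mut self) {
        self.0.clear(); // reached only during unwinding
    }
}

fn consume(mut items: Vec<u32>, process: impl Fn(u32)) {
    while let Some(item) = items.pop() {
        let guard = OnUnwind(&mut items);
        process(item);      // may panic; the guard would then fire
        mem::forget(guard); // success path: skip the unwind cleanup
    }
}
```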
+
+#[stable(feature = "drain", since = "1.6.0")]
+impl<T, A: Allocator> ExactSizeIterator for Drain<'_, T, A> {
+ fn is_empty(&self) -> bool {
+ self.iter.is_empty()
+ }
+}
+
+#[unstable(feature = "trusted_len", issue = "37572")]
+unsafe impl<T, A: Allocator> TrustedLen for Drain<'_, T, A> {}
+
+#[stable(feature = "fused", since = "1.26.0")]
+impl<T, A: Allocator> FusedIterator for Drain<'_, T, A> {}
diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs
new file mode 100644
index 0000000000000..b04fce041622f
--- /dev/null
+++ b/rust/alloc/vec/drain_filter.rs
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::alloc::{Allocator, Global};
+use core::ptr::{self};
+use core::slice::{self};
+
+use super::Vec;
+
+/// An iterator which uses a closure to determine if an element should be removed.
+///
+/// This struct is created by [`Vec::drain_filter`].
+/// See its documentation for more.
+///
+/// # Example
+///
+/// ```
+/// #![feature(drain_filter)]
+///
+/// let mut v = vec![0, 1, 2];
+/// let iter: std::vec::DrainFilter<_, _> = v.drain_filter(|x| *x % 2 == 0);
+/// ```
+#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
+#[derive(Debug)]
+pub struct DrainFilter<
+ 'a,
+ T,
+ F,
+ #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global,
+> where
+ F: FnMut(&mut T) -> bool,
+{
+ pub(super) vec: &'a mut Vec<T, A>,
+ /// The index of the item that will be inspected by the next call to `next`.
+ pub(super) idx: usize,
+ /// The number of items that have been drained (removed) thus far.
+ pub(super) del: usize,
+ /// The original length of `vec` prior to draining.
+ pub(super) old_len: usize,
+ /// The filter test predicate.
+ pub(super) pred: F,
+ /// A flag that indicates a panic has occurred in the filter test predicate.
+ /// This is used as a hint in the drop implementation to prevent consumption
+ /// of the remainder of the `DrainFilter`. Any unprocessed items will be
+ /// backshifted in the `vec`, but no further items will be dropped or
+ /// tested by the filter predicate.
+ pub(super) panic_flag: bool,
+}
+
+impl<T, F, A: Allocator> DrainFilter<'_, T, F, A>
+where
+ F: FnMut(&mut T) -> bool,
+{
+ /// Returns a reference to the underlying allocator.
+ #[unstable(feature = "allocator_api", issue = "32838")]
+ #[inline]
+ pub fn allocator(&self) -> &A {
+ self.vec.allocator()
+ }
+}
+
+#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
+impl<T, F, A: Allocator> Iterator for DrainFilter<'_, T, F, A>
+where
+ F: FnMut(&mut T) -> bool,
+{
+ type Item = T;
+
+ fn next(&mut self) -> Option<T> {
+ unsafe {
+ while self.idx < self.old_len {
+ let i = self.idx;
+ let v = slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len);
+ self.panic_flag = true;
+ let drained = (self.pred)(&mut v[i]);
+ self.panic_flag = false;
+ // Update the index *after* the predicate is called. If the index
+ // is updated prior and the predicate panics, the element at this
+ // index would be leaked.
+                self.idx += 1;
+                if drained {
+                    self.del += 1;
+                    return Some(ptr::read(&v[i]));
+                } else if self.del > 0 {
+                    let del = self.del;
+                    let src: *const T = &v[i];
+                    let dst: *mut T = &mut v[i - del];
+                    ptr::copy_nonoverlapping(src, dst, 1);
+                }
+            }
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (0, Some(self.old_len - self.idx))
+    }
+}
+
+#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
+impl<T, F, A: Allocator> Drop for DrainFilter<'_, T, F, A>
+where
+    F: FnMut(&mut T) -> bool,
+{
+    fn drop(&mut self) {
+        struct BackshiftOnDrop<'a, 'b, T, F, A: Allocator>
+        where
+            F: FnMut(&mut T) -> bool,
+        {
+            drain: &'b mut DrainFilter<'a, T, F, A>,
+        }
+
+        impl<'a, 'b, T, F, A: Allocator> Drop for BackshiftOnDrop<'a, 'b, T, F, A>
+        where
+            F: FnMut(&mut T) -> bool,
+        {
+            fn drop(&mut self) {
+                unsafe {
+                    if self.drain.idx < self.drain.old_len && self.drain.del > 0 {
+                        // This is a pretty messed up state, and there isn't really an
+                        // obviously right thing to do. We don't want to keep trying
+                        // to execute `pred`, so we just backshift all the unprocessed
+                        // elements and tell the vec that they still exist. The backshift
+                        // is required to prevent a double-drop of the last successfully
+                        // drained item prior to a panic in the predicate.
+                        let ptr = self.drain.vec.as_mut_ptr();
+                        let src = ptr.add(self.drain.idx);
+                        let dst = src.sub(self.drain.del);
+                        let tail_len = self.drain.old_len - self.drain.idx;
+                        src.copy_to(dst, tail_len);
+                    }
+                    self.drain.vec.set_len(self.drain.old_len - self.drain.del);
+                }
+            }
+        }
+
+        let backshift = BackshiftOnDrop { drain: self };
+
+        // Attempt to consume any remaining elements if the filter predicate
+        // has not yet panicked. We'll backshift any remaining elements
+        // whether we've already panicked or if the consumption here panics.
+        if !backshift.drain.panic_flag {
+            backshift.drain.for_each(drop);
+        }
+    }
+}
diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs
new file mode 100644
index 0000000000000..35ab72b7c7457
--- /dev/null
+++ b/rust/alloc/vec/into_iter.rs
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::alloc::{Allocator, Global};
+use crate::raw_vec::RawVec;
+use core::fmt;
+use core::intrinsics::arith_offset;
+use core::iter::{FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccess};
+use core::marker::PhantomData;
+use core::mem::{self};
+use core::ptr::{self, NonNull};
+use core::slice::{self};
+
+/// An iterator that moves out of a vector.
+///
+/// This `struct` is created by the `into_iter` method on [`Vec`](super::Vec)
+/// (provided by the [`IntoIterator`] trait).
+///
+/// # Example
+///
+/// ```
+/// let v = vec![0, 1, 2];
+/// let iter: std::vec::IntoIter<_> = v.into_iter();
+/// ```
+#[stable(feature = "rust1", since = "1.0.0")]
+pub struct IntoIter<
+    T,
+    #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global,
+> {
+    pub(super) buf: NonNull<T>,
+    pub(super) phantom: PhantomData<T>,
+    pub(super) cap: usize,
+    pub(super) alloc: A,
+    pub(super) ptr: *const T,
+    pub(super) end: *const T,
+}
+
+#[stable(feature = "vec_intoiter_debug", since = "1.13.0")]
+impl<T: fmt::Debug, A: Allocator> fmt::Debug for IntoIter<T, A> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("IntoIter").field(&self.as_slice()).finish()
+    }
+}
+
+impl<T, A: Allocator> IntoIter<T, A> {
+    /// Returns the remaining items of this iterator as a slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let vec = vec!['a', 'b', 'c'];
+    /// let mut into_iter = vec.into_iter();
+    /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']);
+    /// let _ = into_iter.next().unwrap();
+    /// assert_eq!(into_iter.as_slice(), &['b', 'c']);
+    /// ```
+    #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")]
+    pub fn as_slice(&self) -> &[T] {
+        unsafe { slice::from_raw_parts(self.ptr, self.len()) }
+    }
+
+    /// Returns the remaining items of this iterator as a mutable slice.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let vec = vec!['a', 'b', 'c'];
+    /// let mut into_iter = vec.into_iter();
+    /// assert_eq!(into_iter.as_slice(), &['a', 'b', 'c']);
+    /// into_iter.as_mut_slice()[2] = 'z';
+    /// assert_eq!(into_iter.next().unwrap(), 'a');
+    /// assert_eq!(into_iter.next().unwrap(), 'b');
+    /// assert_eq!(into_iter.next().unwrap(), 'z');
+    /// ```
+    #[stable(feature = "vec_into_iter_as_slice", since = "1.15.0")]
+    pub fn as_mut_slice(&mut self) -> &mut [T] {
+        unsafe { &mut *self.as_raw_mut_slice() }
+    }
+
+    /// Returns a reference to the underlying allocator.
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    #[inline]
+    pub fn allocator(&self) -> &A {
+        &self.alloc
+    }
+
+    fn as_raw_mut_slice(&mut self) -> *mut [T] {
+        ptr::slice_from_raw_parts_mut(self.ptr as *mut T, self.len())
+    }
+
+    /// Drops remaining elements and relinquishes the backing allocation.
+    ///
+    /// This is roughly equivalent to the following, but more efficient
+    ///
+    /// ```
+    /// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter();
+    /// (&mut into_iter).for_each(core::mem::drop);
+    /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); }
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    pub(super) fn forget_allocation_drop_remaining(&mut self) {
+        let remaining = self.as_raw_mut_slice();
+
+        // overwrite the individual fields instead of creating a new
+        // struct and then overwriting &mut self.
+        // this creates less assembly
+        self.cap = 0;
+        self.buf = unsafe { NonNull::new_unchecked(RawVec::NEW.ptr()) };
+        self.ptr = self.buf.as_ptr();
+        self.end = self.buf.as_ptr();
+
+        unsafe {
+            ptr::drop_in_place(remaining);
+        }
+    }
+}
+
+#[stable(feature = "vec_intoiter_as_ref", since = "1.46.0")]
+impl<T, A: Allocator> AsRef<[T]> for IntoIter<T, A> {
+    fn as_ref(&self) -> &[T] {
+        self.as_slice()
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+unsafe impl<T: Send, A: Allocator + Send> Send for IntoIter<T, A> {}
+#[stable(feature = "rust1", since = "1.0.0")]
+unsafe impl<T: Sync, A: Allocator + Sync> Sync for IntoIter<T, A> {}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T, A: Allocator> Iterator for IntoIter<T, A> {
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<T> {
+        if self.ptr as *const _ == self.end {
+            None
+        } else if mem::size_of::<T>() == 0 {
+            // purposefully don't use 'ptr.offset' because for
+            // vectors with 0-size elements this would return the
+            // same pointer.
+            self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T };
+
+            // Make up a value of this ZST.
+            Some(unsafe { mem::zeroed() })
+        } else {
+            let old = self.ptr;
+            self.ptr = unsafe { self.ptr.offset(1) };
+
+            Some(unsafe { ptr::read(old) })
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let exact = if mem::size_of::<T>() == 0 {
+            (self.end as usize).wrapping_sub(self.ptr as usize)
+        } else {
+            unsafe { self.end.offset_from(self.ptr) as usize }
+        };
+        (exact, Some(exact))
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.len()
+    }
+
+    #[doc(hidden)]
+    unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
+    where
+        Self: TrustedRandomAccess,
+    {
+        // SAFETY: the caller must guarantee that `i` is in bounds of the
+        // `Vec<T>`, so `i` cannot overflow an `isize`, and `self.ptr.add(i)`
+        // is guaranteed to point to an element of the `Vec<T>` and
+        // thus guaranteed to be valid to dereference.
+        //
+        // Also note the implementation of `Self: TrustedRandomAccess` requires
+        // that `T: Copy` so reading elements from the buffer doesn't invalidate
+        // them for `Drop`.
+        unsafe {
+            if mem::size_of::<T>() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
+        }
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
+    #[inline]
+    fn next_back(&mut self) -> Option<T> {
+        if self.end == self.ptr {
+            None
+        } else if mem::size_of::<T>() == 0 {
+            // See above for why 'ptr.offset' isn't used
+            self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T };
+
+            // Make up a value of this ZST.
+            Some(unsafe { mem::zeroed() })
+        } else {
+            self.end = unsafe { self.end.offset(-1) };
+
+            Some(unsafe { ptr::read(self.end) })
+        }
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<T, A: Allocator> ExactSizeIterator for IntoIter<T, A> {
+    fn is_empty(&self) -> bool {
+        self.ptr == self.end
+    }
+}
+
+#[stable(feature = "fused", since = "1.26.0")]
+impl<T, A: Allocator> FusedIterator for IntoIter<T, A> {}
+
+#[unstable(feature = "trusted_len", issue = "37572")]
+unsafe impl<T, A: Allocator> TrustedLen for IntoIter<T, A> {}
+
+#[doc(hidden)]
+#[unstable(issue = "none", feature = "std_internals")]
+// T: Copy as approximation for !Drop since get_unchecked does not advance self.ptr
+// and thus we can't implement drop-handling
+unsafe impl<T, A: Allocator> TrustedRandomAccess for IntoIter<T, A>
+where
+    T: Copy,
+{
+    const MAY_HAVE_SIDE_EFFECT: bool = false;
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "vec_into_iter_clone", since = "1.8.0")]
+impl<T: Clone, A: Allocator + Clone> Clone for IntoIter<T, A> {
+    #[cfg(not(test))]
+    fn clone(&self) -> Self {
+        self.as_slice().to_vec_in(self.alloc.clone()).into_iter()
+    }
+    #[cfg(test)]
+    fn clone(&self) -> Self {
+        crate::slice::to_vec(self.as_slice(), self.alloc.clone()).into_iter()
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+unsafe impl<#[may_dangle] T, A: Allocator> Drop for IntoIter<T, A> {
+    fn drop(&mut self) {
+        struct DropGuard<'a, T, A: Allocator>(&'a mut IntoIter<T, A>);
+
+        impl<T, A: Allocator> Drop for DropGuard<'_, T, A> {
+            fn drop(&mut self) {
+                unsafe {
+                    // `IntoIter::alloc` is not used anymore after this
+                    let alloc = ptr::read(&self.0.alloc);
+                    // RawVec handles deallocation
+                    let _ = RawVec::from_raw_parts_in(self.0.buf.as_ptr(), self.0.cap, alloc);
+                }
+            }
+        }
+
+        let guard = DropGuard(self);
+        // destroy the remaining elements
+        unsafe {
+            ptr::drop_in_place(guard.0.as_raw_mut_slice());
+        }
+        // now `guard` will be dropped and do the rest
+    }
+}
+
+#[unstable(issue = "none", feature = "inplace_iteration")]
+#[doc(hidden)]
+unsafe impl<T, A: Allocator> InPlaceIterable for IntoIter<T, A> {}
+
+#[unstable(issue = "none", feature = "inplace_iteration")]
+#[doc(hidden)]
+unsafe impl<T, A: Allocator> SourceIter for IntoIter<T, A> {
+    type Source = Self;
+
+    #[inline]
+    unsafe fn as_inner(&mut self) -> &mut Self::Source {
+        self
+    }
+}
+
+// internal helper trait for in-place iteration specialization.
+#[rustc_specialization_trait]
+pub(crate) trait AsIntoIter {
+    type Item;
+    fn as_into_iter(&mut self) -> &mut IntoIter<Self::Item>;
+}
+
+impl<T> AsIntoIter for IntoIter<T> {
+    type Item = T;
+
+    fn as_into_iter(&mut self) -> &mut IntoIter<Self::Item> {
+        self
+    }
+}
diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs
new file mode 100644
index 0000000000000..40e1e667c9fb3
--- /dev/null
+++ b/rust/alloc/vec/is_zero.rs
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::boxed::Box;
+
+#[rustc_specialization_trait]
+pub(super) unsafe trait IsZero {
+    /// Whether this value is zero
+    fn is_zero(&self) -> bool;
+}
+
+macro_rules! impl_is_zero {
+    ($t:ty, $is_zero:expr) => {
+        unsafe impl IsZero for $t {
+            #[inline]
+            fn is_zero(&self) -> bool {
+                $is_zero(*self)
+            }
+        }
+    };
+}
+
+impl_is_zero!(i16, |x| x == 0);
+impl_is_zero!(i32, |x| x == 0);
+impl_is_zero!(i64, |x| x == 0);
+impl_is_zero!(i128, |x| x == 0);
+impl_is_zero!(isize, |x| x == 0);
+
+impl_is_zero!(u16, |x| x == 0);
+impl_is_zero!(u32, |x| x == 0);
+impl_is_zero!(u64, |x| x == 0);
+impl_is_zero!(u128, |x| x == 0);
+impl_is_zero!(usize, |x| x == 0);
+
+impl_is_zero!(bool, |x| x == false);
+impl_is_zero!(char, |x| x == '\0');
+
+impl_is_zero!(f32, |x: f32| x.to_bits() == 0);
+impl_is_zero!(f64, |x: f64| x.to_bits() == 0);
+
+unsafe impl<T> IsZero for *const T {
+    #[inline]
+    fn is_zero(&self) -> bool {
+        (*self).is_null()
+    }
+}
+
+unsafe impl<T> IsZero for *mut T {
+    #[inline]
+    fn is_zero(&self) -> bool {
+        (*self).is_null()
+    }
+}
+
+// `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null.
+// For fat pointers, the bytes that would be the pointer metadata in the `Some`
+// variant are padding in the `None` variant, so ignoring them and
+// zero-initializing instead is ok.
+// `Option<&mut T>` never implements `Clone`, so there's no need for an impl of
+// `SpecFromElem`.
+
+unsafe impl<T: ?Sized> IsZero for Option<&T> {
+    #[inline]
+    fn is_zero(&self) -> bool {
+        self.is_none()
+    }
+}
+
+unsafe impl<T: ?Sized> IsZero for Option<Box<T>> {
+    #[inline]
+    fn is_zero(&self) -> bool {
+        self.is_none()
+    }
+}
+
+// `Option<num::NonZeroU32>` and similar have a representation guarantee that
+// they're the same size as the corresponding `u32` type, as well as a guarantee
+// that transmuting between `NonZeroU32` and `Option<num::NonZeroU32>` works.
+// While the documentation officially makes it UB to transmute from `None`,
+// we're the standard library so we can make extra inferences, and we know that
+// the only niche available to represent `None` is the one that's all zeros.
+
+macro_rules! impl_is_zero_option_of_nonzero {
+    ($($t:ident,)+) => {$(
+        unsafe impl IsZero for Option<core::num::$t> {
+            #[inline]
+            fn is_zero(&self) -> bool {
+                self.is_none()
+            }
+        }
+    )+};
+}
+
+impl_is_zero_option_of_nonzero!(
+    NonZeroU8,
+    NonZeroU16,
+    NonZeroU32,
+    NonZeroU64,
+    NonZeroU128,
+    NonZeroI8,
+    NonZeroI16,
+    NonZeroI32,
+    NonZeroI64,
+    NonZeroI128,
+    NonZeroUsize,
+    NonZeroIsize,
+);
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
new file mode 100644
index 0000000000000..3d026b1270322
--- /dev/null
+++ b/rust/alloc/vec/mod.rs
@@ -0,0 +1,3261 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! A contiguous growable array type with heap-allocated contents, written
+//! `Vec<T>`.
+//!
+//! Vectors have `O(1)` indexing, amortized `O(1)` push (to the end) and
+//! `O(1)` pop (from the end).
+//!
+//! Vectors ensure they never allocate more than `isize::MAX` bytes.
+//!
+//! # Examples
+//!
+//! You can explicitly create a [`Vec`] with [`Vec::new`]:
+//!
+//! ```
+//! let v: Vec<i32> = Vec::new();
+//! ```
+//!
+//! ...or by using the [`vec!`] macro:
+//!
+//! ```
+//! let v: Vec<i32> = vec![];
+//!
+//! let v = vec![1, 2, 3, 4, 5];
+//!
+//! let v = vec![0; 10]; // ten zeroes
+//! ```
+//!
+//! You can [`push`] values onto the end of a vector (which will grow the vector
+//! as needed):
+//!
+//! ```
+//! let mut v = vec![1, 2];
+//!
+//! v.push(3);
+//! ```
+//!
+//! Popping values works in much the same way:
+//!
+//! ```
+//! let mut v = vec![1, 2];
+//!
+//! let two = v.pop();
+//! ```
+//!
+//! Vectors also support indexing (through the [`Index`] and [`IndexMut`] traits):
+//!
+//! ```
+//! let mut v = vec![1, 2, 3];
+//! let three = v[2];
+//! v[1] = v[1] + 5;
+//! ```
+//!
+//! [`push`]: Vec::push

+#![stable(feature = "rust1", since = "1.0.0")]
+
+#[cfg(not(no_global_oom_handling))]
+use core::cmp;
+use core::cmp::Ordering;
+use core::convert::TryFrom;
+use core::fmt;
+use core::hash::{Hash, Hasher};
+use core::intrinsics::{arith_offset, assume};
+use core::iter;
+#[cfg(not(no_global_oom_handling))]
+use core::iter::FromIterator;
+use core::marker::PhantomData;
+use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::ops::{self, Index, IndexMut, Range, RangeBounds};
+use core::ptr::{self, NonNull};
+use core::slice::{self, SliceIndex};
+
+use crate::alloc::{Allocator, Global};
+use crate::borrow::{Cow, ToOwned};
+use crate::boxed::Box;
+use crate::collections::TryReserveError;
+use crate::raw_vec::RawVec;
+
+#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
+pub use self::drain_filter::DrainFilter;
+
+mod drain_filter;
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "vec_splice", since = "1.21.0")]
+pub use self::splice::Splice;
+
+#[cfg(not(no_global_oom_handling))]
+mod splice;
+
+#[stable(feature = "drain", since = "1.6.0")]
+pub use self::drain::Drain;
+
+mod drain;
+
+#[cfg(not(no_global_oom_handling))]
+mod cow;
+
+#[cfg(not(no_global_oom_handling))]
+pub(crate) use self::into_iter::AsIntoIter;
+#[stable(feature = "rust1", since = "1.0.0")]
+pub use self::into_iter::IntoIter;
+
+mod into_iter;
+
+#[cfg(not(no_global_oom_handling))]
+use self::is_zero::IsZero;
+
+mod is_zero;
+
+#[cfg(not(no_global_oom_handling))]
+mod source_iter_marker;
+
+mod partial_eq;
+
+#[cfg(not(no_global_oom_handling))]
+use self::spec_from_elem::SpecFromElem;
+
+#[cfg(not(no_global_oom_handling))]
+mod spec_from_elem;
+
+use self::set_len_on_drop::SetLenOnDrop;
+
+mod set_len_on_drop;
+
+#[cfg(not(no_global_oom_handling))]
+use self::in_place_drop::InPlaceDrop;
+
+#[cfg(not(no_global_oom_handling))]
+mod in_place_drop;
+
+#[cfg(not(no_global_oom_handling))]
+use self::spec_from_iter_nested::SpecFromIterNested;
+
+#[cfg(not(no_global_oom_handling))]
+mod spec_from_iter_nested;
+
+#[cfg(not(no_global_oom_handling))]
+use self::spec_from_iter::SpecFromIter;
+
+#[cfg(not(no_global_oom_handling))]
+mod spec_from_iter;
+
+#[cfg(not(no_global_oom_handling))]
+use self::spec_extend::SpecExtend;
+
+use self::spec_extend::TrySpecExtend;
+
+mod spec_extend;
+
+/// A contiguous growable array type, written as `Vec<T>` and pronounced 'vector'.
+///
+/// # Examples
+///
+/// ```
+/// let mut vec = Vec::new();
+/// vec.push(1);
+/// vec.push(2);
+///
+/// assert_eq!(vec.len(), 2);
+/// assert_eq!(vec[0], 1);
+///
+/// assert_eq!(vec.pop(), Some(2));
+/// assert_eq!(vec.len(), 1);
+///
+/// vec[0] = 7;
+/// assert_eq!(vec[0], 7);
+///
+/// vec.extend([1, 2, 3].iter().copied());
+///
+/// for x in &vec {
+///     println!("{}", x);
+/// }
+/// assert_eq!(vec, [7, 1, 2, 3]);
+/// ```
+///
+/// The [`vec!`] macro is provided to make initialization more convenient:
+///
+/// ```
+/// let mut vec = vec![1, 2, 3];
+/// vec.push(4);
+/// assert_eq!(vec, [1, 2, 3, 4]);
+/// ```
+///
+/// It can also initialize each element of a `Vec<T>` with a given value.
+/// This may be more efficient than performing allocation and initialization
+/// in separate steps, especially when initializing a vector of zeros:
+///
+/// ```
+/// let vec = vec![0; 5];
+/// assert_eq!(vec, [0, 0, 0, 0, 0]);
+///
+/// // The following is equivalent, but potentially slower:
+/// let mut vec = Vec::with_capacity(5);
+/// vec.resize(5, 0);
+/// assert_eq!(vec, [0, 0, 0, 0, 0]);
+/// ```
+///
+/// For more information, see
+/// [Capacity and Reallocation](#capacity-and-reallocation).
+///
+/// Use a `Vec<T>` as an efficient stack:
+///
+/// ```
+/// let mut stack = Vec::new();
+///
+/// stack.push(1);
+/// stack.push(2);
+/// stack.push(3);
+///
+/// while let Some(top) = stack.pop() {
+///     // Prints 3, 2, 1
+///     println!("{}", top);
+/// }
+/// ```
+///
+/// # Indexing
+///
+/// The `Vec` type allows access to values by index, because it implements the
+/// [`Index`] trait. An example will be more explicit:
+///
+/// ```
+/// let v = vec![0, 2, 4, 6];
+/// println!("{}", v[1]); // it will display '2'
+/// ```
+///
+/// However be careful: if you try to access an index which isn't in the `Vec`,
+/// your software will panic! You cannot do this:
+///
+/// ```should_panic
+/// let v = vec![0, 2, 4, 6];
+/// println!("{}", v[6]); // it will panic!
+/// ```
+///
+/// Use [`get`] and [`get_mut`] if you want to check whether the index is in
+/// the `Vec`.
+///
+/// # Slicing
+///
+/// A `Vec` can be mutable. On the other hand, slices are read-only objects.
+/// To get a [slice][prim@slice], use [`&`]. Example:
+///
+/// ```
+/// fn read_slice(slice: &[usize]) {
+///     // ...
+/// }
+///
+/// let v = vec![0, 1];
+/// read_slice(&v);
+///
+/// // ... and that's all!
+/// // you can also do it like this:
+/// let u: &[usize] = &v;
+/// // or like this:
+/// let u: &[_] = &v;
+/// ```
+///
+/// In Rust, it's more common to pass slices as arguments rather than vectors
+/// when you just want to provide read access. The same goes for [`String`] and
+/// [`&str`].
+///
+/// # Capacity and reallocation
+///
+/// The capacity of a vector is the amount of space allocated for any future
+/// elements that will be added onto the vector. This is not to be confused with
+/// the *length* of a vector, which specifies the number of actual elements
+/// within the vector. If a vector's length exceeds its capacity, its capacity
+/// will automatically be increased, but its elements will have to be
+/// reallocated.
+///
+/// For example, a vector with capacity 10 and length 0 would be an empty vector
+/// with space for 10 more elements. Pushing 10 or fewer elements onto the
+/// vector will not change its capacity or cause reallocation to occur. However,
+/// if the vector's length is increased to 11, it will have to reallocate, which
+/// can be slow. For this reason, it is recommended to use [`Vec::with_capacity`]
+/// whenever possible to specify how big the vector is expected to get.
+///
+/// # Guarantees
+///
+/// Due to its incredibly fundamental nature, `Vec` makes a lot of guarantees
+/// about its design. This ensures that it's as low-overhead as possible in
+/// the general case, and can be correctly manipulated in primitive ways
+/// by unsafe code. Note that these guarantees refer to an unqualified `Vec<T>`.
+/// If additional type parameters are added (e.g., to support custom allocators),
+/// overriding their defaults may change the behavior.
+///
+/// Most fundamentally, `Vec` is and always will be a (pointer, capacity, length)
+/// triplet. No more, no less. The order of these fields is completely
+/// unspecified, and you should use the appropriate methods to modify these.
+/// The pointer will never be null, so this type is null-pointer-optimized.
+///
+/// However, the pointer might not actually point to allocated memory. In particular,
+/// if you construct a `Vec` with capacity 0 via [`Vec::new`], [`vec![]`][`vec!`],
+/// [`Vec::with_capacity(0)`][`Vec::with_capacity`], or by calling [`shrink_to_fit`]
+/// on an empty Vec, it will not allocate memory. Similarly, if you store zero-sized
+/// types inside a `Vec`, it will not allocate space for them. *Note that in this case
+/// the `Vec` might not report a [`capacity`] of 0*. `Vec` will allocate if and only
+/// if [`mem::size_of::<T>`]`() * capacity() > 0`. In general, `Vec`'s allocation
+/// details are very subtle — if you intend to allocate memory using a `Vec`
+/// and use it for something else (either to pass to unsafe code, or to build your
+/// own memory-backed collection), be sure to deallocate this memory by using
+/// `from_raw_parts` to recover the `Vec<T>` and then dropping it.
+///
+/// If a `Vec` *has* allocated memory, then the memory it points to is on the heap
+/// (as defined by the allocator Rust is configured to use by default), and its
+/// pointer points to [`len`] initialized, contiguous elements in order (what
+/// you would see if you coerced it to a slice), followed by [`capacity`]` -
+/// `[`len`] logically uninitialized, contiguous elements.
+///
+/// A vector containing the elements `'a'` and `'b'` with capacity 4 can be
+/// visualized as below. The top part is the `Vec` struct, it contains a
+/// pointer to the head of the allocation in the heap, length and capacity.
+/// The bottom part is the allocation on the heap, a contiguous memory block.
+///
+/// ```text
+///             ptr      len  capacity
+///        +--------+--------+--------+
+///        | 0x0123 |      2 |      4 |
+///        +--------+--------+--------+
+///             |
+///             v
+/// Heap   +--------+--------+--------+--------+
+///        |    'a' |    'b' | uninit | uninit |
+///        +--------+--------+--------+--------+
+/// ```
+///
+/// - **uninit** represents memory that is not initialized, see [`MaybeUninit`].
+/// - Note: the ABI is not stable and `Vec` makes no guarantees about its memory
+///   layout (including the order of fields).
+///
+/// `Vec` will never perform a "small optimization" where elements are actually
+/// stored on the stack for two reasons:
+///
+/// * It would make it more difficult for unsafe code to correctly manipulate
+///   a `Vec`. The contents of a `Vec` wouldn't have a stable address if it were
+///   only moved, and it would be more difficult to determine if a `Vec` had
+///   actually allocated memory.
+///
+/// * It would penalize the general case, incurring an additional branch
+///   on every access.
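+///
+/// As an illustrative consequence of the triplet layout (an informal check,
+/// not an additional API guarantee): moving a `Vec` moves only its
+/// (pointer, capacity, length) triplet, so the heap contents keep their
+/// address:
+///
+/// ```
+/// let v = vec![1, 2, 3];
+/// let ptr_before = v.as_ptr();
+/// let v_moved = v; // moves the triplet, not the heap allocation
+/// assert_eq!(ptr_before, v_moved.as_ptr());
+/// ```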
+///
+/// `Vec` will never automatically shrink itself, even if completely empty. This
+/// ensures no unnecessary allocations or deallocations occur. Emptying a `Vec`
+/// and then filling it back up to the same [`len`] should incur no calls to
+/// the allocator. If you wish to free up unused memory, use
+/// [`shrink_to_fit`] or [`shrink_to`].
+///
+/// [`push`] and [`insert`] will never (re)allocate if the reported capacity is
+/// sufficient. [`push`] and [`insert`] *will* (re)allocate if
+/// [`len`]` == `[`capacity`]. That is, the reported capacity is completely
+/// accurate, and can be relied on. It can even be used to manually free the memory
+/// allocated by a `Vec` if desired. Bulk insertion methods *may* reallocate, even
+/// when not necessary.
+///
+/// `Vec` does not guarantee any particular growth strategy when reallocating
+/// when full, nor when [`reserve`] is called. The current strategy is basic
+/// and it may prove desirable to use a non-constant growth factor. Whatever
+/// strategy is used will of course guarantee *O*(1) amortized [`push`].
+///
+/// `vec![x; n]`, `vec![a, b, c, d]`, and
+/// [`Vec::with_capacity(n)`][`Vec::with_capacity`], will all produce a `Vec`
+/// with exactly the requested capacity. If [`len`]` == `[`capacity`],
+/// (as is the case for the [`vec!`] macro), then a `Vec<T>` can be converted to
+/// and from a [`Box<[T]>`][owned slice] without reallocating or moving the elements.
+///
+/// `Vec` will not specifically overwrite any data that is removed from it,
+/// but also won't specifically preserve it. Its uninitialized memory is
+/// scratch space that it may use however it wants. It will generally just do
+/// whatever is most efficient or otherwise easy to implement. Do not rely on
+/// removed data to be erased for security purposes. Even if you drop a `Vec`, its
+/// buffer may simply be reused by another `Vec`. Even if you zero a `Vec`'s memory
+/// first, that might not actually happen because the optimizer does not consider
+/// this a side-effect that must be preserved. There is one case which we will
+/// not break, however: using `unsafe` code to write to the excess capacity,
+/// and then increasing the length to match, is always valid.
+///
+/// Currently, `Vec` does not guarantee the order in which elements are dropped.
+/// The order has changed in the past and may change again.
+///
+/// [`get`]: ../../std/vec/struct.Vec.html#method.get
+/// [`get_mut`]: ../../std/vec/struct.Vec.html#method.get_mut
+/// [`String`]: crate::string::String
+/// [`&str`]: type@str
+/// [`shrink_to_fit`]: Vec::shrink_to_fit
+/// [`shrink_to`]: Vec::shrink_to
+/// [`capacity`]: Vec::capacity
+/// [`mem::size_of::<T>`]: core::mem::size_of
+/// [`len`]: Vec::len
+/// [`push`]: Vec::push
+/// [`insert`]: Vec::insert
+/// [`reserve`]: Vec::reserve
+/// [`MaybeUninit`]: core::mem::MaybeUninit
+/// [owned slice]: Box
+#[stable(feature = "rust1", since = "1.0.0")]
+#[cfg_attr(not(test), rustc_diagnostic_item = "vec_type")]
+pub struct Vec<T, #[unstable(feature = "allocator_api", issue = "32838")] A: Allocator = Global> {
+    buf: RawVec<T, A>,
+    len: usize,
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Inherent methods
+////////////////////////////////////////////////////////////////////////////////
+
+impl<T> Vec<T> {
+    /// Constructs a new, empty `Vec<T>`.
+    ///
+    /// The vector will not allocate until elements are pushed onto it.
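+    ///
+    /// Because of this, a freshly constructed vector of a sized element type
+    /// reports a capacity of 0 (a small illustration; vectors of zero-sized
+    /// types report capacity differently):
+    ///
+    /// ```
+    /// let v: Vec<i32> = Vec::new();
+    /// assert_eq!(v.capacity(), 0);
+    /// assert_eq!(v.len(), 0);
+    /// ```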
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![allow(unused_mut)]
+    /// let mut vec: Vec<i32> = Vec::new();
+    /// ```
+    #[inline]
+    #[rustc_const_stable(feature = "const_vec_new", since = "1.39.0")]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub const fn new() -> Self {
+        Vec { buf: RawVec::NEW, len: 0 }
+    }
+
+    /// Constructs a new, empty `Vec<T>` with the specified capacity.
+    ///
+    /// The vector will be able to hold exactly `capacity` elements without
+    /// reallocating. If `capacity` is 0, the vector will not allocate.
+    ///
+    /// It is important to note that although the returned vector has the
+    /// *capacity* specified, the vector will have a zero *length*. For an
+    /// explanation of the difference between length and capacity, see
+    /// *[Capacity and reallocation]*.
+    ///
+    /// [Capacity and reallocation]: #capacity-and-reallocation
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut vec = Vec::with_capacity(10);
+    ///
+    /// // The vector contains no items, even though it has capacity for more
+    /// assert_eq!(vec.len(), 0);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // These are all done without reallocating...
+    /// for i in 0..10 {
+    ///     vec.push(i);
+    /// }
+    /// assert_eq!(vec.len(), 10);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // ...but this may make the vector reallocate
+    /// vec.push(11);
+    /// assert_eq!(vec.len(), 11);
+    /// assert!(vec.capacity() >= 11);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[inline]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self::with_capacity_in(capacity, Global)
+    }
+
+    /// Tries to construct a new, empty `Vec<T>` with the specified capacity.
+    ///
+    /// The vector will be able to hold exactly `capacity` elements without
+    /// reallocating. If `capacity` is 0, the vector will not allocate.
+    ///
+    /// It is important to note that although the returned vector has the
+    /// *capacity* specified, the vector will have a zero *length*. For an
+    /// explanation of the difference between length and capacity, see
+    /// *[Capacity and reallocation]*.
+    ///
+    /// [Capacity and reallocation]: #capacity-and-reallocation
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut vec = Vec::try_with_capacity(10).unwrap();
+    ///
+    /// // The vector contains no items, even though it has capacity for more
+    /// assert_eq!(vec.len(), 0);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // These are all done without reallocating...
+    /// for i in 0..10 {
+    ///     vec.push(i);
+    /// }
+    /// assert_eq!(vec.len(), 10);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // ...but this may make the vector reallocate
+    /// vec.push(11);
+    /// assert_eq!(vec.len(), 11);
+    /// assert!(vec.capacity() >= 11);
+    ///
+    /// let mut result = Vec::try_with_capacity(usize::MAX);
+    /// assert!(result.is_err());
+    /// ```
+    #[inline]
+    #[doc(alias = "malloc")]
+    #[stable(feature = "kernel", since = "1.0.0")]
+    pub fn try_with_capacity(capacity: usize) -> Result<Self, TryReserveError> {
+        Self::try_with_capacity_in(capacity, Global)
+    }
+
+    /// Creates a `Vec<T>` directly from the raw components of another vector.
+    ///
+    /// # Safety
+    ///
+    /// This is highly unsafe, due to the number of invariants that aren't
+    /// checked:
+    ///
+    /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
+    ///   (at least, it's highly likely to be incorrect if it wasn't).
+    /// * `T` needs to have the same size and alignment as what `ptr` was allocated with.
+    ///   (`T` having a less strict alignment is not sufficient, the alignment really
+    ///   needs to be equal to satisfy the [`dealloc`] requirement that memory must be
+    ///   allocated and deallocated with the same layout.)
+    /// * `length` needs to be less than or equal to `capacity`.
+    /// * `capacity` needs to be the capacity that the pointer was allocated with.
+    ///
+    /// Violating these may cause problems like corrupting the allocator's
+    /// internal data structures. For example it is **not** safe
+    /// to build a `Vec<u8>` from a pointer to a C `char` array with length `size_t`.
+    /// It's also not safe to build one from a `Vec<u16>` and its length, because
+    /// the allocator cares about the alignment, and these two types have different
+    /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after
+    /// turning it into a `Vec<u8>` it'll be deallocated with alignment 1.
+    ///
+    /// The ownership of `ptr` is effectively transferred to the
+    /// `Vec<T>` which may then deallocate, reallocate or change the
+    /// contents of memory pointed to by the pointer at will. Ensure
+    /// that nothing else uses the pointer after calling this
+    /// function.
+    ///
+    /// [`String`]: crate::string::String
+    /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ptr;
+    /// use std::mem;
+    ///
+    /// let v = vec![1, 2, 3];
+    ///
+    // FIXME Update this when vec_into_raw_parts is stabilized
+    /// // Prevent running `v`'s destructor so we are in complete control
+    /// // of the allocation.
+    /// let mut v = mem::ManuallyDrop::new(v);
+    ///
+    /// // Pull out the various important pieces of information about `v`
+    /// let p = v.as_mut_ptr();
+    /// let len = v.len();
+    /// let cap = v.capacity();
+    ///
+    /// unsafe {
+    ///     // Overwrite memory with 4, 5, 6
+    ///     for i in 0..len as isize {
+    ///         ptr::write(p.offset(i), 4 + i);
+    ///     }
+    ///
+    ///     // Put everything back together into a Vec
+    ///     let rebuilt = Vec::from_raw_parts(p, len, cap);
+    ///     assert_eq!(rebuilt, [4, 5, 6]);
+    /// }
+    /// ```
+    #[inline]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self {
+        unsafe { Self::from_raw_parts_in(ptr, length, capacity, Global) }
+    }
+}
+
+impl<T, A: Allocator> Vec<T, A> {
+    /// Constructs a new, empty `Vec<T, A>`.
+    ///
+    /// The vector will not allocate until elements are pushed onto it.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(allocator_api)]
+    ///
+    /// use std::alloc::System;
+    ///
+    /// # #[allow(unused_mut)]
+    /// let mut vec: Vec<i32, System> = Vec::new_in(System);
+    /// ```
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub const fn new_in(alloc: A) -> Self {
+        Vec { buf: RawVec::new_in(alloc), len: 0 }
+    }
+
+    /// Constructs a new, empty `Vec<T, A>` with the specified capacity with the provided
+    /// allocator.
+    ///
+    /// The vector will be able to hold exactly `capacity` elements without
+    /// reallocating. If `capacity` is 0, the vector will not allocate.
+    ///
+    /// It is important to note that although the returned vector has the
+    /// *capacity* specified, the vector will have a zero *length*. For an
+    /// explanation of the difference between length and capacity, see
+    /// *[Capacity and reallocation]*.
+    ///
+    /// [Capacity and reallocation]: #capacity-and-reallocation
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(allocator_api)]
+    ///
+    /// use std::alloc::System;
+    ///
+    /// let mut vec = Vec::with_capacity_in(10, System);
+    ///
+    /// // The vector contains no items, even though it has capacity for more
+    /// assert_eq!(vec.len(), 0);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // These are all done without reallocating...
+    /// for i in 0..10 {
+    ///     vec.push(i);
+    /// }
+    /// assert_eq!(vec.len(), 10);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // ...but this may make the vector reallocate
+    /// vec.push(11);
+    /// assert_eq!(vec.len(), 11);
+    /// assert!(vec.capacity() >= 11);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub fn with_capacity_in(capacity: usize, alloc: A) -> Self {
+        Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 }
+    }
+
+    /// Tries to construct a new, empty `Vec<T, A>` with the specified capacity
+    /// with the provided allocator.
+    ///
+    /// The vector will be able to hold exactly `capacity` elements without
+    /// reallocating. If `capacity` is 0, the vector will not allocate.
+    ///
+    /// It is important to note that although the returned vector has the
+    /// *capacity* specified, the vector will have a zero *length*. For an
+    /// explanation of the difference between length and capacity, see
+    /// *[Capacity and reallocation]*.
+    ///
+    /// [Capacity and reallocation]: #capacity-and-reallocation
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(allocator_api)]
+    ///
+    /// use std::alloc::System;
+    ///
+    /// let mut vec = Vec::try_with_capacity_in(10, System).unwrap();
+    ///
+    /// // The vector contains no items, even though it has capacity for more
+    /// assert_eq!(vec.len(), 0);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // These are all done without reallocating...
+    /// for i in 0..10 {
+    ///     vec.push(i);
+    /// }
+    /// assert_eq!(vec.len(), 10);
+    /// assert_eq!(vec.capacity(), 10);
+    ///
+    /// // ...but this may make the vector reallocate
+    /// vec.push(11);
+    /// assert_eq!(vec.len(), 11);
+    /// assert!(vec.capacity() >= 11);
+    ///
+    /// let mut result = Vec::try_with_capacity_in(usize::MAX, System);
+    /// assert!(result.is_err());
+    /// ```
+    #[inline]
+    #[stable(feature = "kernel", since = "1.0.0")]
+    pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result<Self, TryReserveError> {
+        Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 })
+    }
+
+    /// Creates a `Vec<T, A>` directly from the raw components of another vector.
+    ///
+    /// # Safety
+    ///
+    /// This is highly unsafe, due to the number of invariants that aren't
+    /// checked:
+    ///
+    /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
+    ///   (at least, it's highly likely to be incorrect if it wasn't).
+    /// * `T` needs to have the same size and alignment as what `ptr` was allocated with.
+    ///   (`T` having a less strict alignment is not sufficient, the alignment really
+    ///   needs to be equal to satisfy the [`dealloc`] requirement that memory must be
+    ///   allocated and deallocated with the same layout.)
+    /// * `length` needs to be less than or equal to `capacity`.
+    /// * `capacity` needs to be the capacity that the pointer was allocated with.
+    ///
+    /// Violating these may cause problems like corrupting the allocator's
+    /// internal data structures. For example it is **not** safe
+    /// to build a `Vec<u8>` from a pointer to a C `char` array with length `size_t`.
+    /// It's also not safe to build one from a `Vec<u16>` and its length, because
+    /// the allocator cares about the alignment, and these two types have different
+    /// alignments. The buffer was allocated with alignment 2 (for `u16`), but after
+    /// turning it into a `Vec<u8>` it'll be deallocated with alignment 1.
+    ///
+    /// The ownership of `ptr` is effectively transferred to the
+    /// `Vec<T>` which may then deallocate, reallocate or change the
+    /// contents of memory pointed to by the pointer at will. Ensure
+    /// that nothing else uses the pointer after calling this
+    /// function.
+    ///
+    /// [`String`]: crate::string::String
+    /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(allocator_api)]
+    ///
+    /// use std::alloc::System;
+    ///
+    /// use std::ptr;
+    /// use std::mem;
+    ///
+    /// let mut v = Vec::with_capacity_in(3, System);
+    /// v.push(1);
+    /// v.push(2);
+    /// v.push(3);
+    ///
+    // FIXME Update this when vec_into_raw_parts is stabilized
+    /// // Prevent running `v`'s destructor so we are in complete control
+    /// // of the allocation.
+    /// let mut v = mem::ManuallyDrop::new(v);
+    ///
+    /// // Pull out the various important pieces of information about `v`
+    /// let p = v.as_mut_ptr();
+    /// let len = v.len();
+    /// let cap = v.capacity();
+    /// let alloc = v.allocator();
+    ///
+    /// unsafe {
+    ///     // Overwrite memory with 4, 5, 6
+    ///     for i in 0..len as isize {
+    ///         ptr::write(p.offset(i), 4 + i);
+    ///     }
+    ///
+    ///     // Put everything back together into a Vec
+    ///     let rebuilt = Vec::from_raw_parts_in(p, len, cap, alloc.clone());
+    ///     assert_eq!(rebuilt, [4, 5, 6]);
+    /// }
+    /// ```
+    #[inline]
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self {
+        unsafe { Vec { buf: RawVec::from_raw_parts_in(ptr, capacity, alloc), len: length } }
+    }
+
+    /// Decomposes a `Vec<T>` into its raw components.
+    ///
+    /// Returns the raw pointer to the underlying data, the length of
+    /// the vector (in elements), and the allocated capacity of the
+    /// data (in elements). These are the same arguments in the same
+    /// order as the arguments to [`from_raw_parts`].
+    ///
+    /// After calling this function, the caller is responsible for the
+    /// memory previously managed by the `Vec`. The only way to do
+    /// this is to convert the raw pointer, length, and capacity back
+    /// into a `Vec` with the [`from_raw_parts`] function, allowing
+    /// the destructor to perform the cleanup.
+    ///
+    /// [`from_raw_parts`]: Vec::from_raw_parts
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(vec_into_raw_parts)]
+    /// let v: Vec<i32> = vec![-1, 0, 1];
+    ///
+    /// let (ptr, len, cap) = v.into_raw_parts();
+    ///
+    /// let rebuilt = unsafe {
+    ///     // We can now make changes to the components, such as
+    ///     // transmuting the raw pointer to a compatible type.
+    ///     let ptr = ptr as *mut u32;
+    ///
+    ///     Vec::from_raw_parts(ptr, len, cap)
+    /// };
+    /// assert_eq!(rebuilt, [4294967295, 0, 1]);
+    /// ```
+    #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")]
+    pub fn into_raw_parts(self) -> (*mut T, usize, usize) {
+        let mut me = ManuallyDrop::new(self);
+        (me.as_mut_ptr(), me.len(), me.capacity())
+    }
+
+    /// Decomposes a `Vec<T>` into its raw components.
+    ///
+    /// Returns the raw pointer to the underlying data, the length of the vector (in elements),
+    /// the allocated capacity of the data (in elements), and the allocator. These are the same
+    /// arguments in the same order as the arguments to [`from_raw_parts_in`].
+    ///
+    /// After calling this function, the caller is responsible for the
+    /// memory previously managed by the `Vec`. The only way to do
+    /// this is to convert the raw pointer, length, and capacity back
+    /// into a `Vec` with the [`from_raw_parts_in`] function, allowing
+    /// the destructor to perform the cleanup.
+    ///
+    /// [`from_raw_parts_in`]: Vec::from_raw_parts_in
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(allocator_api, vec_into_raw_parts)]
+    ///
+    /// use std::alloc::System;
+    ///
+    /// let mut v: Vec<i32, System> = Vec::new_in(System);
+    /// v.push(-1);
+    /// v.push(0);
+    /// v.push(1);
+    ///
+    /// let (ptr, len, cap, alloc) = v.into_raw_parts_with_alloc();
+    ///
+    /// let rebuilt = unsafe {
+    ///     // We can now make changes to the components, such as
+    ///     // transmuting the raw pointer to a compatible type.
+    ///     let ptr = ptr as *mut u32;
+    ///
+    ///     Vec::from_raw_parts_in(ptr, len, cap, alloc)
+    /// };
+    /// assert_eq!(rebuilt, [4294967295, 0, 1]);
+    /// ```
+    #[unstable(feature = "allocator_api", issue = "32838")]
+    // #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")]
+    pub fn into_raw_parts_with_alloc(self) -> (*mut T, usize, usize, A) {
+        let mut me = ManuallyDrop::new(self);
+        let len = me.len();
+        let capacity = me.capacity();
+        let ptr = me.as_mut_ptr();
+        let alloc = unsafe { ptr::read(me.allocator()) };
+        (ptr, len, capacity, alloc)
+    }
+
+    /// Returns the number of elements the vector can hold without
+    /// reallocating.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let vec: Vec<i32> = Vec::with_capacity(10);
+    /// assert_eq!(vec.capacity(), 10);
+    /// ```
+    #[inline]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub fn capacity(&self) -> usize {
+        self.buf.capacity()
+    }
+
+    /// Reserves capacity for at least `additional` more elements to be inserted
+    /// in the given `Vec<T>`. The collection may reserve more space to avoid
+    /// frequent reallocations. After calling `reserve`, capacity will be
+    /// greater than or equal to `self.len() + additional`. Does nothing if
+    /// capacity is already sufficient.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity exceeds `isize::MAX` bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut vec = vec![1];
+    /// vec.reserve(10);
+    /// assert!(vec.capacity() >= 11);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub fn reserve(&mut self, additional: usize) {
+        self.buf.reserve(self.len, additional);
+    }
+
+    /// Reserves the minimum capacity for exactly `additional` more elements to
+    /// be inserted in the given `Vec<T>`. After calling `reserve_exact`,
+    /// capacity will be greater than or equal to `self.len() + additional`.
+    /// Does nothing if the capacity is already sufficient.
+    ///
+    /// Note that the allocator may give the collection more space than it
+    /// requests. Therefore, capacity can not be relied upon to be precisely
+    /// minimal. Prefer `reserve` if future insertions are expected.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the new capacity overflows `usize`.
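+    ///
+    /// A rough contrast with `reserve` (the capacities beyond the guaranteed
+    /// lower bound are allocator- and growth-policy-dependent, so only the
+    /// lower bound is asserted here):
+    ///
+    /// ```
+    /// let mut amortized = vec![1];
+    /// let mut exact = vec![1];
+    /// amortized.reserve(10);   // may over-allocate for future growth
+    /// exact.reserve_exact(10); // requests no more than len + 10
+    /// assert!(amortized.capacity() >= 11);
+    /// assert!(exact.capacity() >= 11);
+    /// ```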
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let mut vec = vec![1];
+    /// vec.reserve_exact(10);
+    /// assert!(vec.capacity() >= 11);
+    /// ```
+    #[cfg(not(no_global_oom_handling))]
+    #[stable(feature = "rust1", since = "1.0.0")]
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.buf.reserve_exact(self.len, additional);
+    }
+
+    /// Tries to reserve capacity for at least `additional` more elements to be inserted
+    /// in the given `Vec<T>`. The collection may reserve more space to avoid
+    /// frequent reallocations. After calling `try_reserve`, capacity will be
+    /// greater than or equal to `self.len() + additional`. Does nothing if
+    /// capacity is already sufficient.
+    ///
+    /// # Errors
+    ///
+    /// If the capacity overflows, or the allocator reports a failure, then an error
+    /// is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(try_reserve)]
+    /// use std::collections::TryReserveError;
+    ///
+    /// fn process_data(data: &[u32]) -> Result<Vec<u32>, TryReserveError> {
+    ///     let mut output = Vec::new();
+    ///
+    ///     // Pre-reserve the memory, exiting if we can't
+    ///     output.try_reserve(data.len())?;
+    ///
+    ///     // Now we know this can't OOM in the middle of our complex work
+    ///     output.extend(data.iter().map(|&val| {
+    ///         val * 2 + 5 // very complicated
+    ///     }));
+    ///
+    ///     Ok(output)
+    /// }
+    /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?");
+    /// ```
+    #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")]
+    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.buf.try_reserve(self.len, additional)
+    }
+
+    /// Tries to reserve the minimum capacity for exactly `additional`
+    /// elements to be inserted in the given `Vec<T>`. After calling
+    /// `try_reserve_exact`, capacity will be greater than or equal to
+    /// `self.len() + additional` if it returns `Ok(())`.
+    /// Does nothing if the capacity is already sufficient.
+    ///
+    /// Note that the allocator may give the collection more space than it
+    /// requests. Therefore, capacity can not be relied upon to be precisely
+    /// minimal. Prefer `reserve` if future insertions are expected.
+    ///
+    /// # Errors
+    ///
+    /// If the capacity overflows, or the allocator reports a failure, then an error
+    /// is returned.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// #![feature(try_reserve)]
+    /// use std::collections::TryReserveError;
+    ///
+    /// fn process_data(data: &[u32]) -> Result<Vec<u32>, TryReserveError> {
+    ///     let mut output = Vec::new();
+    ///
+    ///     // Pre-reserve the memory, exiting if we can't
+    ///     output.try_reserve_exact(data.len())?;
+    ///
+    ///     // Now we know this can't OOM in the middle of our complex work
+    ///     output.extend(data.iter().map(|&val| {
+    ///         val * 2 + 5 // very complicated
+    ///     }));
+    ///
+    ///     Ok(output)
+    /// }
+    /// # process_data(&[1, 2, 3]).expect("why is the test harness OOMing on 12 bytes?");
+    /// ```
+    #[unstable(feature = "try_reserve", reason = "new API", issue = "48043")]
+    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.buf.try_reserve_exact(self.len, additional)
+    }
+
+    /// Shrinks the capacity of the vector as much as possible.
+    ///
+    /// It will drop down as close as possible to the length but the allocator
+    /// may still inform the vector that there is space for a few more elements.
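+    ///
+    /// In particular, shrinking an emptied vector is expected to release the
+    /// allocation entirely (illustrative; as noted above, the final capacity
+    /// is ultimately up to the allocator):
+    ///
+    /// ```
+    /// let mut v = Vec::with_capacity(16);
+    /// v.push(1);
+    /// v.clear();
+    /// v.shrink_to_fit();
+    /// assert_eq!(v.capacity(), 0);
+    /// ```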
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to_fit(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() > self.len { + self.buf.shrink_to_fit(self.len); + } + } + + /// Tries to shrink the capacity of the vector as much as possible. + /// + /// It will drop down as close as possible to the length but the allocator + /// may still inform the vector that there is space for a few more elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.try_shrink_to_fit().unwrap(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[doc(alias = "realloc")] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_shrink_to_fit(&mut self) -> Result<(), TryReserveError> { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::try_shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() <= self.len { + return Ok(()); + } + + self.buf.try_shrink_to_fit(self.len) + } + + /// Shrinks the capacity of the vector with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op. + /// + /// # Examples + /// + /// ``` + /// #![feature(shrink_to)] + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.shrink_to(4); + /// assert!(vec.capacity() >= 4); + /// vec.shrink_to(0); + /// assert!(vec.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "shrink_to", reason = "new API", issue = "56431")] + pub fn shrink_to(&mut self, min_capacity: usize) { + if self.capacity() > min_capacity { + self.buf.shrink_to_fit(cmp::max(self.len, min_capacity)); + } + } + + /// Converts the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. + /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.into_boxed_slice(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.into_boxed_slice(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_boxed_slice(mut self) -> Box<[T], A> { + unsafe { + self.shrink_to_fit(); + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + buf.into_box(len).assume_init() + } + } + + /// Tries to convert the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. 
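+    ///
+    /// On success this behaves like `into_boxed_slice`, so the boxed slice
+    /// can be converted back into a vector without reallocating (a sketch,
+    /// assuming the shrink succeeds):
+    ///
+    /// ```
+    /// let b = vec![1, 2, 3].try_into_boxed_slice().unwrap();
+    /// assert_eq!(b.into_vec(), [1, 2, 3]);
+    /// ```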
+ /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.try_into_boxed_slice().unwrap(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.try_into_boxed_slice().unwrap(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_into_boxed_slice(mut self) -> Result, TryReserveError> { + unsafe { + self.try_shrink_to_fit()?; + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + Ok(buf.into_box(len).assume_init()) + } + } + + /// Shortens the vector, keeping the first `len` elements and dropping + /// the rest. + /// + /// If `len` is greater than the vector's current length, this has no + /// effect. + /// + /// The [`drain`] method can emulate `truncate`, but causes the excess + /// elements to be returned instead of dropped. + /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. + /// + /// # Examples + /// + /// Truncating a five element vector to two elements: + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// vec.truncate(2); + /// assert_eq!(vec, [1, 2]); + /// ``` + /// + /// No truncation occurs when `len` is greater than the vector's current + /// length: + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(8); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + /// + /// Truncating when `len == 0` is equivalent to calling the [`clear`] + /// method. + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.truncate(0); + /// assert_eq!(vec, []); + /// ``` + /// + /// [`clear`]: Vec::clear + /// [`drain`]: Vec::drain + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, len: usize) { + // This is safe because: + // + // * the slice passed to `drop_in_place` is valid; the `len > self.len` + // case avoids creating an invalid slice, and + // * the `len` of the vector is shrunk before calling `drop_in_place`, + // such that no value will be dropped twice in case `drop_in_place` + // were to panic once (if it panics twice, the program aborts). + unsafe { + // Note: It's intentional that this is `>` and not `>=`. + // Changing it to `>=` has negative performance + // implications in some cases. See #78884 for more. + if len > self.len { + return; + } + let remaining_len = self.len - len; + let s = ptr::slice_from_raw_parts_mut(self.as_mut_ptr().add(len), remaining_len); + self.len = len; + ptr::drop_in_place(s); + } + } + + /// Extracts a slice containing the entire vector. + /// + /// Equivalent to `&s[..]`. + /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Write}; + /// let buffer = vec![1, 2, 3, 5, 8]; + /// io::sink().write(buffer.as_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_slice(&self) -> &[T] { + self + } + + /// Extracts a mutable slice of the entire vector. + /// + /// Equivalent to `&mut s[..]`. + /// + /// # Examples + /// + /// ``` + /// use std::io::{self, Read}; + /// let mut buffer = vec![0; 3]; + /// io::repeat(0b101).read_exact(buffer.as_mut_slice()).unwrap(); + /// ``` + #[inline] + #[stable(feature = "vec_as_slice", since = "1.7.0")] + pub fn as_mut_slice(&mut self) -> &mut [T] { + self + } + + /// Returns a raw pointer to the vector's buffer. 
+ /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// The caller must also ensure that the memory the pointer (non-transitively) points to + /// is never written to (except inside an `UnsafeCell`) using this pointer or any pointer + /// derived from it. If you need to mutate the contents of the slice, use [`as_mut_ptr`]. + /// + /// # Examples + /// + /// ``` + /// let x = vec![1, 2, 4]; + /// let x_ptr = x.as_ptr(); + /// + /// unsafe { + /// for i in 0..x.len() { + /// assert_eq!(*x_ptr.add(i), 1 << i); + /// } + /// } + /// ``` + /// + /// [`as_mut_ptr`]: Vec::as_mut_ptr + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_ptr(&self) -> *const T { + // We shadow the slice method of the same name to avoid going through + // `deref`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns an unsafe mutable pointer to the vector's buffer. + /// + /// The caller must ensure that the vector outlives the pointer this + /// function returns, or else it will end up pointing to garbage. + /// Modifying the vector may cause its buffer to be reallocated, + /// which would also make any pointers to it invalid. + /// + /// # Examples + /// + /// ``` + /// // Allocate vector big enough for 4 elements. + /// let size = 4; + /// let mut x: Vec = Vec::with_capacity(size); + /// let x_ptr = x.as_mut_ptr(); + /// + /// // Initialize elements via raw pointer writes, then set length. + /// unsafe { + /// for i in 0..size { + /// *x_ptr.add(i) = i as i32; + /// } + /// x.set_len(size); + /// } + /// assert_eq!(&*x, &[0, 1, 2, 3]); + /// ``` + #[stable(feature = "vec_as_ptr", since = "1.37.0")] + #[inline] + pub fn as_mut_ptr(&mut self) -> *mut T { + // We shadow the slice method of the same name to avoid going through + // `deref_mut`, which creates an intermediate reference. + let ptr = self.buf.ptr(); + unsafe { + assume(!ptr.is_null()); + } + ptr + } + + /// Returns a reference to the underlying allocator. + #[unstable(feature = "allocator_api", issue = "32838")] + #[inline] + pub fn allocator(&self) -> &A { + self.buf.allocator() + } + + /// Forces the length of the vector to `new_len`. + /// + /// This is a low-level operation that maintains none of the normal + /// invariants of the type. Normally changing the length of a vector + /// is done using one of the safe operations instead, such as + /// [`truncate`], [`resize`], [`extend`], or [`clear`]. + /// + /// [`truncate`]: Vec::truncate + /// [`resize`]: Vec::resize + /// [`extend`]: Extend::extend + /// [`clear`]: Vec::clear + /// + /// # Safety + /// + /// - `new_len` must be less than or equal to [`capacity()`]. + /// - The elements at `old_len..new_len` must be initialized. + /// + /// [`capacity()`]: Vec::capacity + /// + /// # Examples + /// + /// This method can be useful for situations in which the vector + /// is serving as a buffer for other code, particularly over FFI: + /// + /// ```no_run + /// # #![allow(dead_code)] + /// # // This is just a minimal skeleton for the doc example; + /// # // don't use this as a starting point for a real library. 
+ /// # pub struct StreamWrapper { strm: *mut std::ffi::c_void } + /// # const Z_OK: i32 = 0; + /// # extern "C" { + /// # fn deflateGetDictionary( + /// # strm: *mut std::ffi::c_void, + /// # dictionary: *mut u8, + /// # dictLength: *mut usize, + /// # ) -> i32; + /// # } + /// # impl StreamWrapper { + /// pub fn get_dictionary(&self) -> Option> { + /// // Per the FFI method's docs, "32768 bytes is always enough". + /// let mut dict = Vec::with_capacity(32_768); + /// let mut dict_length = 0; + /// // SAFETY: When `deflateGetDictionary` returns `Z_OK`, it holds that: + /// // 1. `dict_length` elements were initialized. + /// // 2. `dict_length` <= the capacity (32_768) + /// // which makes `set_len` safe to call. + /// unsafe { + /// // Make the FFI call... + /// let r = deflateGetDictionary(self.strm, dict.as_mut_ptr(), &mut dict_length); + /// if r == Z_OK { + /// // ...and update the length to what was initialized. + /// dict.set_len(dict_length); + /// Some(dict) + /// } else { + /// None + /// } + /// } + /// } + /// # } + /// ``` + /// + /// While the following example is sound, there is a memory leak since + /// the inner vectors were not freed prior to the `set_len` call: + /// + /// ``` + /// let mut vec = vec![vec![1, 0, 0], + /// vec![0, 1, 0], + /// vec![0, 0, 1]]; + /// // SAFETY: + /// // 1. `old_len..0` is empty so no elements need to be initialized. + /// // 2. `0 <= capacity` always holds whatever `capacity` is. + /// unsafe { + /// vec.set_len(0); + /// } + /// ``` + /// + /// Normally, here, one would use [`clear`] instead to correctly drop + /// the contents and thus not leak memory. + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn set_len(&mut self, new_len: usize) { + debug_assert!(new_len <= self.capacity()); + + self.len = new_len; + } + + /// Removes an element from the vector and returns it. + /// + /// The removed element is replaced by the last element of the vector. + /// + /// This does not preserve ordering, but is O(1). + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec!["foo", "bar", "baz", "qux"]; + /// + /// assert_eq!(v.swap_remove(1), "bar"); + /// assert_eq!(v, ["foo", "qux", "baz"]); + /// + /// assert_eq!(v.swap_remove(0), "foo"); + /// assert_eq!(v, ["baz", "qux"]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn swap_remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("swap_remove index (is {}) should be < len (is {})", index, len); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // We replace self[index] with the last element. Note that if the + // bounds check above succeeds there must be a last element (which + // can be self[index] itself). + let last = ptr::read(self.as_ptr().add(len - 1)); + let hole = self.as_mut_ptr().add(index); + self.set_len(len - 1); + ptr::replace(hole, last) + } + } + + /// Inserts an element at position `index` within the vector, shifting all + /// elements after it to the right. + /// + /// # Panics + /// + /// Panics if `index > len`. 
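// --- Illustrative sketch (editorial, not part of the patch) ---
// The FFI example above is one instance of a general pattern: reserve
// capacity, let some writer initialize a prefix of the buffer, then
// `set_len` to publish it. A std-only rendition, with `write_bytes`
// standing in for the foreign initializer:
fn filled_with_zeros(n: usize) -> Vec<u8> {
    let mut buf = Vec::with_capacity(n);
    unsafe {
        core::ptr::write_bytes(buf.as_mut_ptr(), 0u8, n);
        // SAFETY: exactly `n <= capacity()` bytes were initialized above.
        buf.set_len(n);
    }
    buf
}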
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.insert(1, 4); + /// assert_eq!(vec, [1, 4, 2, 3]); + /// vec.insert(4, 5); + /// assert_eq!(vec, [1, 4, 2, 3, 5]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, index: usize, element: T) { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("insertion index (is {}) should be <= len (is {})", index, len); + } + + let len = self.len(); + if index > len { + assert_failed(index, len); + } + + // space for the new element + if len == self.buf.capacity() { + self.reserve(1); + } + + unsafe { + // infallible + // The spot to put the new value + { + let p = self.as_mut_ptr().add(index); + // Shift everything over to make space. (Duplicating the + // `index`th element into two consecutive places.) + ptr::copy(p, p.offset(1), len - index); + // Write it in, overwriting the first copy of the `index`th + // element. + ptr::write(p, element); + } + self.set_len(len + 1); + } + } + + /// Removes and returns the element at position `index` within the vector, + /// shifting all elements after it to the left. + /// + /// # Panics + /// + /// Panics if `index` is out of bounds. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// assert_eq!(v.remove(1), 2); + /// assert_eq!(v, [1, 3]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, index: usize) -> T { + #[cold] + #[inline(never)] + fn assert_failed(index: usize, len: usize) -> ! { + panic!("removal index (is {}) should be < len (is {})", index, len); + } + + let len = self.len(); + if index >= len { + assert_failed(index, len); + } + unsafe { + // infallible + let ret; + { + // the place we are taking from. + let ptr = self.as_mut_ptr().add(index); + // copy it out, unsafely having a copy of the value on + // the stack and in the vector at the same time. + ret = ptr::read(ptr); + + // Shift everything down to fill in that spot. + ptr::copy(ptr.offset(1), ptr, len - index - 1); + } + self.set_len(len - 1); + ret + } + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all elements `e` such that `f(&e)` returns `false`. + /// This method operates in place, visiting each element exactly once in the + /// original order, and preserves the order of the retained elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.retain(|&x| x % 2 == 0); + /// assert_eq!(vec, [2, 4]); + /// ``` + /// + /// Because the elements are visited exactly once in the original order, + /// external state may be used to decide which elements to keep. + /// + /// ``` + /// let mut vec = vec![1, 2, 3, 4, 5]; + /// let keep = [false, true, true, false, true]; + /// let mut iter = keep.iter(); + /// vec.retain(|_| *iter.next().unwrap()); + /// assert_eq!(vec, [2, 3, 5]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + let original_len = self.len(); + // Avoid double drop if the drop guard is not executed, + // since we may make some holes during the process. + unsafe { self.set_len(0) }; + + // Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked] + // |<- processed len ->| ^- next to check + // |<- deleted cnt ->| + // |<- original_len ->| + // Kept: Elements which predicate returns true on. + // Hole: Moved or dropped element slot. 
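// --- Illustrative sketch (editorial, not part of the patch) ---
// Why `swap_remove` exists next to `remove`: `remove` backshifts the
// whole tail with `ptr::copy`, while `swap_remove` moves only the last
// element into the hole.
fn removal_demo() {
    let mut v: Vec<i32> = (0..5).collect(); // [0, 1, 2, 3, 4]
    v.remove(0);                            // shifts 4 elements: O(len)
    assert_eq!(v, [1, 2, 3, 4]);
    v.swap_remove(0);                       // moves one element: O(1)
    assert_eq!(v, [4, 2, 3]);
}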
+ // Unchecked: Unchecked valid elements. + // + // This drop guard will be invoked when predicate or `drop` of element panicked. + // It shifts unchecked elements to cover holes and `set_len` to the correct length. + // In cases when predicate and `drop` never panick, it will be optimized out. + struct BackshiftOnDrop<'a, T, A: Allocator> { + v: &'a mut Vec, + processed_len: usize, + deleted_cnt: usize, + original_len: usize, + } + + impl Drop for BackshiftOnDrop<'_, T, A> { + fn drop(&mut self) { + if self.deleted_cnt > 0 { + // SAFETY: Trailing unchecked items must be valid since we never touch them. + unsafe { + ptr::copy( + self.v.as_ptr().add(self.processed_len), + self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt), + self.original_len - self.processed_len, + ); + } + } + // SAFETY: After filling holes, all items are in contiguous memory. + unsafe { + self.v.set_len(self.original_len - self.deleted_cnt); + } + } + } + + let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len }; + + while g.processed_len < original_len { + // SAFETY: Unchecked element must be valid. + let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) }; + if !f(cur) { + // Advance early to avoid double drop if `drop_in_place` panicked. + g.processed_len += 1; + g.deleted_cnt += 1; + // SAFETY: We never touch this element again after dropped. + unsafe { ptr::drop_in_place(cur) }; + // We already advanced the counter. + continue; + } + if g.deleted_cnt > 0 { + // SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element. + // We use copy for move, and never touch this element again. + unsafe { + let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt); + ptr::copy_nonoverlapping(cur, hole_slot, 1); + } + } + g.processed_len += 1; + } + + // All item are processed. This can be optimized to `set_len` by LLVM. + drop(g); + } + + /// Removes all but the first of consecutive elements in the vector that resolve to the same + /// key. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![10, 20, 21, 30, 20]; + /// + /// vec.dedup_by_key(|i| *i / 10); + /// + /// assert_eq!(vec, [10, 20, 30, 20]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + #[inline] + pub fn dedup_by_key(&mut self, mut key: F) + where + F: FnMut(&mut T) -> K, + K: PartialEq, + { + self.dedup_by(|a, b| key(a) == key(b)) + } + + /// Removes all but the first of consecutive elements in the vector satisfying a given equality + /// relation. + /// + /// The `same_bucket` function is passed references to two elements from the vector and + /// must determine if the elements compare equal. The elements are passed in opposite order + /// from their order in the slice, so if `same_bucket(a, b)` returns `true`, `a` is removed. + /// + /// If the vector is sorted, this removes all duplicates. 
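// --- Illustrative sketch (editorial, not part of the patch) ---
// Observable contract that the `BackshiftOnDrop` guard above preserves:
// every element is visited exactly once, in order, and kept elements
// retain their order even while holes open up mid-scan.
fn retain_demo() {
    let mut v: Vec<i32> = (1..=6).collect();
    let mut visited = Vec::new();
    v.retain(|&x| {
        visited.push(x);
        x % 3 != 0
    });
    assert_eq!(visited, [1, 2, 3, 4, 5, 6]); // each element seen once
    assert_eq!(v, [1, 2, 4, 5]);             // order preserved
}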
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["foo", "bar", "Bar", "baz", "bar"]; + /// + /// vec.dedup_by(|a, b| a.eq_ignore_ascii_case(b)); + /// + /// assert_eq!(vec, ["foo", "bar", "baz", "bar"]); + /// ``` + #[stable(feature = "dedup_by", since = "1.16.0")] + pub fn dedup_by(&mut self, mut same_bucket: F) + where + F: FnMut(&mut T, &mut T) -> bool, + { + let len = self.len(); + if len <= 1 { + return; + } + + /* INVARIANT: vec.len() > read >= write > write-1 >= 0 */ + struct FillGapOnDrop<'a, T, A: core::alloc::Allocator> { + /* Offset of the element we want to check if it is duplicate */ + read: usize, + + /* Offset of the place where we want to place the non-duplicate + * when we find it. */ + write: usize, + + /* The Vec that would need correction if `same_bucket` panicked */ + vec: &'a mut Vec, + } + + impl<'a, T, A: core::alloc::Allocator> Drop for FillGapOnDrop<'a, T, A> { + fn drop(&mut self) { + /* This code gets executed when `same_bucket` panics */ + + /* SAFETY: invariant guarantees that `read - write` + * and `len - read` never overflow and that the copy is always + * in-bounds. */ + unsafe { + let ptr = self.vec.as_mut_ptr(); + let len = self.vec.len(); + + /* How many items were left when `same_bucket` paniced. + * Basically vec[read..].len() */ + let items_left = len.wrapping_sub(self.read); + + /* Pointer to first item in vec[write..write+items_left] slice */ + let dropped_ptr = ptr.add(self.write); + /* Pointer to first item in vec[read..] slice */ + let valid_ptr = ptr.add(self.read); + + /* Copy `vec[read..]` to `vec[write..write+items_left]`. + * The slices can overlap, so `copy_nonoverlapping` cannot be used */ + ptr::copy(valid_ptr, dropped_ptr, items_left); + + /* How many items have been already dropped + * Basically vec[read..write].len() */ + let dropped = self.read.wrapping_sub(self.write); + + self.vec.set_len(len - dropped); + } + } + } + + let mut gap = FillGapOnDrop { read: 1, write: 1, vec: self }; + let ptr = gap.vec.as_mut_ptr(); + + /* Drop items while going through Vec, it should be more efficient than + * doing slice partition_dedup + truncate */ + + /* SAFETY: Because of the invariant, read_ptr, prev_ptr and write_ptr + * are always in-bounds and read_ptr never aliases prev_ptr */ + unsafe { + while gap.read < len { + let read_ptr = ptr.add(gap.read); + let prev_ptr = ptr.add(gap.write.wrapping_sub(1)); + + if same_bucket(&mut *read_ptr, &mut *prev_ptr) { + // Increase `gap.read` now since the drop may panic. + gap.read += 1; + /* We have found duplicate, drop it in-place */ + ptr::drop_in_place(read_ptr); + } else { + let write_ptr = ptr.add(gap.write); + + /* Because `read_ptr` can be equal to `write_ptr`, we either + * have to use `copy` or conditional `copy_nonoverlapping`. + * Looks like the first option is faster. */ + ptr::copy(read_ptr, write_ptr, 1); + + /* We have filled that place, so go further */ + gap.write += 1; + gap.read += 1; + } + } + + /* Technically we could let `gap` clean up with its Drop, but + * when `same_bucket` is guaranteed to not panic, this bloats a little + * the codegen, so we just do it manually */ + gap.vec.set_len(gap.write); + mem::forget(gap); + } + } + + /// Appends an element to the back of a collection. + /// + /// # Panics + /// + /// Panics if the new capacity exceeds `isize::MAX` bytes. 
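// --- Illustrative sketch (editorial, not part of the patch) ---
// The same read/write cursor scheme as `dedup_by`, modelled in safe
// code (clones instead of in-place moves, so it is a model rather than
// a replacement for the pointer-based version above):
fn dedup_model<T: PartialEq + Clone>(v: &mut Vec<T>) {
    let mut write = 1;
    for read in 1..v.len() {
        if v[read] != v[write - 1] {
            let val = v[read].clone();
            v[write] = val;
            write += 1;
        }
    }
    let new_len = write.min(v.len());
    v.truncate(new_len);
}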
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.push(3); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, value: T) { + // This will panic or abort if we would allocate > isize::MAX bytes + // or if the length increment would overflow for zero-sized types. + if self.len == self.buf.capacity() { + self.reserve(1); + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + } + + /// Tries to append an element to the back of a collection. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2]; + /// vec.try_push(3).unwrap(); + /// assert_eq!(vec, [1, 2, 3]); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_push(&mut self, value: T) -> Result<(), TryReserveError> { + if self.len == self.buf.capacity() { + self.try_reserve(1)?; + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + Ok(()) + } + + /// Removes the last element from a vector and returns it, or [`None`] if it + /// is empty. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// assert_eq!(vec.pop(), Some(3)); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option { + if self.len == 0 { + None + } else { + unsafe { + self.len -= 1; + Some(ptr::read(self.as_ptr().add(self.len()))) + } + } + } + + /// Moves all the elements of `other` into `Self`, leaving `other` empty. + /// + /// # Panics + /// + /// Panics if the number of elements in the vector overflows a `usize`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let mut vec2 = vec![4, 5, 6]; + /// vec.append(&mut vec2); + /// assert_eq!(vec, [1, 2, 3, 4, 5, 6]); + /// assert_eq!(vec2, []); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "append", since = "1.4.0")] + pub fn append(&mut self, other: &mut Self) { + unsafe { + self.append_elements(other.as_slice() as _); + other.set_len(0); + } + } + + /// Appends elements to `Self` from other buffer. + #[cfg(not(no_global_oom_handling))] + #[inline] + unsafe fn append_elements(&mut self, other: *const [T]) { + let count = unsafe { (*other).len() }; + self.reserve(count); + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + } + + /// Tries to append elements to `Self` from other buffer. + #[inline] + unsafe fn try_append_elements(&mut self, other: *const [T]) -> Result<(), TryReserveError> { + let count = unsafe { (*other).len() }; + self.try_reserve(count)?; + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + Ok(()) + } + + /// Creates a draining iterator that removes the specified range in the vector + /// and yields the removed items. + /// + /// When the iterator **is** dropped, all elements in the range are removed + /// from the vector, even if the iterator was not fully consumed. If the + /// iterator **is not** dropped (with [`mem::forget`] for example), it is + /// unspecified how many elements are removed. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. 
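// --- Illustrative sketch (editorial, not part of the patch) ---
// `try_push` can be modelled on stable Rust with `try_reserve`
// (stable since 1.57): reserve fallibly first, so the subsequent
// `push` can no longer hit an allocation failure.
use std::collections::TryReserveError;

fn try_push_model<T>(v: &mut Vec<T>, value: T) -> Result<(), TryReserveError> {
    v.try_reserve(1)?; // returns Err instead of aborting on OOM
    v.push(value);     // capacity is already there; no allocation
    Ok(())
}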
+ /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// let u: Vec<_> = v.drain(1..).collect(); + /// assert_eq!(v, &[1]); + /// assert_eq!(u, &[2, 3]); + /// + /// // A full range clears the vector + /// v.drain(..); + /// assert_eq!(v, &[]); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain(&mut self, range: R) -> Drain<'_, T, A> + where + R: RangeBounds, + { + // Memory safety + // + // When the Drain is first created, it shortens the length of + // the source vector to make sure no uninitialized or moved-from elements + // are accessible at all if the Drain's destructor never gets to run. + // + // Drain will ptr::read out the values to remove. + // When finished, remaining tail of the vec is copied back to cover + // the hole, and the vector length is restored to the new length. + // + let len = self.len(); + let Range { start, end } = slice::range(range, ..len); + + unsafe { + // set self.vec length's to start, to be safe in case Drain is leaked + self.set_len(start); + // Use the borrow in the IterMut to indicate borrowing behavior of the + // whole Drain iterator (like &mut T). + let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start); + Drain { + tail_start: end, + tail_len: len - end, + iter: range_slice.iter(), + vec: NonNull::from(self), + } + } + } + + /// Clears the vector, removing all values. + /// + /// Note that this method has no effect on the allocated capacity + /// of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// + /// v.clear(); + /// + /// assert!(v.is_empty()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.truncate(0) + } + + /// Returns the number of elements in the vector, also referred to + /// as its 'length'. + /// + /// # Examples + /// + /// ``` + /// let a = vec![1, 2, 3]; + /// assert_eq!(a.len(), 3); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the vector contains no elements. + /// + /// # Examples + /// + /// ``` + /// let mut v = Vec::new(); + /// assert!(v.is_empty()); + /// + /// v.push(1); + /// assert!(!v.is_empty()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the collection into two at the given index. + /// + /// Returns a newly allocated vector containing the elements in the range + /// `[at, len)`. After the call, the original vector will be left containing + /// the elements `[0, at)` with its previous capacity unchanged. + /// + /// # Panics + /// + /// Panics if `at > len`. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// let vec2 = vec.split_off(1); + /// assert_eq!(vec, [1]); + /// assert_eq!(vec2, [2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[must_use = "use `.truncate()` if you don't need the other half"] + #[stable(feature = "split_off", since = "1.4.0")] + pub fn split_off(&mut self, at: usize) -> Self + where + A: Clone, + { + #[cold] + #[inline(never)] + fn assert_failed(at: usize, len: usize) -> ! 
{ + panic!("`at` split index (is {}) should be <= len (is {})", at, len); + } + + if at > self.len() { + assert_failed(at, self.len()); + } + + if at == 0 { + // the new vector can take over the original buffer and avoid the copy + return mem::replace( + self, + Vec::with_capacity_in(self.capacity(), self.allocator().clone()), + ); + } + + let other_len = self.len - at; + let mut other = Vec::with_capacity_in(other_len, self.allocator().clone()); + + // Unsafely `set_len` and copy items to `other`. + unsafe { + self.set_len(at); + other.set_len(other_len); + + ptr::copy_nonoverlapping(self.as_ptr().add(at), other.as_mut_ptr(), other.len()); + } + other + } + + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with the result of + /// calling the closure `f`. The return values from `f` will end up + /// in the `Vec` in the order they have been generated. + /// + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method uses a closure to create new values on every push. If + /// you'd rather [`Clone`] a given value, use [`Vec::resize`]. If you + /// want to use the [`Default`] trait to generate values, you can + /// pass [`Default::default`] as the second argument. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 3]; + /// vec.resize_with(5, Default::default); + /// assert_eq!(vec, [1, 2, 3, 0, 0]); + /// + /// let mut vec = vec![]; + /// let mut p = 1; + /// vec.resize_with(4, || { p *= 2; p }); + /// assert_eq!(vec, [2, 4, 8, 16]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize_with", since = "1.33.0")] + pub fn resize_with(&mut self, new_len: usize, f: F) + where + F: FnMut() -> T, + { + let len = self.len(); + if new_len > len { + self.extend_with(new_len - len, ExtendFunc(f)); + } else { + self.truncate(new_len); + } + } + + /// Consumes and leaks the `Vec`, returning a mutable reference to the contents, + /// `&'a mut [T]`. Note that the type `T` must outlive the chosen lifetime + /// `'a`. If the type has only static references, or none at all, then this + /// may be chosen to be `'static`. + /// + /// This function is similar to the [`leak`][Box::leak] function on [`Box`] + /// except that there is no way to recover the leaked memory. + /// + /// This function is mainly useful for data that lives for the remainder of + /// the program's life. Dropping the returned reference will cause a memory + /// leak. + /// + /// # Examples + /// + /// Simple usage: + /// + /// ``` + /// let x = vec![1, 2, 3]; + /// let static_ref: &'static mut [usize] = x.leak(); + /// static_ref[0] += 1; + /// assert_eq!(static_ref, &[2, 2, 3]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_leak", since = "1.47.0")] + #[inline] + pub fn leak<'a>(self) -> &'a mut [T] + where + A: 'a, + { + Box::leak(self.into_boxed_slice()) + } + + /// Returns the remaining spare capacity of the vector as a slice of + /// `MaybeUninit`. + /// + /// The returned slice can be used to fill the vector with data (e.g. by + /// reading from a file) before marking the data as initialized using the + /// [`set_len`] method. + /// + /// [`set_len`]: Vec::set_len + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_spare_capacity, maybe_uninit_extra)] + /// + /// // Allocate vector big enough for 10 elements. 
+ /// let mut v = Vec::with_capacity(10); + /// + /// // Fill in the first 3 elements. + /// let uninit = v.spare_capacity_mut(); + /// uninit[0].write(0); + /// uninit[1].write(1); + /// uninit[2].write(2); + /// + /// // Mark the first 3 elements of the vector as being initialized. + /// unsafe { + /// v.set_len(3); + /// } + /// + /// assert_eq!(&v, &[0, 1, 2]); + /// ``` + #[unstable(feature = "vec_spare_capacity", issue = "75017")] + #[inline] + pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { + // Note: + // This method is not implemented in terms of `split_at_spare_mut`, + // to prevent invalidation of pointers to the buffer. + unsafe { + slice::from_raw_parts_mut( + self.as_mut_ptr().add(self.len) as *mut MaybeUninit, + self.buf.capacity() - self.len, + ) + } + } + + /// Returns vector content as a slice of `T`, along with the remaining spare + /// capacity of the vector as a slice of `MaybeUninit`. + /// + /// The returned spare capacity slice can be used to fill the vector with data + /// (e.g. by reading from a file) before marking the data as initialized using + /// the [`set_len`] method. + /// + /// [`set_len`]: Vec::set_len + /// + /// Note that this is a low-level API, which should be used with care for + /// optimization purposes. If you need to append data to a `Vec` + /// you can use [`push`], [`extend`], [`extend_from_slice`], + /// [`extend_from_within`], [`insert`], [`append`], [`resize`] or + /// [`resize_with`], depending on your exact needs. + /// + /// [`push`]: Vec::push + /// [`extend`]: Vec::extend + /// [`extend_from_slice`]: Vec::extend_from_slice + /// [`extend_from_within`]: Vec::extend_from_within + /// [`insert`]: Vec::insert + /// [`append`]: Vec::append + /// [`resize`]: Vec::resize + /// [`resize_with`]: Vec::resize_with + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_split_at_spare, maybe_uninit_extra)] + /// + /// let mut v = vec![1, 1, 2]; + /// + /// // Reserve additional space big enough for 10 elements. + /// v.reserve(10); + /// + /// let (init, uninit) = v.split_at_spare_mut(); + /// let sum = init.iter().copied().sum::(); + /// + /// // Fill in the next 4 elements. + /// uninit[0].write(sum); + /// uninit[1].write(sum * 2); + /// uninit[2].write(sum * 3); + /// uninit[3].write(sum * 4); + /// + /// // Mark the 4 elements of the vector as being initialized. + /// unsafe { + /// let len = v.len(); + /// v.set_len(len + 4); + /// } + /// + /// assert_eq!(&v, &[1, 1, 2, 4, 8, 12, 16]); + /// ``` + #[unstable(feature = "vec_split_at_spare", issue = "81944")] + #[inline] + pub fn split_at_spare_mut(&mut self) -> (&mut [T], &mut [MaybeUninit]) { + // SAFETY: + // - len is ignored and so never changed + let (init, spare, _) = unsafe { self.split_at_spare_mut_with_len() }; + (init, spare) + } + + /// Safety: changing returned .2 (&mut usize) is considered the same as calling `.set_len(_)`. + /// + /// This method provides unique access to all vec parts at once in `extend_from_within`. 
+ unsafe fn split_at_spare_mut_with_len( + &mut self, + ) -> (&mut [T], &mut [MaybeUninit], &mut usize) { + let Range { start: ptr, end: spare_ptr } = self.as_mut_ptr_range(); + let spare_ptr = spare_ptr.cast::>(); + let spare_len = self.buf.capacity() - self.len; + + // SAFETY: + // - `ptr` is guaranteed to be valid for `len` elements + // - `spare_ptr` is pointing one element past the buffer, so it doesn't overlap with `initialized` + unsafe { + let initialized = slice::from_raw_parts_mut(ptr, self.len); + let spare = slice::from_raw_parts_mut(spare_ptr, spare_len); + + (initialized, spare, &mut self.len) + } + } +} + +impl Vec { + /// Resizes the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.resize(3, "world"); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.resize(2, 0); + /// assert_eq!(vec, [1, 2]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_resize", since = "1.5.0")] + pub fn resize(&mut self, new_len: usize, value: T) { + let len = self.len(); + + if new_len > len { + self.extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + } + } + + /// Tries to resize the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.try_resize(3, "world").unwrap(); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.try_resize(2, 0).unwrap(); + /// assert_eq!(vec, [1, 2]); + /// + /// let mut vec = vec![42]; + /// let result = vec.try_resize(usize::MAX, 0); + /// assert!(result.is_err()); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_resize(&mut self, new_len: usize, value: T) -> Result<(), TryReserveError> { + let len = self.len(); + + if new_len > len { + self.try_extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + Ok(()) + } + } + + /// Clones and appends all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` vector is traversed in-order. + /// + /// Note that this function is same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). 
+ /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.extend_from_slice(&[2, 3, 4]); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_slice", since = "1.6.0")] + pub fn extend_from_slice(&mut self, other: &[T]) { + self.spec_extend(other.iter()) + } + + /// Tries to clone and append all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` vector is traversed in-order. + /// + /// Note that this function is same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.try_extend_from_slice(&[2, 3, 4]).unwrap(); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), TryReserveError> { + self.try_spec_extend(other.iter()) + } + + /// Copies elements from `src` range to the end of the vector. + /// + /// ## Examples + /// + /// ``` + /// let mut vec = vec![0, 1, 2, 3, 4]; + /// + /// vec.extend_from_within(2..); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4]); + /// + /// vec.extend_from_within(..2); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1]); + /// + /// vec.extend_from_within(4..8); + /// assert_eq!(vec, [0, 1, 2, 3, 4, 2, 3, 4, 0, 1, 4, 2, 3, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "vec_extend_from_within", since = "1.53.0")] + pub fn extend_from_within(&mut self, src: R) + where + R: RangeBounds, + { + let range = slice::range(src, ..self.len()); + self.reserve(range.len()); + + // SAFETY: + // - `slice::range` guarantees that the given range is valid for indexing self + unsafe { + self.spec_extend_from_within(range); + } + } +} + +// This code generalizes `extend_with_{element,default}`. +trait ExtendWith { + fn next(&mut self) -> T; + fn last(self) -> T; +} + +struct ExtendElement(T); +impl ExtendWith for ExtendElement { + fn next(&mut self) -> T { + self.0.clone() + } + fn last(self) -> T { + self.0 + } +} + +struct ExtendDefault; +impl ExtendWith for ExtendDefault { + fn next(&mut self) -> T { + Default::default() + } + fn last(self) -> T { + Default::default() + } +} + +struct ExtendFunc(F); +impl T> ExtendWith for ExtendFunc { + fn next(&mut self) -> T { + (self.0)() + } + fn last(mut self) -> T { + (self.0)() + } +} + +impl Vec { + #[cfg(not(no_global_oom_handling))] + /// Extend the vector by `n` values, using the given generator. + fn extend_with>(&mut self, n: usize, mut value: E) { + self.reserve(n); + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around bug where compiler + // may not realize the store through `ptr` through self.set_len() + // don't alias. 
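// --- Illustrative sketch (editorial, not part of the patch) ---
// The `ExtendWith` pattern in miniature: one generator trait serves
// both "clone a value" and "call a closure", and `last(self)` takes
// the generator by value so the final element is moved out rather
// than cloned.
trait Gen<T> {
    fn next(&mut self) -> T;
    fn last(self) -> T;
}

struct RepeatValue<T: Clone>(T);

impl<T: Clone> Gen<T> for RepeatValue<T> {
    fn next(&mut self) -> T {
        self.0.clone()
    }
    fn last(self) -> T {
        self.0 // moved, not cloned
    }
}

fn extend_with_model<T, G: Gen<T>>(v: &mut Vec<T>, n: usize, mut g: G) {
    v.reserve(n);
    for _ in 1..n {
        v.push(g.next());
    }
    if n > 0 {
        v.push(g.last());
    }
}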
+ let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + } + } + + /// Try to extend the vector by `n` values, using the given generator. + fn try_extend_with>(&mut self, n: usize, mut value: E) -> Result<(), TryReserveError> { + self.try_reserve(n)?; + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around bug where compiler + // may not realize the store through `ptr` through self.set_len() + // don't alias. + let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + Ok(()) + } + } +} + +impl Vec { + /// Removes consecutive repeated elements in the vector according to the + /// [`PartialEq`] trait implementation. + /// + /// If the vector is sorted, this removes all duplicates. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1, 2, 2, 3, 2]; + /// + /// vec.dedup(); + /// + /// assert_eq!(vec, [1, 2, 3, 2]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn dedup(&mut self) { + self.dedup_by(|a, b| a == b) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Internal methods and functions +//////////////////////////////////////////////////////////////////////////////// + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn from_elem(elem: T, n: usize) -> Vec { + ::from_elem(elem, n, Global) +} + +#[doc(hidden)] +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "allocator_api", issue = "32838")] +pub fn from_elem_in(elem: T, n: usize, alloc: A) -> Vec { + ::from_elem(elem, n, alloc) +} + +trait ExtendFromWithinSpec { + /// # Safety + /// + /// - `src` needs to be valid index + /// - `self.capacity() - self.len()` must be `>= src.len()` + unsafe fn spec_extend_from_within(&mut self, src: Range); +} + +impl ExtendFromWithinSpec for Vec { + default unsafe fn spec_extend_from_within(&mut self, src: Range) { + // SAFETY: + // - len is increased only after initializing elements + let (this, spare, len) = unsafe { self.split_at_spare_mut_with_len() }; + + // SAFETY: + // - caller guaratees that src is a valid index + let to_clone = unsafe { this.get_unchecked(src) }; + + iter::zip(to_clone, spare) + .map(|(src, dst)| dst.write(src.clone())) + // Note: + // - Element was just initialized with `MaybeUninit::write`, so it's ok to increase len + // - len is increased after each element to prevent leaks (see issue #82533) + .for_each(|_| *len += 1); + } +} + +impl ExtendFromWithinSpec for Vec { + unsafe fn spec_extend_from_within(&mut self, src: Range) { + let count = src.len(); + { + let (init, spare) = self.split_at_spare_mut(); + + // SAFETY: + // - caller guaratees that `src` is a valid index + let source = unsafe { 
init.get_unchecked(src) }; + + // SAFETY: + // - Both pointers are created from unique slice references (`&mut [_]`) + // so they are valid and do not overlap. + // - Elements are :Copy so it's OK to to copy them, without doing + // anything with the original values + // - `count` is equal to the len of `source`, so source is valid for + // `count` reads + // - `.reserve(count)` guarantees that `spare.len() >= count` so spare + // is valid for `count` writes + unsafe { ptr::copy_nonoverlapping(source.as_ptr(), spare.as_mut_ptr() as _, count) }; + } + + // SAFETY: + // - The elements were just initialized by `copy_nonoverlapping` + self.len += count; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Common trait implementations for Vec +//////////////////////////////////////////////////////////////////////////////// + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for Vec { + type Target = [T]; + + fn deref(&self) -> &[T] { + unsafe { slice::from_raw_parts(self.as_ptr(), self.len) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::DerefMut for Vec { + fn deref_mut(&mut self) -> &mut [T] { + unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for Vec { + #[cfg(not(test))] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + <[T]>::to_vec_in(&**self, alloc) + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Instead use the + // `slice::to_vec` function which is only available with cfg(test) + // NB see the slice::hack module in slice.rs for more information + #[cfg(test)] + fn clone(&self) -> Self { + let alloc = self.allocator().clone(); + crate::slice::to_vec(&**self, alloc) + } + + fn clone_from(&mut self, other: &Self) { + // drop anything that will not be overwritten + self.truncate(other.len()); + + // self.len <= other.len due to the truncate above, so the + // slices here are always in-bounds. + let (init, tail) = other.split_at(self.len()); + + // reuse the contained values' allocations/resources. + self.clone_from_slice(init); + self.extend_from_slice(tail); + } +} + +/// The hash of a vector is the same as that of the corresponding slice, +/// as required by the `core::borrow::Borrow` implementation. 
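// --- Illustrative sketch (editorial, not part of the patch) ---
// `clone_from` above truncates and reuses the destination's existing
// buffer, unlike `dst = src.clone()`, which always allocates afresh.
fn clone_from_demo() {
    let src = vec![1, 2, 3];
    let mut dst: Vec<i32> = Vec::with_capacity(16);
    dst.extend([9, 9]);
    let cap = dst.capacity();
    dst.clone_from(&src);
    assert_eq!(dst, [1, 2, 3]);
    // The old allocation was large enough, so it is kept and reused.
    assert_eq!(dst.capacity(), cap);
}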
+/// +/// ``` +/// #![feature(build_hasher_simple_hash_one)] +/// use std::hash::BuildHasher; +/// +/// let b = std::collections::hash_map::RandomState::new(); +/// let v: Vec = vec![0xa8, 0x3c, 0x09]; +/// let s: &[u8] = &[0xa8, 0x3c, 0x09]; +/// assert_eq!(b.hash_one(v), b.hash_one(s)); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Vec { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash(&**self, state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl, A: Allocator> Index for Vec { + type Output = I::Output; + + #[inline] + fn index(&self, index: I) -> &Self::Output { + Index::index(&**self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_on_unimplemented( + message = "vector indices are of type `usize` or ranges of `usize`", + label = "vector indices are of type `usize` or ranges of `usize`" +)] +impl, A: Allocator> IndexMut for Vec { + #[inline] + fn index_mut(&mut self, index: I) -> &mut Self::Output { + IndexMut::index_mut(&mut **self, index) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator for Vec { + #[inline] + fn from_iter>(iter: I) -> Vec { + >::from_iter(iter.into_iter()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl IntoIterator for Vec { + type Item = T; + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each value out of + /// the vector (from start to end). The vector cannot be used after calling + /// this. + /// + /// # Examples + /// + /// ``` + /// let v = vec!["a".to_string(), "b".to_string()]; + /// for s in v.into_iter() { + /// // s has type String, not &String + /// println!("{}", s); + /// } + /// ``` + #[inline] + fn into_iter(self) -> IntoIter { + unsafe { + let mut me = ManuallyDrop::new(self); + let alloc = ptr::read(me.allocator()); + let begin = me.as_mut_ptr(); + let end = if mem::size_of::() == 0 { + arith_offset(begin as *const i8, me.len() as isize) as *const T + } else { + begin.add(me.len()) as *const T + }; + let cap = me.buf.capacity(); + IntoIter { + buf: NonNull::new_unchecked(begin), + phantom: PhantomData, + cap, + alloc, + ptr: begin, + end, + } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a Vec { + type Item = &'a T; + type IntoIter = slice::Iter<'a, T>; + + fn into_iter(self) -> slice::Iter<'a, T> { + self.iter() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, T, A: Allocator> IntoIterator for &'a mut Vec { + type Item = &'a mut T; + type IntoIter = slice::IterMut<'a, T>; + + fn into_iter(self) -> slice::IterMut<'a, T> { + self.iter_mut() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend for Vec { + #[inline] + fn extend>(&mut self, iter: I) { + >::spec_extend(self, iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, item: T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +impl Vec { + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + #[cfg(not(no_global_oom_handling))] + fn extend_desugared>(&mut self, mut iterator: I) { + // This is the case for a general iterator. 
+ // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.reserve(lower.saturating_add(1)); + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // Since next() executes user code which can panic we have to bump the length + // after each step. + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + } + + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + fn try_extend_desugared>(&mut self, mut iterator: I) -> Result<(), TryReserveError> { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.try_reserve(lower.saturating_add(1))?; + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + + Ok(()) + } + + /// Creates a splicing iterator that replaces the specified range in the vector + /// with the given `replace_with` iterator and yields the removed items. + /// `replace_with` does not need to be the same length as `range`. + /// + /// `range` is removed even if the iterator is not consumed until the end. + /// + /// It is unspecified how many elements are removed from the vector + /// if the `Splice` value is leaked. + /// + /// The input iterator `replace_with` is only consumed when the `Splice` value is dropped. + /// + /// This is optimal if: + /// + /// * The tail (elements in the vector after `range`) is empty, + /// * or `replace_with` yields fewer or equal elements than `range`’s length + /// * or the lower bound of its `size_hint()` is exact. + /// + /// Otherwise, a temporary vector is allocated and the tail is moved twice. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Examples + /// + /// ``` + /// let mut v = vec![1, 2, 3]; + /// let new = [7, 8]; + /// let u: Vec<_> = v.splice(..2, new).collect(); + /// assert_eq!(v, &[7, 8, 3]); + /// assert_eq!(u, &[1, 2]); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "vec_splice", since = "1.21.0")] + pub fn splice(&mut self, range: R, replace_with: I) -> Splice<'_, I::IntoIter, A> + where + R: RangeBounds, + I: IntoIterator, + { + Splice { drain: self.drain(range), replace_with: replace_with.into_iter() } + } + + /// Creates an iterator which uses a closure to determine if an element should be removed. + /// + /// If the closure returns true, then the element is removed and yielded. + /// If the closure returns false, the element will remain in the vector and will not be yielded + /// by the iterator. 
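// --- Illustrative sketch (editorial, not part of the patch) ---
// The reserve policy of `extend_desugared` in safe code: only reserve
// when full, sized by the iterator's lower bound plus one for the
// element already in hand.
fn extend_model<T>(v: &mut Vec<T>, mut it: impl Iterator<Item = T>) {
    while let Some(element) = it.next() {
        if v.len() == v.capacity() {
            let (lower, _) = it.size_hint();
            v.reserve(lower.saturating_add(1));
        }
        v.push(element);
    }
}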
+ /// + /// Using this method is equivalent to the following code: + /// + /// ``` + /// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 }; + /// # let mut vec = vec![1, 2, 3, 4, 5, 6]; + /// let mut i = 0; + /// while i < vec.len() { + /// if some_predicate(&mut vec[i]) { + /// let val = vec.remove(i); + /// // your code here + /// } else { + /// i += 1; + /// } + /// } + /// + /// # assert_eq!(vec, vec![1, 4, 5]); + /// ``` + /// + /// But `drain_filter` is easier to use. `drain_filter` is also more efficient, + /// because it can backshift the elements of the array in bulk. + /// + /// Note that `drain_filter` also lets you mutate every element in the filter closure, + /// regardless of whether you choose to keep or remove it. + /// + /// # Examples + /// + /// Splitting an array into evens and odds, reusing the original allocation: + /// + /// ``` + /// #![feature(drain_filter)] + /// let mut numbers = vec![1, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]; + /// + /// let evens = numbers.drain_filter(|x| *x % 2 == 0).collect::>(); + /// let odds = numbers; + /// + /// assert_eq!(evens, vec![2, 4, 6, 8, 14]); + /// assert_eq!(odds, vec![1, 3, 5, 9, 11, 13, 15]); + /// ``` + #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] + pub fn drain_filter(&mut self, filter: F) -> DrainFilter<'_, T, F, A> + where + F: FnMut(&mut T) -> bool, + { + let old_len = self.len(); + + // Guard against us getting leaked (leak amplification) + unsafe { + self.set_len(0); + } + + DrainFilter { vec: self, idx: 0, del: 0, old_len, pred: filter, panic_flag: false } + } +} + +/// Extend implementation that copies elements out of references before pushing them onto the Vec. +/// +/// This implementation is specialized for slice iterators, where it uses [`copy_from_slice`] to +/// append the entire slice at once. +/// +/// [`copy_from_slice`]: slice::copy_from_slice +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec { + fn extend>(&mut self, iter: I) { + self.spec_extend(iter.into_iter()) + } + + #[inline] + fn extend_one(&mut self, &item: &'a T) { + self.push(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +/// Implements comparison of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Vec { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + PartialOrd::partial_cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Vec {} + +/// Implements ordering of vectors, [lexicographically](core::cmp::Ord#lexicographical-comparison). +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Vec { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + Ord::cmp(&**self, &**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec { + fn drop(&mut self) { + unsafe { + // use drop for [T] + // use a raw slice to refer to the elements of the vector as weakest necessary type; + // could avoid questions of validity in certain cases + ptr::drop_in_place(ptr::slice_from_raw_parts_mut(self.as_mut_ptr(), self.len)) + } + // RawVec handles deallocation + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Default for Vec { + /// Creates an empty `Vec`. 
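// --- Illustrative sketch (editorial, not part of the patch) ---
// Thanks to the `Extend<&'a T>` impl above, a `Vec<T: Copy>` can be
// extended directly from borrowed items, with slice iterators taking
// the bulk `copy_from_slice`-style fast path.
fn extend_ref_demo() {
    let src = [1, 2, 3];
    let mut v: Vec<i32> = Vec::new();
    v.extend(&src); // no explicit `.copied()` needed
    assert_eq!(v, [1, 2, 3]);
}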
+ fn default() -> Vec { + Vec::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Vec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef> for Vec { + fn as_ref(&self) -> &Vec { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl AsMut> for Vec { + fn as_mut(&mut self) -> &mut Vec { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[T]> for Vec { + fn as_ref(&self) -> &[T] { + self + } +} + +#[stable(feature = "vec_as_mut", since = "1.5.0")] +impl AsMut<[T]> for Vec { + fn as_mut(&mut self) -> &mut [T] { + self + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&[T]> for Vec { + /// Allocate a `Vec` and fill it by cloning `s`'s items. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&[1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &[T]) -> Vec { + s.to_vec() + } + #[cfg(test)] + fn from(s: &[T]) -> Vec { + crate::slice::to_vec(s, Global) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "vec_from_mut", since = "1.19.0")] +impl From<&mut [T]> for Vec { + /// Allocate a `Vec` and fill it by cloning `s`'s items. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from(&mut [1, 2, 3][..]), vec![1, 2, 3]); + /// ``` + #[cfg(not(test))] + fn from(s: &mut [T]) -> Vec { + s.to_vec() + } + #[cfg(test)] + fn from(s: &mut [T]) -> Vec { + crate::slice::to_vec(s, Global) + } +} + +#[stable(feature = "vec_from_array", since = "1.44.0")] +impl From<[T; N]> for Vec { + #[cfg(not(test))] + fn from(s: [T; N]) -> Vec { + <[T]>::into_vec(box s) + } + /// Allocate a `Vec` and move `s`'s items into it. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from([1, 2, 3]), vec![1, 2, 3]); + /// ``` + #[cfg(test)] + fn from(s: [T; N]) -> Vec { + crate::slice::into_vec(box s) + } +} + +#[stable(feature = "vec_from_cow_slice", since = "1.14.0")] +impl<'a, T> From> for Vec +where + [T]: ToOwned>, +{ + /// Convert a clone-on-write slice into a vector. + /// + /// If `s` already owns a `Vec`, it will be returned directly. + /// If `s` is borrowing a slice, a new `Vec` will be allocated and + /// filled by cloning `s`'s items into it. + /// + /// # Examples + /// + /// ``` + /// # use std::borrow::Cow; + /// let o: Cow<[i32]> = Cow::Owned(vec![1, 2, 3]); + /// let b: Cow<[i32]> = Cow::Borrowed(&[1, 2, 3]); + /// assert_eq!(Vec::from(o), Vec::from(b)); + /// ``` + fn from(s: Cow<'a, [T]>) -> Vec { + s.into_owned() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "vec_from_box", since = "1.18.0")] +impl From> for Vec { + /// Convert a boxed slice into a vector by transferring ownership of + /// the existing heap allocation. + /// + /// # Examples + /// + /// ``` + /// let b: Box<[i32]> = vec![1, 2, 3].into_boxed_slice(); + /// assert_eq!(Vec::from(b), vec![1, 2, 3]); + /// ``` + fn from(s: Box<[T], A>) -> Self { + s.into_vec() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(no_global_oom_handling))] +#[cfg(not(test))] +#[stable(feature = "box_from_vec", since = "1.20.0")] +impl From> for Box<[T], A> { + /// Convert a vector into a boxed slice. + /// + /// If `v` has excess capacity, its items will be moved into a + /// newly-allocated buffer with exactly the right capacity. 
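// --- Illustrative sketch (editorial, not part of the patch) ---
// The Vec <-> boxed-slice round trip: converting to `Box<[T]>` sheds
// excess capacity, so the vector that comes back reports
// capacity == len.
fn boxed_slice_demo() {
    let mut v = Vec::with_capacity(8);
    v.extend([1, 2, 3]);
    let b: Box<[i32]> = v.into_boxed_slice();
    let v2 = b.into_vec();
    assert_eq!(v2.capacity(), v2.len());
}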
+ /// + /// # Examples + /// + /// ``` + /// assert_eq!(Box::from(vec![1, 2, 3]), vec![1, 2, 3].into_boxed_slice()); + /// ``` + fn from(v: Vec) -> Self { + v.into_boxed_slice() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for Vec { + /// Allocate a `Vec` and fill it with a UTF-8 string. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(Vec::from("123"), vec![b'1', b'2', b'3']); + /// ``` + fn from(s: &str) -> Vec { + From::from(s.as_bytes()) + } +} + +#[stable(feature = "array_try_from_vec", since = "1.48.0")] +impl TryFrom> for [T; N] { + type Error = Vec; + + /// Gets the entire contents of the `Vec` as an array, + /// if its size exactly matches that of the requested array. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryInto; + /// assert_eq!(vec![1, 2, 3].try_into(), Ok([1, 2, 3])); + /// assert_eq!(>::new().try_into(), Ok([])); + /// ``` + /// + /// If the length doesn't match, the input comes back in `Err`: + /// ``` + /// use std::convert::TryInto; + /// let r: Result<[i32; 4], _> = (0..10).collect::>().try_into(); + /// assert_eq!(r, Err(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9])); + /// ``` + /// + /// If you're fine with just getting a prefix of the `Vec`, + /// you can call [`.truncate(N)`](Vec::truncate) first. + /// ``` + /// use std::convert::TryInto; + /// let mut v = String::from("hello world").into_bytes(); + /// v.sort(); + /// v.truncate(2); + /// let [a, b]: [_; 2] = v.try_into().unwrap(); + /// assert_eq!(a, b' '); + /// assert_eq!(b, b'd'); + /// ``` + fn try_from(mut vec: Vec) -> Result<[T; N], Vec> { + if vec.len() != N { + return Err(vec); + } + + // SAFETY: `.set_len(0)` is always sound. + unsafe { vec.set_len(0) }; + + // SAFETY: A `Vec`'s pointer is always aligned properly, and + // the alignment the array needs is the same as the items. + // We checked earlier that we have sufficient items. + // The items will not double-drop as the `set_len` + // tells the `Vec` not to also drop them. + let array = unsafe { ptr::read(vec.as_ptr() as *const [T; N]) }; + Ok(array) + } +} diff --git a/rust/alloc/vec/partial_eq.rs b/rust/alloc/vec/partial_eq.rs new file mode 100644 index 0000000000000..273e99bed4888 --- /dev/null +++ b/rust/alloc/vec/partial_eq.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::Allocator; +#[cfg(not(no_global_oom_handling))] +use crate::borrow::Cow; + +use super::Vec; + +macro_rules! __impl_slice_eq1 { + ([$($vars:tt)*] $lhs:ty, $rhs:ty $(where $ty:ty: $bound:ident)?, #[$stability:meta]) => { + #[$stability] + impl PartialEq<$rhs> for $lhs + where + T: PartialEq, + $($ty: $bound)? + { + #[inline] + fn eq(&self, other: &$rhs) -> bool { self[..] == other[..] } + #[inline] + fn ne(&self, other: &$rhs) -> bool { self[..] != other[..] } + } + } +} + +__impl_slice_eq1! { [A: Allocator] Vec, Vec, #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, &[U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, &mut [U], #[stable(feature = "rust1", since = "1.0.0")] } +__impl_slice_eq1! { [A: Allocator] &[T], Vec, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] &mut [T], Vec, #[stable(feature = "partialeq_vec_for_ref_slice", since = "1.46.0")] } +__impl_slice_eq1! { [A: Allocator] Vec, [U], #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] } +__impl_slice_eq1! 
{ [A: Allocator] [T], Vec<U, A>, #[stable(feature = "partialeq_vec_for_slice", since = "1.48.0")] }
+#[cfg(not(no_global_oom_handling))]
+__impl_slice_eq1! { [A: Allocator] Cow<'_, [T]>, Vec<U, A> where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] }
+#[cfg(not(no_global_oom_handling))]
+__impl_slice_eq1! { [] Cow<'_, [T]>, &[U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] }
+#[cfg(not(no_global_oom_handling))]
+__impl_slice_eq1! { [] Cow<'_, [T]>, &mut [U] where T: Clone, #[stable(feature = "rust1", since = "1.0.0")] }
+__impl_slice_eq1! { [A: Allocator, const N: usize] Vec<T, A>, [U; N], #[stable(feature = "rust1", since = "1.0.0")] }
+__impl_slice_eq1! { [A: Allocator, const N: usize] Vec<T, A>, &[U; N], #[stable(feature = "rust1", since = "1.0.0")] }
+
+// NOTE: some less important impls are omitted to reduce code bloat
+// FIXME(Centril): Reconsider this?
+//__impl_slice_eq1! { [const N: usize] Vec<A>, &mut [B; N], }
+//__impl_slice_eq1! { [const N: usize] [A; N], Vec<B>, }
+//__impl_slice_eq1! { [const N: usize] &[A; N], Vec<B>, }
+//__impl_slice_eq1! { [const N: usize] &mut [A; N], Vec<B>, }
+//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, [B; N], }
+//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, &[B; N], }
+//__impl_slice_eq1! { [const N: usize] Cow<'a, [A]>, &mut [B; N], }
diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs
new file mode 100644
index 0000000000000..448bf5076a0bf
--- /dev/null
+++ b/rust/alloc/vec/set_len_on_drop.rs
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// Set the length of the vec when the `SetLenOnDrop` value goes out of scope.
+//
+// The idea is: The length field in SetLenOnDrop is a local variable
+// that the optimizer will see does not alias with any stores through the Vec's data
+pub(super) struct SetLenOnDrop<'a> {
+    len: &'a mut usize,
+    local_len: usize,
+}
+
+impl<'a> SetLenOnDrop<'a> {
+    #[inline]
+    pub(super) fn new(len: &'a mut usize) -> Self {
+        SetLenOnDrop { local_len: *len, len }
+    }
+
+    #[inline]
+    pub(super) fn increment_len(&mut self, increment: usize) {
+        self.local_len += increment;
+    }
+}
+
+impl Drop for SetLenOnDrop<'_> {
+    #[inline]
+    fn drop(&mut self) {
+        *self.len = self.local_len;
+    }
+}
diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs
new file mode 100644
index 0000000000000..d56c2225f8154
--- /dev/null
+++ b/rust/alloc/vec/spec_extend.rs
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+use crate::alloc::Allocator;
+use crate::vec::TryReserveError;
+use core::iter::TrustedLen;
+use core::ptr::{self};
+use core::slice::{self};
+
+use super::{IntoIter, SetLenOnDrop, Vec};
+
+// Specialization trait used for Vec::extend
+#[cfg(not(no_global_oom_handling))]
+pub(super) trait SpecExtend<T, I> {
+    fn spec_extend(&mut self, iter: I);
+}
+
+// Specialization trait used for Vec::try_extend
+pub(super) trait TrySpecExtend<T, I> {
+    fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError>;
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A>
+where
+    I: Iterator<Item = T>,
+{
+    default fn spec_extend(&mut self, iter: I) {
+        self.extend_desugared(iter)
+    }
+}
+
+impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A>
+where
+    I: Iterator<Item = T>,
+{
+    default fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError> {
+        self.try_extend_desugared(iter)
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A>
+where
+    I: TrustedLen<Item = T>,
+{
+    default fn spec_extend(&mut self, iterator: I) {
+        // This is the case for a TrustedLen iterator.
+        let (low, high) = iterator.size_hint();
+        if let Some(additional) = high {
+            debug_assert_eq!(
+                low,
+                additional,
+                "TrustedLen iterator's size hint is not exact: {:?}",
+                (low, high)
+            );
+            self.reserve(additional);
+            unsafe {
+                let mut ptr = self.as_mut_ptr().add(self.len());
+                let mut local_len = SetLenOnDrop::new(&mut self.len);
+                iterator.for_each(move |element| {
+                    ptr::write(ptr, element);
+                    ptr = ptr.offset(1);
+                    // Since the loop executes user code which can panic we have to bump the pointer
+                    // after each step.
+                    // NB can't overflow since we would have had to alloc the address space
+                    local_len.increment_len(1);
+                });
+            }
+        } else {
+            // Per TrustedLen contract a `None` upper bound means that the iterator length
+            // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
+            // Since the other branch already panics eagerly (via `reserve()`) we do the same here.
+            // This avoids additional codegen for a fallback code path which would eventually
+            // panic anyway.
+            panic!("capacity overflow");
+        }
+    }
+}
+
+impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A>
+where
+    I: TrustedLen<Item = T>,
+{
+    default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> {
+        // This is the case for a TrustedLen iterator.
+        let (low, high) = iterator.size_hint();
+        if let Some(additional) = high {
+            debug_assert_eq!(
+                low,
+                additional,
+                "TrustedLen iterator's size hint is not exact: {:?}",
+                (low, high)
+            );
+            self.try_reserve(additional)?;
+            unsafe {
+                let mut ptr = self.as_mut_ptr().add(self.len());
+                let mut local_len = SetLenOnDrop::new(&mut self.len);
+                iterator.for_each(move |element| {
+                    ptr::write(ptr, element);
+                    ptr = ptr.offset(1);
+                    // NB can't overflow since we would have had to alloc the address space
+                    local_len.increment_len(1);
+                });
+            }
+            Ok(())
+        } else {
+            Err(TryReserveError::CapacityOverflow)
+        }
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<T, A: Allocator> SpecExtend<T, IntoIter<T>> for Vec<T, A> {
+    fn spec_extend(&mut self, mut iterator: IntoIter<T>) {
+        unsafe {
+            self.append_elements(iterator.as_slice() as _);
+        }
+        iterator.ptr = iterator.end;
+    }
+}
+
+impl<T, A: Allocator> TrySpecExtend<T, IntoIter<T>> for Vec<T, A> {
+    fn try_spec_extend(&mut self, mut iterator: IntoIter<T>) -> Result<(), TryReserveError> {
+        unsafe {
+            self.try_append_elements(iterator.as_slice() as _)?;
+        }
+        iterator.ptr = iterator.end;
+        Ok(())
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec<T, A>
+where
+    I: Iterator<Item = &'a T>,
+    T: Clone,
+{
+    default fn spec_extend(&mut self, iterator: I) {
+        self.spec_extend(iterator.cloned())
+    }
+}
+
+impl<'a, T: 'a, I, A: Allocator + 'a> TrySpecExtend<&'a T, I> for Vec<T, A>
+where
+    I: Iterator<Item = &'a T>,
+    T: Clone,
+{
+    default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> {
+        self.try_spec_extend(iterator.cloned())
+    }
+}
+
+#[cfg(not(no_global_oom_handling))]
+impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A>
+where
+    T: Copy,
+{
+    fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) {
+        let slice = iterator.as_slice();
+        unsafe { self.append_elements(slice) };
+    }
+}
+
+impl<'a, T: 'a, A: Allocator + 'a> TrySpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A>
+where
+    T: Copy,
+{
+    fn try_spec_extend(&mut self, iterator: slice::Iter<'a, T>) -> Result<(), TryReserveError> {
+        let slice = iterator.as_slice();
+        unsafe { self.try_append_elements(slice) }
+    }
+}
From 2800b55bc445883a44f297192ba899327aa95a67 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda
Date: Sat, 3 Jul 2021 17:30:16 +0200
Subject: [PATCH 0238/1202] rust: add `build_error` crate

The `build_error` crate provides the `build_error` function which is
then used to provide the `build_error!` and the `build_assert!` macros.

`build_assert!` is intended to be used when `static_assert!` cannot
be used, e.g. when the condition refers to generic parameters or
parameters of an inline function.
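As an illustrative sketch only (the helper below is hypothetical and not
part of this patch; `build_assert!` itself is provided by the kernel
crate), the intended usage looks roughly like:

    // Hypothetical example: the condition depends on the generic
    // parameter `N`, so `static_assert!` cannot evaluate it.
    fn first_chunk<const N: usize>(buf: &[u8; N]) -> &[u8] {
        // Compiles away when the optimizer can prove `N >= 4` for a
        // given instantiation; otherwise the surviving call to
        // `build_error` fails the build.
        build_assert!(N >= 4);
        &buf[..4]
    }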
Co-developed-by: Alex Gaynor
Signed-off-by: Alex Gaynor
Co-developed-by: Geoffrey Thomas
Signed-off-by: Geoffrey Thomas
Co-developed-by: Finn Behrens
Signed-off-by: Finn Behrens
Co-developed-by: Adam Bratschi-Kaye
Signed-off-by: Adam Bratschi-Kaye
Co-developed-by: Wedson Almeida Filho
Signed-off-by: Wedson Almeida Filho
Co-developed-by: Boqun Feng
Signed-off-by: Boqun Feng
Co-developed-by: Sumera Priyadarsini
Signed-off-by: Sumera Priyadarsini
Co-developed-by: Michael Ellerman
Signed-off-by: Michael Ellerman
Co-developed-by: Sven Van Asbroeck
Signed-off-by: Sven Van Asbroeck
Co-developed-by: Gary Guo
Signed-off-by: Gary Guo
Co-developed-by: Boris-Chengbiao Zhou
Signed-off-by: Boris-Chengbiao Zhou
Co-developed-by: Fox Chen
Signed-off-by: Fox Chen
Co-developed-by: Ayaan Zaidi
Signed-off-by: Ayaan Zaidi
Co-developed-by: Douglas Su
Signed-off-by: Douglas Su
Co-developed-by: Yuki Okushi
Signed-off-by: Yuki Okushi
Signed-off-by: Miguel Ojeda
---
 rust/build_error.rs | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 rust/build_error.rs

diff --git a/rust/build_error.rs b/rust/build_error.rs
new file mode 100644
index 0000000000000..d47fa8393cbc8
--- /dev/null
+++ b/rust/build_error.rs
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Build-time error.
+//!
+//! This crate provides a function, `build_error`, which will panic at
+//! compile time if executed in const context, and will cause a build error
+//! if not executed at compile time and the optimizer does not optimise away
+//! the call.
+//!
+//! It is used by `build_assert!` in the kernel crate, allowing checking of
+//! conditions that could be checked statically, but cannot be enforced in
+//! Rust yet (e.g. checks performed in const functions, where those
+//! functions could still be called at runtime).
+
+#![no_std]
+#![feature(const_panic, core_panic)]
+
+/// Panics if executed in const context, or triggers a build error if not.
+#[inline(never)]
+#[cold]
+#[no_mangle]
+#[track_caller]
+pub const fn build_error(msg: &'static str) -> ! {
+    // Could also be `panic!(msg)` to avoid using the unstable feature
+    // `core_panic`, but that form is not allowed in Rust 2021, while
+    // `panic!("{}", msg)` cannot yet be used in const context.
+    core::panicking::panic(msg);
+}
+
+#[cfg(CONFIG_RUST_BUILD_ASSERT_WARN)]
+#[link_section = ".gnu.warning.build_error"]
+#[used]
+static BUILD_ERROR_WARNING: [u8; 45] = *b"call to build_error present after compilation";
From e704163f9bf8aee3cc534bb6d47606e55d4e72b2 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda
Date: Sat, 3 Jul 2021 16:56:17 +0200
Subject: [PATCH 0239/1202] rust: add `macros` crate

This crate contains all the procedural macros ("proc macros") shared
across the kernel.

Procedural macros make it possible to create syntax extensions. They run
at compile time and can consume as well as produce Rust syntax.

For instance, the `module!` macro that is used by Rust modules is
implemented here. It makes it easy to declare the equivalent information
to the `MODULE_*` macros in C modules, e.g.:
    module! {
        type: RustMinimal,
        name: b"rust_minimal",
        author: b"Rust for Linux Contributors",
        description: b"Rust minimal sample",
        license: b"GPL v2",
    }

Co-developed-by: Alex Gaynor
Signed-off-by: Alex Gaynor
Co-developed-by: Geoffrey Thomas
Signed-off-by: Geoffrey Thomas
Co-developed-by: Finn Behrens
Signed-off-by: Finn Behrens
Co-developed-by: Adam Bratschi-Kaye
Signed-off-by: Adam Bratschi-Kaye
Co-developed-by: Wedson Almeida Filho
Signed-off-by: Wedson Almeida Filho
Co-developed-by: Boqun Feng
Signed-off-by: Boqun Feng
Co-developed-by: Sumera Priyadarsini
Signed-off-by: Sumera Priyadarsini
Co-developed-by: Michael Ellerman
Signed-off-by: Michael Ellerman
Co-developed-by: Sven Van Asbroeck
Signed-off-by: Sven Van Asbroeck
Co-developed-by: Gary Guo
Signed-off-by: Gary Guo
Co-developed-by: Boris-Chengbiao Zhou
Signed-off-by: Boris-Chengbiao Zhou
Co-developed-by: Fox Chen
Signed-off-by: Fox Chen
Co-developed-by: Ayaan Zaidi
Signed-off-by: Ayaan Zaidi
Co-developed-by: Douglas Su
Signed-off-by: Douglas Su
Co-developed-by: Yuki Okushi
Signed-off-by: Yuki Okushi
Signed-off-by: Miguel Ojeda
---
 rust/macros/helpers.rs |  79 +++++
 rust/macros/lib.rs     | 128 ++++++++
 rust/macros/module.rs  | 678 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 885 insertions(+)
 create mode 100644 rust/macros/helpers.rs
 create mode 100644 rust/macros/lib.rs
 create mode 100644 rust/macros/module.rs

diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs
new file mode 100644
index 0000000000000..5431f65fe16ff
--- /dev/null
+++ b/rust/macros/helpers.rs
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use proc_macro::{token_stream, Group, TokenTree};
+
+pub fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> {
+    if let Some(TokenTree::Ident(ident)) = it.next() {
+        Some(ident.to_string())
+    } else {
+        None
+    }
+}
+
+pub fn try_literal(it: &mut token_stream::IntoIter) -> Option<String> {
+    if let Some(TokenTree::Literal(literal)) = it.next() {
+        Some(literal.to_string())
+    } else {
+        None
+    }
+}
+
+pub fn try_byte_string(it: &mut token_stream::IntoIter) -> Option<String> {
+    try_literal(it).and_then(|byte_string| {
+        if byte_string.starts_with("b\"") && byte_string.ends_with('\"') {
+            Some(byte_string[2..byte_string.len() - 1].to_string())
+        } else {
+            None
+        }
+    })
+}
+
+pub fn expect_ident(it: &mut token_stream::IntoIter) -> String {
+    try_ident(it).expect("Expected Ident")
+}
+
+pub fn expect_punct(it: &mut token_stream::IntoIter) -> char {
+    if let TokenTree::Punct(punct) = it.next().expect("Reached end of token stream for Punct") {
+        punct.as_char()
+    } else {
+        panic!("Expected Punct");
+    }
+}
+
+pub fn expect_literal(it: &mut token_stream::IntoIter) -> String {
+    try_literal(it).expect("Expected Literal")
+}
+
+pub fn expect_group(it: &mut token_stream::IntoIter) -> Group {
+    if let TokenTree::Group(group) = it.next().expect("Reached end of token stream for Group") {
+        group
+    } else {
+        panic!("Expected Group");
+    }
+}
+
+pub fn expect_byte_string(it: &mut token_stream::IntoIter) -> String {
+    try_byte_string(it).expect("Expected byte string")
+}
+
+pub fn expect_end(it: &mut token_stream::IntoIter) {
+    if it.next().is_some() {
+        panic!("Expected end");
+    }
+}
+
+pub fn get_literal(it: &mut token_stream::IntoIter, expected_name: &str) -> String {
+    assert_eq!(expect_ident(it), expected_name);
+    assert_eq!(expect_punct(it), ':');
+    let literal = expect_literal(it);
+    assert_eq!(expect_punct(it), ',');
+    literal
+}
+
+pub fn get_byte_string(it: &mut token_stream::IntoIter, expected_name: &str) -> String {
assert_eq!(expect_ident(it), expected_name); + assert_eq!(expect_punct(it), ':'); + let byte_string = expect_byte_string(it); + assert_eq!(expect_punct(it), ','); + byte_string +} diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs new file mode 100644 index 0000000000000..f8b818a83ac4c --- /dev/null +++ b/rust/macros/lib.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Crate for all kernel procedural macros. + +mod helpers; +mod module; + +use proc_macro::TokenStream; + +/// Declares a kernel module. +/// +/// The `type` argument should be a type which implements the [`KernelModule`] +/// trait. Also accepts various forms of kernel metadata. +/// +/// C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) +/// +/// [`KernelModule`]: ../kernel/trait.KernelModule.html +/// +/// # Examples +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module!{ +/// type: MyKernelModule, +/// name: b"my_kernel_module", +/// author: b"Rust for Linux Contributors", +/// description: b"My very own kernel module!", +/// license: b"GPL v2", +/// params: { +/// my_i32: i32 { +/// default: 42, +/// permissions: 0o000, +/// description: b"Example of i32", +/// }, +/// writeable_i32: i32 { +/// default: 42, +/// permissions: 0o644, +/// description: b"Example of i32", +/// }, +/// }, +/// } +/// +/// struct MyKernelModule; +/// +/// impl KernelModule for MyKernelModule { +/// fn init() -> Result { +/// // If the parameter is writeable, then the kparam lock must be +/// // taken to read the parameter: +/// { +/// let lock = THIS_MODULE.kernel_param_lock(); +/// pr_info!("i32 param is: {}\n", writeable_i32.read(&lock)); +/// } +/// // If the parameter is read only, it can be read without locking +/// // the kernel parameters: +/// pr_info!("i32 param is: {}\n", my_i32.read()); +/// Ok(MyKernelModule) +/// } +/// } +/// ``` +/// +/// # Supported argument types +/// - `type`: type which implements the [`KernelModule`] trait (required). +/// - `name`: byte array of the name of the kernel module (required). +/// - `author`: byte array of the author of the kernel module. +/// - `description`: byte array of the description of the kernel module. +/// - `license`: byte array of the license of the kernel module (required). +/// - `alias`: byte array of alias name of the kernel module. +/// - `alias_rtnl_link`: byte array of the `rtnl_link_alias` of the kernel module (mutually exclusive with `alias`). +/// - `params`: parameters for the kernel module, as described below. +/// +/// # Supported parameter types +/// +/// - `bool`: Corresponds to C `bool` param type. +/// - `i8`: No equivalent C param type. +/// - `u8`: Corresponds to C `char` param type. +/// - `i16`: Corresponds to C `short` param type. +/// - `u16`: Corresponds to C `ushort` param type. +/// - `i32`: Corresponds to C `int` param type. +/// - `u32`: Corresponds to C `uint` param type. +/// - `i64`: No equivalent C param type. +/// - `u64`: Corresponds to C `ullong` param type. +/// - `isize`: No equivalent C param type. +/// - `usize`: No equivalent C param type. +/// - `str`: Corresponds to C `charp` param type. Reading returns a byte slice. +/// - `ArrayParam`: Corresponds to C parameters created using `module_param_array`. An array +/// of `T`'s of length at **most** `N`. +/// +/// `invbool` is unsupported: it was only ever used in a few modules. +/// Consider using a `bool` and inverting the logic instead. 
+#[proc_macro] +pub fn module(ts: TokenStream) -> TokenStream { + module::module(ts) +} + +/// Declares a kernel module that exposes a single misc device. +/// +/// The `type` argument should be a type which implements the [`FileOpener`] trait. Also accepts +/// various forms of kernel metadata. +/// +/// C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) +/// +/// [`FileOpener`]: ../kernel/file_operations/trait.FileOpener.html +/// +/// # Examples +/// +/// ```ignore +/// use kernel::prelude::*; +/// +/// module_misc_device! { +/// type: MyFile, +/// name: b"my_miscdev_kernel_module", +/// author: b"Rust for Linux Contributors", +/// description: b"My very own misc device kernel module!", +/// license: b"GPL v2", +/// } +/// +/// #[derive(Default)] +/// struct MyFile; +/// +/// impl kernel::file_operations::FileOperations for MyFile { +/// kernel::declare_file_operations!(); +/// } +/// ``` +#[proc_macro] +pub fn module_misc_device(ts: TokenStream) -> TokenStream { + module::module_misc_device(ts) +} diff --git a/rust/macros/module.rs b/rust/macros/module.rs new file mode 100644 index 0000000000000..1f82258158da1 --- /dev/null +++ b/rust/macros/module.rs @@ -0,0 +1,678 @@ +// SPDX-License-Identifier: GPL-2.0 + +use proc_macro::{token_stream, Delimiter, Group, Literal, TokenStream, TokenTree}; + +use crate::helpers::*; + +#[derive(Clone, PartialEq)] +enum ParamType { + Ident(String), + Array { vals: String, max_length: usize }, +} + +fn expect_array_fields(it: &mut token_stream::IntoIter) -> ParamType { + assert_eq!(expect_punct(it), '<'); + let vals = expect_ident(it); + assert_eq!(expect_punct(it), ','); + let max_length_str = expect_literal(it); + let max_length = max_length_str + .parse::() + .expect("Expected usize length"); + assert_eq!(expect_punct(it), '>'); + ParamType::Array { vals, max_length } +} + +fn expect_type(it: &mut token_stream::IntoIter) -> ParamType { + if let TokenTree::Ident(ident) = it + .next() + .expect("Reached end of token stream for param type") + { + match ident.to_string().as_ref() { + "ArrayParam" => expect_array_fields(it), + _ => ParamType::Ident(ident.to_string()), + } + } else { + panic!("Expected Param Type") + } +} + +struct ModInfoBuilder<'a> { + module: &'a str, + counter: usize, + buffer: String, +} + +impl<'a> ModInfoBuilder<'a> { + fn new(module: &'a str) -> Self { + ModInfoBuilder { + module, + counter: 0, + buffer: String::new(), + } + } + + fn emit_base(&mut self, field: &str, content: &str, builtin: bool) { + use std::fmt::Write; + + let string = if builtin { + // Built-in modules prefix their modinfo strings by `module.`. + format!( + "{module}.{field}={content}\0", + module = self.module, + field = field, + content = content + ) + } else { + // Loadable modules' modinfo strings go as-is. 
+ format!("{field}={content}\0", field = field, content = content) + }; + + write!( + &mut self.buffer, + " + {cfg} + #[doc(hidden)] + #[link_section = \".modinfo\"] + #[used] + pub static __{module}_{counter}: [u8; {length}] = *{string}; + ", + cfg = if builtin { + "#[cfg(not(MODULE))]" + } else { + "#[cfg(MODULE)]" + }, + module = self.module, + counter = self.counter, + length = string.len(), + string = Literal::byte_string(string.as_bytes()), + ) + .unwrap(); + + self.counter += 1; + } + + fn emit_only_builtin(&mut self, field: &str, content: &str) { + self.emit_base(field, content, true) + } + + fn emit_only_loadable(&mut self, field: &str, content: &str) { + self.emit_base(field, content, false) + } + + fn emit(&mut self, field: &str, content: &str) { + self.emit_only_builtin(field, content); + self.emit_only_loadable(field, content); + } + + fn emit_param(&mut self, field: &str, param: &str, content: &str) { + let content = format!("{param}:{content}", param = param, content = content); + self.emit(field, &content); + } +} + +fn permissions_are_readonly(perms: &str) -> bool { + let (radix, digits) = if let Some(n) = perms.strip_prefix("0x") { + (16, n) + } else if let Some(n) = perms.strip_prefix("0o") { + (8, n) + } else if let Some(n) = perms.strip_prefix("0b") { + (2, n) + } else { + (10, perms) + }; + match u32::from_str_radix(digits, radix) { + Ok(perms) => perms & 0o222 == 0, + Err(_) => false, + } +} + +fn param_ops_path(param_type: &str) -> &'static str { + match param_type { + "bool" => "kernel::module_param::PARAM_OPS_BOOL", + "i8" => "kernel::module_param::PARAM_OPS_I8", + "u8" => "kernel::module_param::PARAM_OPS_U8", + "i16" => "kernel::module_param::PARAM_OPS_I16", + "u16" => "kernel::module_param::PARAM_OPS_U16", + "i32" => "kernel::module_param::PARAM_OPS_I32", + "u32" => "kernel::module_param::PARAM_OPS_U32", + "i64" => "kernel::module_param::PARAM_OPS_I64", + "u64" => "kernel::module_param::PARAM_OPS_U64", + "isize" => "kernel::module_param::PARAM_OPS_ISIZE", + "usize" => "kernel::module_param::PARAM_OPS_USIZE", + "str" => "kernel::module_param::PARAM_OPS_STR", + t => panic!("Unrecognized type {}", t), + } +} + +fn try_simple_param_val( + param_type: &str, +) -> Box Option> { + match param_type { + "bool" => Box::new(|param_it| try_ident(param_it)), + "str" => Box::new(|param_it| { + try_byte_string(param_it) + .map(|s| format!("kernel::module_param::StringParam::Ref(b\"{}\")", s)) + }), + _ => Box::new(|param_it| try_literal(param_it)), + } +} + +fn get_default(param_type: &ParamType, param_it: &mut token_stream::IntoIter) -> String { + let try_param_val = match param_type { + ParamType::Ident(ref param_type) + | ParamType::Array { + vals: ref param_type, + max_length: _, + } => try_simple_param_val(param_type), + }; + assert_eq!(expect_ident(param_it), "default"); + assert_eq!(expect_punct(param_it), ':'); + let default = match param_type { + ParamType::Ident(_) => try_param_val(param_it).expect("Expected default param value"), + ParamType::Array { + vals: _, + max_length: _, + } => { + let group = expect_group(param_it); + assert_eq!(group.delimiter(), Delimiter::Bracket); + let mut default_vals = Vec::new(); + let mut it = group.stream().into_iter(); + + while let Some(default_val) = try_param_val(&mut it) { + default_vals.push(default_val); + match it.next() { + Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','), + None => break, + _ => panic!("Expected ',' or end of array default values"), + } + } + + let mut default_array = 
"kernel::module_param::ArrayParam::create(&[".to_string(); + default_array.push_str( + &default_vals + .iter() + .map(|val| val.to_string()) + .collect::>() + .join(","), + ); + default_array.push_str("])"); + default_array + } + }; + assert_eq!(expect_punct(param_it), ','); + default +} + +fn generated_array_ops_name(vals: &str, max_length: usize) -> String { + format!( + "__generated_array_ops_{vals}_{max_length}", + vals = vals, + max_length = max_length + ) +} + +#[derive(Debug, Default)] +struct ModuleInfo { + type_: String, + license: String, + name: String, + author: Option, + description: Option, + alias: Option, + params: Option, +} + +impl ModuleInfo { + fn parse(it: &mut token_stream::IntoIter) -> Self { + let mut info = ModuleInfo::default(); + + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "alias_rtnl_link", + "params", + ]; + const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; + let mut seen_keys = Vec::new(); + + loop { + let key = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + if seen_keys.contains(&key) { + panic!( + "Duplicated key \"{}\". Keys can only be specified once.", + key + ); + } + + assert_eq!(expect_punct(it), ':'); + + match key.as_str() { + "type" => info.type_ = expect_ident(it), + "name" => info.name = expect_byte_string(it), + "author" => info.author = Some(expect_byte_string(it)), + "description" => info.description = Some(expect_byte_string(it)), + "license" => info.license = expect_byte_string(it), + "alias" => info.alias = Some(expect_byte_string(it)), + "alias_rtnl_link" => { + info.alias = Some(format!("rtnl-link-{}", expect_byte_string(it))) + } + "params" => info.params = Some(expect_group(it)), + _ => panic!( + "Unknown key \"{}\". Valid keys are: {:?}.", + key, EXPECTED_KEYS + ), + } + + assert_eq!(expect_punct(it), ','); + + seen_keys.push(key); + } + + expect_end(it); + + for key in REQUIRED_KEYS { + if !seen_keys.iter().any(|e| e == key) { + panic!("Missing required key \"{}\".", key); + } + } + + let mut ordered_keys: Vec<&str> = Vec::new(); + for key in EXPECTED_KEYS { + if seen_keys.iter().any(|e| e == key) { + ordered_keys.push(key); + } + } + + if seen_keys != ordered_keys { + panic!( + "Keys are not ordered as expected. 
Order them like: {:?}.", + ordered_keys + ); + } + + info + } +} + +pub fn module(ts: TokenStream) -> TokenStream { + let mut it = ts.into_iter(); + + let info = ModuleInfo::parse(&mut it); + + let mut modinfo = ModInfoBuilder::new(info.name.as_ref()); + if let Some(author) = info.author { + modinfo.emit("author", &author); + } + if let Some(description) = info.description { + modinfo.emit("description", &description); + } + modinfo.emit("license", &info.license); + if let Some(alias) = info.alias { + modinfo.emit("alias", &alias); + } + + // Built-in modules also export the `file` modinfo string + let file = + std::env::var("RUST_MODFILE").expect("Unable to fetch RUST_MODFILE environmental variable"); + modinfo.emit_only_builtin("file", &file); + + let mut array_types_to_generate = Vec::new(); + if let Some(params) = info.params { + assert_eq!(params.delimiter(), Delimiter::Brace); + + let mut it = params.stream().into_iter(); + + loop { + let param_name = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + assert_eq!(expect_punct(&mut it), ':'); + let param_type = expect_type(&mut it); + let group = expect_group(&mut it); + assert_eq!(expect_punct(&mut it), ','); + + assert_eq!(group.delimiter(), Delimiter::Brace); + + let mut param_it = group.stream().into_iter(); + let param_default = get_default(¶m_type, &mut param_it); + let param_permissions = get_literal(&mut param_it, "permissions"); + let param_description = get_byte_string(&mut param_it, "description"); + expect_end(&mut param_it); + + // TODO: more primitive types + // TODO: other kinds: unsafes, etc. + let (param_kernel_type, ops): (String, _) = match param_type { + ParamType::Ident(ref param_type) => ( + param_type.to_string(), + param_ops_path(param_type).to_string(), + ), + ParamType::Array { + ref vals, + max_length, + } => { + array_types_to_generate.push((vals.clone(), max_length)); + ( + format!("__rust_array_param_{}_{}", vals, max_length), + generated_array_ops_name(vals, max_length), + ) + } + }; + + modinfo.emit_param("parmtype", ¶m_name, ¶m_kernel_type); + modinfo.emit_param("parm", ¶m_name, ¶m_description); + let param_type_internal = match param_type { + ParamType::Ident(ref param_type) => match param_type.as_ref() { + "str" => "kernel::module_param::StringParam".to_string(), + other => other.to_string(), + }, + ParamType::Array { + ref vals, + max_length, + } => format!( + "kernel::module_param::ArrayParam<{vals}, {max_length}>", + vals = vals, + max_length = max_length + ), + }; + let read_func = if permissions_are_readonly(¶m_permissions) { + format!( + " + fn read(&self) -> &<{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters do not need to be locked because they are read only or sysfs is not enabled. + unsafe {{ <{param_type_internal} as kernel::module_param::ModuleParam>::value(&__{name}_{param_name}_value) }} + }} + ", + name = info.name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + } else { + format!( + " + fn read<'lck>(&self, lock: &'lck kernel::KParamGuard) -> &'lck <{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters are locked by `KParamGuard`. 
+ unsafe {{ <{param_type_internal} as kernel::module_param::ModuleParam>::value(&__{name}_{param_name}_value) }} + }} + ", + name = info.name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + }; + let kparam = format!( + " + kernel::bindings::kernel_param__bindgen_ty_1 {{ + arg: unsafe {{ &__{name}_{param_name}_value }} as *const _ as *mut kernel::c_types::c_void, + }}, + ", + name = info.name, + param_name = param_name, + ); + modinfo.buffer.push_str( + &format!( + " + static mut __{name}_{param_name}_value: {param_type_internal} = {param_default}; + + struct __{name}_{param_name}; + + impl __{name}_{param_name} {{ {read_func} }} + + const {param_name}: __{name}_{param_name} = __{name}_{param_name}; + + // Note: the C macro that generates the static structs for the `__param` section + // asks for them to be `aligned(sizeof(void *))`. However, that was put in place + // in 2003 in commit 38d5b085d2 (\"[PATCH] Fix over-alignment problem on x86-64\") + // to undo GCC over-alignment of static structs of >32 bytes. It seems that is + // not the case anymore, so we simplify to a transparent representation here + // in the expectation that it is not needed anymore. + // TODO: revisit this to confirm the above comment and remove it if it happened + #[repr(transparent)] + struct __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param); + + unsafe impl Sync for __{name}_{param_name}_RacyKernelParam {{ + }} + + #[cfg(not(MODULE))] + const __{name}_{param_name}_name: *const kernel::c_types::c_char = b\"{name}.{param_name}\\0\" as *const _ as *const kernel::c_types::c_char; + + #[cfg(MODULE)] + const __{name}_{param_name}_name: *const kernel::c_types::c_char = b\"{param_name}\\0\" as *const _ as *const kernel::c_types::c_char; + + #[link_section = \"__param\"] + #[used] + static __{name}_{param_name}_struct: __{name}_{param_name}_RacyKernelParam = __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param {{ + name: __{name}_{param_name}_name, + // SAFETY: `__this_module` is constructed by the kernel at load time and will not be freed until the module is unloaded. + #[cfg(MODULE)] + mod_: unsafe {{ &kernel::bindings::__this_module as *const _ as *mut _ }}, + #[cfg(not(MODULE))] + mod_: core::ptr::null_mut(), + ops: unsafe {{ &{ops} }} as *const kernel::bindings::kernel_param_ops, + perm: {permissions}, + level: -1, + flags: 0, + __bindgen_anon_1: {kparam} + }}); + ", + name = info.name, + param_type_internal = param_type_internal, + read_func = read_func, + param_default = param_default, + param_name = param_name, + ops = ops, + permissions = param_permissions, + kparam = kparam, + ) + ); + } + } + + let mut generated_array_types = String::new(); + + for (vals, max_length) in array_types_to_generate { + let ops_name = generated_array_ops_name(&vals, max_length); + generated_array_types.push_str(&format!( + " + kernel::make_param_ops!( + {ops_name}, + kernel::module_param::ArrayParam<{vals}, {{ {max_length} }}> + ); + ", + ops_name = ops_name, + vals = vals, + max_length = max_length, + )); + } + + format!( + " + /// The module name. + /// + /// Used by the printing macros, e.g. [`info!`]. + const __LOG_PREFIX: &[u8] = b\"{name}\\0\"; + + static mut __MOD: Option<{type_}> = None; + + // SAFETY: `__this_module` is constructed by the kernel at load time and will not be freed until the module is unloaded. 
+ #[cfg(MODULE)] + static THIS_MODULE: kernel::ThisModule = unsafe {{ kernel::ThisModule::from_ptr(&kernel::bindings::__this_module as *const _ as *mut _) }}; + #[cfg(not(MODULE))] + static THIS_MODULE: kernel::ThisModule = unsafe {{ kernel::ThisModule::from_ptr(core::ptr::null_mut()) }}; + + // Loadable modules need to export the `{{init,cleanup}}_module` identifiers + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn init_module() -> kernel::c_types::c_int {{ + __init() + }} + + #[cfg(MODULE)] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn cleanup_module() {{ + __exit() + }} + + // Built-in modules are initialized through an initcall pointer + // and the identifiers need to be unique + #[cfg(not(MODULE))] + #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))] + #[doc(hidden)] + #[link_section = \"{initcall_section}\"] + #[used] + pub static __{name}_initcall: extern \"C\" fn() -> kernel::c_types::c_int = __{name}_init; + + #[cfg(not(MODULE))] + #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)] + global_asm!( + r#\".section \"{initcall_section}\", \"a\" + __{name}_initcall: + .long __{name}_init - . + .previous + \"# + ); + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_init() -> kernel::c_types::c_int {{ + __init() + }} + + #[cfg(not(MODULE))] + #[doc(hidden)] + #[no_mangle] + pub extern \"C\" fn __{name}_exit() {{ + __exit() + }} + + fn __init() -> kernel::c_types::c_int {{ + match <{type_} as kernel::KernelModule>::init() {{ + Ok(m) => {{ + unsafe {{ + __MOD = Some(m); + }} + return 0; + }} + Err(e) => {{ + return e.to_kernel_errno(); + }} + }} + }} + + fn __exit() {{ + unsafe {{ + // Invokes `drop()` on `__MOD`, which should be used for cleanup. + __MOD = None; + }} + }} + + {modinfo} + + {generated_array_types} + ", + type_ = info.type_, + name = info.name, + modinfo = modinfo.buffer, + generated_array_types = generated_array_types, + initcall_section = ".initcall6.init" + ).parse().expect("Error parsing formatted string into token stream.") +} + +pub fn module_misc_device(ts: TokenStream) -> TokenStream { + let mut it = ts.into_iter(); + + let info = ModuleInfo::parse(&mut it); + + let module = format!("__internal_ModuleFor{}", info.type_); + + format!( + " + #[doc(hidden)] + struct {module} {{ + _dev: core::pin::Pin>, + }} + + impl kernel::KernelModule for {module} {{ + fn init() -> kernel::Result {{ + Ok(Self {{ + _dev: kernel::miscdev::Registration::new_pinned::<{type_}>( + kernel::c_str!(\"{name}\"), + None, + (), + )?, + }}) + }} + }} + + kernel::prelude::module! 
{{ + type: {module}, + name: b\"{name}\", + {author} + {description} + license: b\"{license}\", + {alias} + }} + ", + module = module, + type_ = info.type_, + name = info.name, + author = info + .author + .map(|v| format!("author: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + description = info + .description + .map(|v| format!("description: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + alias = info + .alias + .map(|v| format!("alias: b\"{}\",", v)) + .unwrap_or_else(|| "".to_string()), + license = info.license + ) + .parse() + .expect("Error parsing formatted string into token stream.") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_permissions_are_readonly() { + assert!(permissions_are_readonly("0b000000000")); + assert!(permissions_are_readonly("0o000")); + assert!(permissions_are_readonly("000")); + assert!(permissions_are_readonly("0x000")); + + assert!(!permissions_are_readonly("0b111111111")); + assert!(!permissions_are_readonly("0o777")); + assert!(!permissions_are_readonly("511")); + assert!(!permissions_are_readonly("0x1ff")); + + assert!(permissions_are_readonly("0o014")); + assert!(permissions_are_readonly("0o015")); + + assert!(!permissions_are_readonly("0o214")); + assert!(!permissions_are_readonly("0o024")); + assert!(!permissions_are_readonly("0o012")); + + assert!(!permissions_are_readonly("0o315")); + assert!(!permissions_are_readonly("0o065")); + assert!(!permissions_are_readonly("0o017")); + } +} From 82969eee6804dbe3240a966fc182e649162a75d1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 2 Oct 2021 17:02:23 -0700 Subject: [PATCH 0240/1202] m68k: set a default value for MEMORY_RESERVE 'make randconfig' can produce a .config file with "CONFIG_MEMORY_RESERVE=" (no value) since it has no default. When a subsequent 'make all' is done, kconfig restarts the config and prompts for a value for MEMORY_RESERVE. This breaks scripting/automation where there is no interactive user input. Add a default value for MEMORY_RESERVE. (Any integer value will work here for kconfig.) Fixes a kconfig warning: .config:214:warning: symbol value '' invalid for MEMORY_RESERVE * Restart config... Memory reservation (MiB) (MEMORY_RESERVE) [] (NEW) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") # from beginning of git history Signed-off-by: Randy Dunlap Reviewed-by: Geert Uytterhoeven Cc: Greg Ungerer Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Greg Ungerer --- arch/m68k/Kconfig.machine | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index 36fa0c3ef1296..eeab4f3e6c197 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -203,6 +203,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. From b7c534f5015b9af21574b89aa108732bafecbd74 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 4 Oct 2021 09:02:31 +0200 Subject: [PATCH 0241/1202] m68knommu: Remove MCPU32 config symbol As of commit a3595962d82495f5 ("m68knommu: remove obsolete 68360 support"), nothing selects MCPU32 anymore. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Ungerer --- arch/m68k/Kconfig.cpu | 11 ----------- arch/m68k/include/asm/bitops.h | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 277d61a094637..0d00ef5117dce 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -53,17 +53,6 @@ config M68000 System-On-Chip devices (eg 68328, 68302, etc). It does not contain a paging MMU. -config MCPU32 - bool - select CPU_HAS_NO_BITFIELDS - select CPU_HAS_NO_CAS - select CPU_HAS_NO_UNALIGNED - select CPU_NO_EFFICIENT_FFS - help - The Freescale (was then Motorola) CPU32 is a CPU core that is - based on the 68020 processor. For the most part it is used in - System-On-Chip parts, and does not contain a paging MMU. - config M68020 bool "68020 support" depends on MMU diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 7b414099e5fc2..7b93e1fd8ffa9 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -451,7 +451,7 @@ static inline unsigned long ffz(unsigned long word) * generic functions for those. */ #if (defined(__mcfisaaplus__) || defined(__mcfisac__)) && \ - !defined(CONFIG_M68000) && !defined(CONFIG_MCPU32) + !defined(CONFIG_M68000) static inline unsigned long __ffs(unsigned long x) { __asm__ __volatile__ ("bitrev %0; ff1 %0" From ec8de6b8cec23078e1f5ec1c59177b199ce5f692 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 15 Oct 2021 16:55:43 +0800 Subject: [PATCH 0242/1202] USB: serial: keyspan: fix memleak on probe errors I got memory leak as follows when doing fault injection test: unreferenced object 0xffff888258228440 (size 64): comm "kworker/7:2", pid 2005, jiffies 4294989509 (age 824.540s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] slab_post_alloc_hook+0x9c/0x490 [] kmem_cache_alloc_trace+0x1f7/0x470 [] keyspan_port_probe+0xa4/0x5d0 [keyspan] [] usb_serial_device_probe+0x97/0x1d0 [usbserial] [] really_probe+0x167/0x460 [] __driver_probe_device+0xf9/0x180 [] driver_probe_device+0x53/0x130 [] __device_attach_driver+0x105/0x130 [] bus_for_each_drv+0x129/0x190 [] __device_attach+0x1c9/0x270 [] device_initial_probe+0x20/0x30 [] bus_probe_device+0x142/0x160 [] device_add+0x829/0x1300 [] usb_serial_probe.cold+0xc9b/0x14ac [usbserial] [] usb_probe_interface+0x1aa/0x3c0 [usbcore] [] really_probe+0x167/0x460 If keyspan_port_probe() fails to allocate memory for an out_buffer[i] or in_buffer[i], the previously allocated memory for out_buffer or in_buffer needs to be freed on the error handling path, otherwise a memory leak will result. 
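The fix below renames the error labels after what they free and unwinds
cumulatively. A condensed sketch of the pattern (the names here are
illustrative, not the driver's):

	int i;

	for (i = 0; i < N_BUFS; ++i) {
		priv->buf[i] = kzalloc(BUF_LEN, GFP_KERNEL);
		if (!priv->buf[i])
			goto err_free_bufs;	/* unwind everything allocated so far */
	}
	return 0;

err_free_bufs:
	/* priv was kzalloc'ed, so unused slots are NULL and kfree(NULL) is a no-op */
	for (i = 0; i < N_BUFS; ++i)
		kfree(priv->buf[i]);
	kfree(priv);
	return -ENOMEM;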
Fixes: bad41a5bf177 ("USB: keyspan: fix port DMA-buffer allocations") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20211015085543.1203011-1-wanghai38@huawei.com Cc: stable@vger.kernel.org # 3.12 Signed-off-by: Johan Hovold --- drivers/usb/serial/keyspan.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 87b89c99d5177..1cfcd805f2868 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -2890,22 +2890,22 @@ static int keyspan_port_probe(struct usb_serial_port *port) for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) { p_priv->in_buffer[i] = kzalloc(IN_BUFLEN, GFP_KERNEL); if (!p_priv->in_buffer[i]) - goto err_in_buffer; + goto err_free_in_buffer; } for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) { p_priv->out_buffer[i] = kzalloc(OUT_BUFLEN, GFP_KERNEL); if (!p_priv->out_buffer[i]) - goto err_out_buffer; + goto err_free_out_buffer; } p_priv->inack_buffer = kzalloc(INACK_BUFLEN, GFP_KERNEL); if (!p_priv->inack_buffer) - goto err_inack_buffer; + goto err_free_out_buffer; p_priv->outcont_buffer = kzalloc(OUTCONT_BUFLEN, GFP_KERNEL); if (!p_priv->outcont_buffer) - goto err_outcont_buffer; + goto err_free_inack_buffer; p_priv->device_details = d_details; @@ -2951,15 +2951,14 @@ static int keyspan_port_probe(struct usb_serial_port *port) return 0; -err_outcont_buffer: +err_free_inack_buffer: kfree(p_priv->inack_buffer); -err_inack_buffer: +err_free_out_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) kfree(p_priv->out_buffer[i]); -err_out_buffer: +err_free_in_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) kfree(p_priv->in_buffer[i]); -err_in_buffer: kfree(p_priv); return -ENOMEM; From 7f3e6a91ca123c2a39734ce72350f10e884b6cdf Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 29 Sep 2021 17:38:37 +0100 Subject: [PATCH 0243/1202] PCI: apple: Add initial hardware bring-up Add a minimal driver to bring up the PCIe bus on Apple system-on-chips, particularly the Apple M1. This driver exposes the internal bus used for the USB type-A ports, Ethernet, Wi-Fi, and Bluetooth. Bringing up the radios requires additional drivers beyond what's necessary for PCIe itself. 
Co-developed-by: Stan Skowronek Link: https://lore.kernel.org/r/20210929163847.2807812-5-maz@kernel.org Signed-off-by: Stan Skowronek Signed-off-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring Reviewed-by: Sven Peter --- MAINTAINERS | 7 + drivers/pci/controller/Kconfig | 13 ++ drivers/pci/controller/Makefile | 1 + drivers/pci/controller/pcie-apple.c | 238 ++++++++++++++++++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 drivers/pci/controller/pcie-apple.c diff --git a/MAINTAINERS b/MAINTAINERS index ca6d6fde85cf8..af7b775fd54fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1280,6 +1280,13 @@ S: Maintained F: Documentation/devicetree/bindings/iommu/apple,dart.yaml F: drivers/iommu/apple-dart.c +APPLE PCIE CONTROLLER DRIVER +M: Alyssa Rosenzweig +M: Marc Zyngier +L: linux-pci@vger.kernel.org +S: Maintained +F: drivers/pci/controller/pcie-apple.c + APPLE SMC DRIVER M: Henrik Rydberg L: linux-hwmon@vger.kernel.org diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 326f7d13024f9..953d9acc031f8 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -312,6 +312,19 @@ config PCIE_HISI_ERR Say Y here if you want error handling support for the PCIe controller's errors on HiSilicon HIP SoCs +config PCIE_APPLE + tristate "Apple PCIe controller" + depends on ARCH_APPLE || COMPILE_TEST + depends on OF + depends on PCI_MSI_IRQ_DOMAIN + select PCI_HOST_COMMON + help + Say Y here if you want to enable PCIe controller support on Apple + system-on-chips, like the Apple M1. This is required for the USB + type-A ports, Ethernet, Wi-Fi, and Bluetooth. + + If unsure, say Y if you have an Apple Silicon system. + source "drivers/pci/controller/dwc/Kconfig" source "drivers/pci/controller/mobiveil/Kconfig" source "drivers/pci/controller/cadence/Kconfig" diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index aaf30b3dcc143..f9d40bad932cf 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_VMD) += vmd.o obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o +obj-$(CONFIG_PCIE_APPLE) += pcie-apple.o # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW obj-y += dwc/ obj-y += mobiveil/ diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c new file mode 100644 index 0000000000000..ed78af70dbe70 --- /dev/null +++ b/drivers/pci/controller/pcie-apple.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCIe host bridge driver for Apple system-on-chips. + * + * The HW is ECAM compliant, so once the controller is initialized, + * the driver mostly deals MSI mapping and handling of per-port + * interrupts (INTx, management and error signals). + * + * Initialization requires enabling power and clocks, along with a + * number of register pokes. 
+ * + * Copyright (C) 2021 Alyssa Rosenzweig + * Copyright (C) 2021 Google LLC + * Copyright (C) 2021 Corellium LLC + * Copyright (C) 2021 Mark Kettenis + * + * Author: Alyssa Rosenzweig + * Author: Marc Zyngier + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define CORE_RC_PHYIF_CTL 0x00024 +#define CORE_RC_PHYIF_CTL_RUN BIT(0) +#define CORE_RC_PHYIF_STAT 0x00028 +#define CORE_RC_PHYIF_STAT_REFCLK BIT(4) +#define CORE_RC_CTL 0x00050 +#define CORE_RC_CTL_RUN BIT(0) +#define CORE_RC_STAT 0x00058 +#define CORE_RC_STAT_READY BIT(0) +#define CORE_FABRIC_STAT 0x04000 +#define CORE_FABRIC_STAT_MASK 0x001F001F +#define CORE_LANE_CFG(port) (0x84000 + 0x4000 * (port)) +#define CORE_LANE_CFG_REFCLK0REQ BIT(0) +#define CORE_LANE_CFG_REFCLK1 BIT(1) +#define CORE_LANE_CFG_REFCLK0ACK BIT(2) +#define CORE_LANE_CFG_REFCLKEN (BIT(9) | BIT(10)) +#define CORE_LANE_CTL(port) (0x84004 + 0x4000 * (port)) +#define CORE_LANE_CTL_CFGACC BIT(15) + +#define PORT_LTSSMCTL 0x00080 +#define PORT_LTSSMCTL_START BIT(0) +#define PORT_INTSTAT 0x00100 +#define PORT_INT_TUNNEL_ERR 31 +#define PORT_INT_CPL_TIMEOUT 23 +#define PORT_INT_RID2SID_MAPERR 22 +#define PORT_INT_CPL_ABORT 21 +#define PORT_INT_MSI_BAD_DATA 19 +#define PORT_INT_MSI_ERR 18 +#define PORT_INT_REQADDR_GT32 17 +#define PORT_INT_AF_TIMEOUT 15 +#define PORT_INT_LINK_DOWN 14 +#define PORT_INT_LINK_UP 12 +#define PORT_INT_LINK_BWMGMT 11 +#define PORT_INT_AER_MASK (15 << 4) +#define PORT_INT_PORT_ERR 4 +#define PORT_INT_INTx(i) i +#define PORT_INT_INTx_MASK 15 +#define PORT_INTMSK 0x00104 +#define PORT_INTMSKSET 0x00108 +#define PORT_INTMSKCLR 0x0010c +#define PORT_MSICFG 0x00124 +#define PORT_MSICFG_EN BIT(0) +#define PORT_MSICFG_L2MSINUM_SHIFT 4 +#define PORT_MSIBASE 0x00128 +#define PORT_MSIBASE_1_SHIFT 16 +#define PORT_MSIADDR 0x00168 +#define PORT_LINKSTS 0x00208 +#define PORT_LINKSTS_UP BIT(0) +#define PORT_LINKSTS_BUSY BIT(2) +#define PORT_LINKCMDSTS 0x00210 +#define PORT_OUTS_NPREQS 0x00284 +#define PORT_OUTS_NPREQS_REQ BIT(24) +#define PORT_OUTS_NPREQS_CPL BIT(16) +#define PORT_RXWR_FIFO 0x00288 +#define PORT_RXWR_FIFO_HDR GENMASK(15, 10) +#define PORT_RXWR_FIFO_DATA GENMASK(9, 0) +#define PORT_RXRD_FIFO 0x0028C +#define PORT_RXRD_FIFO_REQ GENMASK(6, 0) +#define PORT_OUTS_CPLS 0x00290 +#define PORT_OUTS_CPLS_SHRD GENMASK(14, 8) +#define PORT_OUTS_CPLS_WAIT GENMASK(6, 0) +#define PORT_APPCLK 0x00800 +#define PORT_APPCLK_EN BIT(0) +#define PORT_APPCLK_CGDIS BIT(8) +#define PORT_STATUS 0x00804 +#define PORT_STATUS_READY BIT(0) +#define PORT_REFCLK 0x00810 +#define PORT_REFCLK_EN BIT(0) +#define PORT_REFCLK_CGDIS BIT(8) +#define PORT_PERST 0x00814 +#define PORT_PERST_OFF BIT(0) +#define PORT_RID2SID(i16) (0x00828 + 4 * (i16)) +#define PORT_RID2SID_VALID BIT(31) +#define PORT_RID2SID_SID_SHIFT 16 +#define PORT_RID2SID_BUS_SHIFT 8 +#define PORT_RID2SID_DEV_SHIFT 3 +#define PORT_RID2SID_FUNC_SHIFT 0 +#define PORT_OUTS_PREQS_HDR 0x00980 +#define PORT_OUTS_PREQS_HDR_MASK GENMASK(9, 0) +#define PORT_OUTS_PREQS_DATA 0x00984 +#define PORT_OUTS_PREQS_DATA_MASK GENMASK(15, 0) +#define PORT_TUNCTRL 0x00988 +#define PORT_TUNCTRL_PERST_ON BIT(0) +#define PORT_TUNCTRL_PERST_ACK_REQ BIT(1) +#define PORT_TUNSTAT 0x0098c +#define PORT_TUNSTAT_PERST_ON BIT(0) +#define PORT_TUNSTAT_PERST_ACK_PEND BIT(1) +#define PORT_PREFMEM_ENABLE 0x00994 + +struct apple_pcie { + struct device *dev; + void __iomem *base; +}; + +struct apple_pcie_port { + struct apple_pcie *pcie; + struct device_node *np; + void __iomem *base; + int idx; +}; + +static 
void rmw_set(u32 set, void __iomem *addr) +{ + writel_relaxed(readl_relaxed(addr) | set, addr); +} + +static int apple_pcie_setup_port(struct apple_pcie *pcie, + struct device_node *np) +{ + struct platform_device *platform = to_platform_device(pcie->dev); + struct apple_pcie_port *port; + struct gpio_desc *reset; + u32 stat, idx; + int ret; + + reset = gpiod_get_from_of_node(np, "reset-gpios", 0, + GPIOD_OUT_LOW, "#PERST"); + if (IS_ERR(reset)) + return PTR_ERR(reset); + + port = devm_kzalloc(pcie->dev, sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + + ret = of_property_read_u32_index(np, "reg", 0, &idx); + if (ret) + return ret; + + /* Use the first reg entry to work out the port index */ + port->idx = idx >> 11; + port->pcie = pcie; + port->np = np; + + port->base = devm_platform_ioremap_resource(platform, port->idx + 2); + if (IS_ERR(port->base)) + return PTR_ERR(port->base); + + rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); + + rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); + gpiod_set_value(reset, 1); + + ret = readl_relaxed_poll_timeout(port->base + PORT_STATUS, stat, + stat & PORT_STATUS_READY, 100, 250000); + if (ret < 0) { + dev_err(pcie->dev, "port %pOF ready wait timeout\n", np); + return ret; + } + + writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL); + + return 0; +} + +static int apple_pcie_init(struct pci_config_window *cfg) +{ + struct device *dev = cfg->parent; + struct platform_device *platform = to_platform_device(dev); + struct device_node *of_port; + struct apple_pcie *pcie; + int ret; + + pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); + if (!pcie) + return -ENOMEM; + + pcie->dev = dev; + + pcie->base = devm_platform_ioremap_resource(platform, 1); + if (IS_ERR(pcie->base)) + return PTR_ERR(pcie->base); + + for_each_child_of_node(dev->of_node, of_port) { + ret = apple_pcie_setup_port(pcie, of_port); + if (ret) { + dev_err(pcie->dev, "Port %pOF setup fail: %d\n", of_port, ret); + of_node_put(of_port); + return ret; + } + } + + return 0; +} + +static const struct pci_ecam_ops apple_pcie_cfg_ecam_ops = { + .init = apple_pcie_init, + .pci_ops = { + .map_bus = pci_ecam_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, + } +}; + +static const struct of_device_id apple_pcie_of_match[] = { + { .compatible = "apple,pcie", .data = &apple_pcie_cfg_ecam_ops }, + { } +}; +MODULE_DEVICE_TABLE(of, apple_pcie_of_match); + +static struct platform_driver apple_pcie_driver = { + .probe = pci_host_common_probe, + .driver = { + .name = "pcie-apple", + .of_match_table = apple_pcie_of_match, + .suppress_bind_attrs = true, + }, +}; +module_platform_driver(apple_pcie_driver); + +MODULE_LICENSE("GPL v2"); From 5e16921fdd2131aaf6a3d255ee5cb73df947feb3 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 29 Sep 2021 17:38:38 +0100 Subject: [PATCH 0244/1202] PCI: apple: Set up reference clocks when probing Apple's PCIe controller requires clocks to be configured in order to bring up the hardware. Add the register pokes required to do so. Adapted from Corellium's driver via Mark Kettenis's U-Boot patches. 
Co-developed-by: Stan Skowronek Link: https://lore.kernel.org/r/20210929163847.2807812-6-maz@kernel.org Signed-off-by: Stan Skowronek Signed-off-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pcie-apple.c | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index ed78af70dbe70..66d4bc2f72e16 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -132,6 +132,48 @@ static void rmw_set(u32 set, void __iomem *addr) writel_relaxed(readl_relaxed(addr) | set, addr); } +static void rmw_clear(u32 clr, void __iomem *addr) +{ + writel_relaxed(readl_relaxed(addr) & ~clr, addr); +} + +static int apple_pcie_setup_refclk(struct apple_pcie *pcie, + struct apple_pcie_port *port) +{ + u32 stat; + int res; + + res = readl_relaxed_poll_timeout(pcie->base + CORE_RC_PHYIF_STAT, stat, + stat & CORE_RC_PHYIF_STAT_REFCLK, + 100, 50000); + if (res < 0) + return res; + + rmw_set(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx)); + rmw_set(CORE_LANE_CFG_REFCLK0REQ, pcie->base + CORE_LANE_CFG(port->idx)); + + res = readl_relaxed_poll_timeout(pcie->base + CORE_LANE_CFG(port->idx), + stat, stat & CORE_LANE_CFG_REFCLK0ACK, + 100, 50000); + if (res < 0) + return res; + + rmw_set(CORE_LANE_CFG_REFCLK1, pcie->base + CORE_LANE_CFG(port->idx)); + res = readl_relaxed_poll_timeout(pcie->base + CORE_LANE_CFG(port->idx), + stat, stat & CORE_LANE_CFG_REFCLK1, + 100, 50000); + + if (res < 0) + return res; + + rmw_clear(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx)); + + rmw_set(CORE_LANE_CFG_REFCLKEN, pcie->base + CORE_LANE_CFG(port->idx)); + rmw_set(PORT_REFCLK_EN, port->base + PORT_REFCLK); + + return 0; +} + static int apple_pcie_setup_port(struct apple_pcie *pcie, struct device_node *np) { @@ -165,6 +207,10 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); + ret = apple_pcie_setup_refclk(pcie, port); + if (ret < 0) + return ret; + rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); gpiod_set_value(reset, 1); From f0aba6951e8fbb81bdc41b416200ec70416dae1b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:39 +0100 Subject: [PATCH 0245/1202] PCI: apple: Add INTx and per-port interrupt support Add support for the per-port interrupt controller that deals with both INTx signalling and management interrupts. This allows the Link-up/Link-down interrupts to be wired, allowing the bring-up to be synchronised (and provide debug information). The framework can further be used to handle the rest of the per port events if and when necessary. Likewise, INTx signalling is implemented so that end-points can actually be used. 
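Condensing the hunks below, the bring-up synchronisation amounts to a
completion that the Link-up handler fires and the port setup path waits
on with a timeout:

	/* in apple_pcie_port_irq() */
	case PORT_INT_LINK_UP:
		complete_all(&port->pcie->event);
		break;

	/* in apple_pcie_setup_port() */
	init_completion(&pcie->event);
	writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL);
	if (!wait_for_completion_timeout(&pcie->event, HZ / 10))
		dev_warn(pcie->dev, "%pOF link didn't come up\n", np);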
Link: https://lore.kernel.org/r/20210929163847.2807812-7-maz@kernel.org Link: https://lore.kernel.org/r/20211004150552.3844830-1-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pcie-apple.c | 209 ++++++++++++++++++++++++++++ kernel/irq/irqdomain.c | 1 + 2 files changed, 210 insertions(+) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 66d4bc2f72e16..55a334893cd6a 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -118,12 +119,14 @@ struct apple_pcie { struct device *dev; void __iomem *base; + struct completion event; }; struct apple_pcie_port { struct apple_pcie *pcie; struct device_node *np; void __iomem *base; + struct irq_domain *domain; int idx; }; @@ -137,6 +140,200 @@ static void rmw_clear(u32 clr, void __iomem *addr) writel_relaxed(readl_relaxed(addr) & ~clr, addr); } +static void apple_port_irq_mask(struct irq_data *data) +{ + struct apple_pcie_port *port = irq_data_get_irq_chip_data(data); + + writel_relaxed(BIT(data->hwirq), port->base + PORT_INTMSKSET); +} + +static void apple_port_irq_unmask(struct irq_data *data) +{ + struct apple_pcie_port *port = irq_data_get_irq_chip_data(data); + + writel_relaxed(BIT(data->hwirq), port->base + PORT_INTMSKCLR); +} + +static bool hwirq_is_intx(unsigned int hwirq) +{ + return BIT(hwirq) & PORT_INT_INTx_MASK; +} + +static void apple_port_irq_ack(struct irq_data *data) +{ + struct apple_pcie_port *port = irq_data_get_irq_chip_data(data); + + if (!hwirq_is_intx(data->hwirq)) + writel_relaxed(BIT(data->hwirq), port->base + PORT_INTSTAT); +} + +static int apple_port_irq_set_type(struct irq_data *data, unsigned int type) +{ + /* + * It doesn't seem that there is any way to configure the + * trigger, so assume INTx have to be level (as per the spec), + * and the rest is edge (which looks likely). 
+ */ + if (hwirq_is_intx(data->hwirq) ^ !!(type & IRQ_TYPE_LEVEL_MASK)) + return -EINVAL; + + irqd_set_trigger_type(data, type); + return 0; +} + +static struct irq_chip apple_port_irqchip = { + .name = "PCIe", + .irq_ack = apple_port_irq_ack, + .irq_mask = apple_port_irq_mask, + .irq_unmask = apple_port_irq_unmask, + .irq_set_type = apple_port_irq_set_type, +}; + +static int apple_port_irq_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *args) +{ + struct apple_pcie_port *port = domain->host_data; + struct irq_fwspec *fwspec = args; + int i; + + for (i = 0; i < nr_irqs; i++) { + irq_flow_handler_t flow = handle_edge_irq; + unsigned int type = IRQ_TYPE_EDGE_RISING; + + if (hwirq_is_intx(fwspec->param[0] + i)) { + flow = handle_level_irq; + type = IRQ_TYPE_LEVEL_HIGH; + } + + irq_domain_set_info(domain, virq + i, fwspec->param[0] + i, + &apple_port_irqchip, port, flow, + NULL, NULL); + + irq_set_irq_type(virq + i, type); + } + + return 0; +} + +static void apple_port_irq_domain_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); + irq_set_handler(virq + i, NULL); + irq_domain_reset_irq_data(d); + } +} + +static const struct irq_domain_ops apple_port_irq_domain_ops = { + .translate = irq_domain_translate_onecell, + .alloc = apple_port_irq_domain_alloc, + .free = apple_port_irq_domain_free, +}; + +static void apple_port_irq_handler(struct irq_desc *desc) +{ + struct apple_pcie_port *port = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned long stat; + int i; + + chained_irq_enter(chip, desc); + + stat = readl_relaxed(port->base + PORT_INTSTAT); + + for_each_set_bit(i, &stat, 32) + generic_handle_domain_irq(port->domain, i); + + chained_irq_exit(chip, desc); +} + +static int apple_pcie_port_setup_irq(struct apple_pcie_port *port) +{ + struct fwnode_handle *fwnode = &port->np->fwnode; + unsigned int irq; + + /* FIXME: consider moving each interrupt under each port */ + irq = irq_of_parse_and_map(to_of_node(dev_fwnode(port->pcie->dev)), + port->idx); + if (!irq) + return -ENXIO; + + port->domain = irq_domain_create_linear(fwnode, 32, + &apple_port_irq_domain_ops, + port); + if (!port->domain) + return -ENOMEM; + + /* Disable all interrupts */ + writel_relaxed(~0, port->base + PORT_INTMSKSET); + writel_relaxed(~0, port->base + PORT_INTSTAT); + + irq_set_chained_handler_and_data(irq, apple_port_irq_handler, port); + + return 0; +} + +static irqreturn_t apple_pcie_port_irq(int irq, void *data) +{ + struct apple_pcie_port *port = data; + unsigned int hwirq = irq_domain_get_irq_data(port->domain, irq)->hwirq; + + switch (hwirq) { + case PORT_INT_LINK_UP: + dev_info_ratelimited(port->pcie->dev, "Link up on %pOF\n", + port->np); + complete_all(&port->pcie->event); + break; + case PORT_INT_LINK_DOWN: + dev_info_ratelimited(port->pcie->dev, "Link down on %pOF\n", + port->np); + break; + default: + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +static int apple_pcie_port_register_irqs(struct apple_pcie_port *port) +{ + static struct { + unsigned int hwirq; + const char *name; + } port_irqs[] = { + { PORT_INT_LINK_UP, "Link up", }, + { PORT_INT_LINK_DOWN, "Link down", }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(port_irqs); i++) { + struct irq_fwspec fwspec = { + .fwnode = &port->np->fwnode, + .param_count = 1, + .param = { + [0] = port_irqs[i].hwirq, + }, + }; + unsigned int 
irq; + int ret; + + irq = irq_domain_alloc_irqs(port->domain, 1, NUMA_NO_NODE, + &fwspec); + if (WARN_ON(!irq)) + continue; + + ret = request_irq(irq, apple_pcie_port_irq, 0, + port_irqs[i].name, port); + WARN_ON(ret); + } + + return 0; +} + static int apple_pcie_setup_refclk(struct apple_pcie *pcie, struct apple_pcie_port *port) { @@ -221,8 +418,20 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, return ret; } + ret = apple_pcie_port_setup_irq(port); + if (ret) + return ret; + + init_completion(&pcie->event); + + ret = apple_pcie_port_register_irqs(port); + WARN_ON(ret); + writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL); + if (!wait_for_completion_timeout(&pcie->event, HZ / 10)) + dev_warn(pcie->dev, "%pOF link didn't come up\n", np); + return 0; } diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 5a698c1f6cc68..40e85a46f9132 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1502,6 +1502,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, irq_free_descs(virq, nr_irqs); return ret; } +EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); /* The irq_data was moved, fix the revmap to refer to the new location */ static void irq_domain_fix_revmap(struct irq_data *d) From 51cda3849048d88333a3177f6be3618e419120ed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:40 +0100 Subject: [PATCH 0246/1202] PCI: apple: Implement MSI support Probe for the 'msi-ranges' property, and implement the MSI support in the form of the usual two-level hierarchy. Note that contrary to the wired interrupts, MSIs are shared among all the ports. Link: https://lore.kernel.org/r/20210929163847.2807812-8-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pcie-apple.c | 169 +++++++++++++++++++++++++++- 1 file changed, 168 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 55a334893cd6a..c18bf02d10b53 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -116,10 +116,22 @@ #define PORT_TUNSTAT_PERST_ACK_PEND BIT(1) #define PORT_PREFMEM_ENABLE 0x00994 +/* + * The doorbell address is set to 0xfffff000, which by convention + * matches what MacOS does, and it is possible to use any other + * address (in the bottom 4GB, as the base register is only 32bit). 
+ */ +#define DOORBELL_ADDR 0xfffff000 + struct apple_pcie { + struct mutex lock; struct device *dev; void __iomem *base; + struct irq_domain *domain; + unsigned long *bitmap; struct completion event; + struct irq_fwspec fwspec; + u32 nvecs; }; struct apple_pcie_port { @@ -140,6 +152,101 @@ static void rmw_clear(u32 clr, void __iomem *addr) writel_relaxed(readl_relaxed(addr) & ~clr, addr); } +static void apple_msi_top_irq_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void apple_msi_top_irq_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip apple_msi_top_chip = { + .name = "PCIe MSI", + .irq_mask = apple_msi_top_irq_mask, + .irq_unmask = apple_msi_top_irq_unmask, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_set_type = irq_chip_set_type_parent, +}; + +static void apple_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + msg->address_hi = upper_32_bits(DOORBELL_ADDR); + msg->address_lo = lower_32_bits(DOORBELL_ADDR); + msg->data = data->hwirq; +} + +static struct irq_chip apple_msi_bottom_chip = { + .name = "MSI", + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_set_type = irq_chip_set_type_parent, + .irq_compose_msi_msg = apple_msi_compose_msg, +}; + +static int apple_msi_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct apple_pcie *pcie = domain->host_data; + struct irq_fwspec fwspec = pcie->fwspec; + unsigned int i; + int ret, hwirq; + + mutex_lock(&pcie->lock); + + hwirq = bitmap_find_free_region(pcie->bitmap, pcie->nvecs, + order_base_2(nr_irqs)); + + mutex_unlock(&pcie->lock); + + if (hwirq < 0) + return -ENOSPC; + + fwspec.param[1] += hwirq; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &fwspec); + if (ret) + return ret; + + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &apple_msi_bottom_chip, + domain->host_data); + } + + return 0; +} + +static void apple_msi_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct apple_pcie *pcie = domain->host_data; + + mutex_lock(&pcie->lock); + + bitmap_release_region(pcie->bitmap, d->hwirq, order_base_2(nr_irqs)); + + mutex_unlock(&pcie->lock); +} + +static const struct irq_domain_ops apple_msi_domain_ops = { + .alloc = apple_msi_domain_alloc, + .free = apple_msi_domain_free, +}; + +static struct msi_domain_info apple_msi_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .chip = &apple_msi_top_chip, +}; + static void apple_port_irq_mask(struct irq_data *data) { struct apple_pcie_port *port = irq_data_get_irq_chip_data(data); @@ -274,6 +381,15 @@ static int apple_pcie_port_setup_irq(struct apple_pcie_port *port) irq_set_chained_handler_and_data(irq, apple_port_irq_handler, port); + /* Configure MSI base address */ + BUILD_BUG_ON(upper_32_bits(DOORBELL_ADDR)); + writel_relaxed(lower_32_bits(DOORBELL_ADDR), port->base + PORT_MSIADDR); + + /* Enable MSIs, shared between all ports */ + writel_relaxed(0, port->base + PORT_MSIBASE); + writel_relaxed((ilog2(port->pcie->nvecs) << PORT_MSICFG_L2MSINUM_SHIFT) | + PORT_MSICFG_EN, port->base + PORT_MSICFG); + return 0; } @@ -435,6 
+551,55 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, return 0; } +static int apple_msi_init(struct apple_pcie *pcie) +{ + struct fwnode_handle *fwnode = dev_fwnode(pcie->dev); + struct of_phandle_args args = {}; + struct irq_domain *parent; + int ret; + + ret = of_parse_phandle_with_args(to_of_node(fwnode), "msi-ranges", + "#interrupt-cells", 0, &args); + if (ret) + return ret; + + ret = of_property_read_u32_index(to_of_node(fwnode), "msi-ranges", + args.args_count + 1, &pcie->nvecs); + if (ret) + return ret; + + of_phandle_args_to_fwspec(args.np, args.args, args.args_count, + &pcie->fwspec); + + pcie->bitmap = devm_bitmap_zalloc(pcie->dev, pcie->nvecs, GFP_KERNEL); + if (!pcie->bitmap) + return -ENOMEM; + + parent = irq_find_matching_fwspec(&pcie->fwspec, DOMAIN_BUS_WIRED); + if (!parent) { + dev_err(pcie->dev, "failed to find parent domain\n"); + return -ENXIO; + } + + parent = irq_domain_create_hierarchy(parent, 0, pcie->nvecs, fwnode, + &apple_msi_domain_ops, pcie); + if (!parent) { + dev_err(pcie->dev, "failed to create IRQ domain\n"); + return -ENOMEM; + } + irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS); + + pcie->domain = pci_msi_create_irq_domain(fwnode, &apple_msi_info, + parent); + if (!pcie->domain) { + dev_err(pcie->dev, "failed to create MSI domain\n"); + irq_domain_remove(parent); + return -ENOMEM; + } + + return 0; +} + static int apple_pcie_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -449,6 +614,8 @@ static int apple_pcie_init(struct pci_config_window *cfg) pcie->dev = dev; + mutex_init(&pcie->lock); + pcie->base = devm_platform_ioremap_resource(platform, 1); if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); @@ -462,7 +629,7 @@ static int apple_pcie_init(struct pci_config_window *cfg) } } - return 0; + return apple_msi_init(pcie); } static const struct pci_ecam_ops apple_pcie_cfg_ecam_ops = { From ccccdd9e03f895792822236e849a38a50dd8c742 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:41 +0100 Subject: [PATCH 0247/1202] iommu/dart: Exclude MSI doorbell from PCIe device IOVA range The MSI doorbell on Apple HW can be any address in the low 4GB range. However, the MSI write is matched by the PCIe block before hitting the iommu. It must thus be excluded from the IOVA range that is assigned to any PCIe device. 
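For readers unfamiliar with IOMMU reserved regions, the mechanism hooked into here can be sketched as follows; this is a minimal illustration (function name hypothetical, doorbell constant taken from the previous patch), not the patch itself:

static void example_get_resv_regions(struct device *dev,
				     struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	/* Carve the doorbell page out of the device's usable IOVA space */
	region = iommu_alloc_resv_region(0xfffff000, PAGE_SIZE, prot,
					 IOMMU_RESV_MSI);
	if (region)
		list_add_tail(&region->list, head);
}

Marking the region IOMMU_RESV_MSI keeps the DMA layer from ever handing that page out as IOVA, so the MSI write continues to be matched by the PCIe block as intended.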
Link: https://lore.kernel.org/r/20210929163847.2807812-9-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Reviewed-by: Sven Peter --- drivers/iommu/apple-dart.c | 27 +++++++++++++++++++++++++++ drivers/pci/controller/Kconfig | 5 +++++ drivers/pci/controller/pcie-apple.c | 4 +++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 559db9259e65c..f1f1f024c604c 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -721,6 +721,31 @@ static int apple_dart_def_domain_type(struct device *dev) return 0; } +#ifndef CONFIG_PCIE_APPLE_MSI_DOORBELL_ADDR +/* Keep things compiling when CONFIG_PCI_APPLE isn't selected */ +#define CONFIG_PCIE_APPLE_MSI_DOORBELL_ADDR 0 +#endif +#define DOORBELL_ADDR (CONFIG_PCIE_APPLE_MSI_DOORBELL_ADDR & PAGE_MASK) + +static void apple_dart_get_resv_regions(struct device *dev, + struct list_head *head) +{ + if (IS_ENABLED(CONFIG_PCIE_APPLE) && dev_is_pci(dev)) { + struct iommu_resv_region *region; + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; + + region = iommu_alloc_resv_region(DOORBELL_ADDR, + PAGE_SIZE, prot, + IOMMU_RESV_MSI); + if (!region) + return; + + list_add_tail(&region->list, head); + } + + iommu_dma_get_resv_regions(dev, head); +} + static const struct iommu_ops apple_dart_iommu_ops = { .domain_alloc = apple_dart_domain_alloc, .domain_free = apple_dart_domain_free, @@ -737,6 +762,8 @@ static const struct iommu_ops apple_dart_iommu_ops = { .device_group = apple_dart_device_group, .of_xlate = apple_dart_of_xlate, .def_domain_type = apple_dart_def_domain_type, + .get_resv_regions = apple_dart_get_resv_regions, + .put_resv_regions = generic_iommu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during dart probe */ }; diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 953d9acc031f8..5661d4a84832e 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -312,6 +312,11 @@ config PCIE_HISI_ERR Say Y here if you want error handling support for the PCIe controller's errors on HiSilicon HIP SoCs +config PCIE_APPLE_MSI_DOORBELL_ADDR + hex + default 0xfffff000 + depends on PCIE_APPLE + config PCIE_APPLE tristate "Apple PCIe controller" depends on ARCH_APPLE || COMPILE_TEST diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index c18bf02d10b53..ded14cbe8b74c 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -120,8 +120,10 @@ * The doorbell address is set to 0xfffff000, which by convention * matches what MacOS does, and it is possible to use any other * address (in the bottom 4GB, as the base register is only 32bit). + * However, it has to be excluded from the IOVA range, and the + * DART driver has to know about it. */ -#define DOORBELL_ADDR 0xfffff000 +#define DOORBELL_ADDR CONFIG_PCIE_APPLE_MSI_DOORBELL_ADDR struct apple_pcie { struct mutex lock; From 1fa7dcb08110011cf6d4ae2d7c00ba1710cbf717 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 29 Sep 2021 17:38:42 +0100 Subject: [PATCH 0248/1202] PCI: apple: Configure RID to SID mapper on device addition The Apple PCIe controller doesn't directly feed the endpoint's Requester ID to the IOMMU (DART), but instead maps RIDs onto Stream IDs (SIDs). The DART and the PCIe controller must thus agree on the SIDs that are used for translation (by using the 'iommu-map' property).
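The translation itself is a plain table lookup driven by the device tree; a minimal sketch of the step involved (helper name hypothetical, of_map_id() is the standard OF API):

static int example_rid_to_sid(struct device_node *np, u32 rid, u32 *sid)
{
	/*
	 * Walk the 'iommu-map' property to translate a PCI Requester ID
	 * into the Stream ID the DART expects for this endpoint.
	 */
	return of_map_id(np, rid, "iommu-map", "iommu-map-mask", NULL, sid);
}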
For this purpose, parse the 'iommu-map' property each time a device gets added, and use the resulting translation to configure the PCIe RID-to-SID mapper. Similarly, remove the translation if/when the device gets removed. This is all driven from a bus notifier which gets registered at probe time. Hopefully this is the only PCI controller driver in the whole system. Link: https://lore.kernel.org/r/20210929163847.2807812-10-maz@kernel.org Tested-by: Alyssa Rosenzweig Signed-off-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Reviewed-by: Sven Peter --- drivers/pci/controller/pcie-apple.c | 165 +++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index ded14cbe8b74c..b665d29af77a5 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -116,6 +118,8 @@ #define PORT_TUNSTAT_PERST_ACK_PEND BIT(1) #define PORT_PREFMEM_ENABLE 0x00994 +#define MAX_RID2SID 64 + /* * The doorbell address is set to 0xfffff000, which by convention * matches what MacOS does, and it is possible to use any other @@ -131,6 +135,7 @@ struct apple_pcie { void __iomem *base; struct irq_domain *domain; unsigned long *bitmap; + struct list_head ports; struct completion event; struct irq_fwspec fwspec; u32 nvecs; @@ -141,6 +146,9 @@ struct apple_pcie_port { struct device_node *np; void __iomem *base; struct irq_domain *domain; + struct list_head entry; + DECLARE_BITMAP(sid_map, MAX_RID2SID); + int sid_map_sz; int idx; }; @@ -489,6 +497,14 @@ static int apple_pcie_setup_refclk(struct apple_pcie *pcie, return 0; } +static u32 apple_pcie_rid2sid_write(struct apple_pcie_port *port, + int idx, u32 val) +{ + writel_relaxed(val, port->base + PORT_RID2SID(idx)); + /* Read back to ensure completion of the write */ + return readl_relaxed(port->base + PORT_RID2SID(idx)); +} + static int apple_pcie_setup_port(struct apple_pcie *pcie, struct device_node *np) { @@ -496,7 +512,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, struct apple_pcie_port *port; struct gpio_desc *reset; u32 stat, idx; - int ret; + int ret, i; reset = gpiod_get_from_of_node(np, "reset-gpios", 0, GPIOD_OUT_LOW, "#PERST"); @@ -540,6 +556,18 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, if (ret) return ret; + /* Reset all RID/SID mappings, and check for RAZ/WI registers */ + for (i = 0; i < MAX_RID2SID; i++) { + if (apple_pcie_rid2sid_write(port, i, 0xbad1d) != 0xbad1d) + break; + apple_pcie_rid2sid_write(port, i, 0); + } + + dev_dbg(pcie->dev, "%pOF: %d RID/SID mapping entries\n", np, i); + + port->sid_map_sz = i; + + list_add_tail(&port->entry, &pcie->ports); init_completion(&pcie->event); ret = apple_pcie_port_register_irqs(port); @@ -602,6 +630,121 @@ static int apple_msi_init(struct apple_pcie *pcie) return 0; } +static struct apple_pcie_port *apple_pcie_get_port(struct pci_dev *pdev) +{ + struct pci_config_window *cfg = pdev->sysdata; + struct apple_pcie *pcie = cfg->priv; + struct pci_dev *port_pdev; + struct apple_pcie_port *port; + + /* Find the root port this device is on */ + port_pdev = pcie_find_root_port(pdev); + + /* If finding the port itself, nothing to do */ + if (WARN_ON(!port_pdev) || pdev == port_pdev) + return NULL; + + list_for_each_entry(port, &pcie->ports, entry) { + if (port->idx == PCI_SLOT(port_pdev->devfn)) + return port; + } + + return NULL; +} + +static int
apple_pcie_add_device(struct apple_pcie_port *port, + struct pci_dev *pdev) +{ + u32 sid, rid = PCI_DEVID(pdev->bus->number, pdev->devfn); + int idx, err; + + dev_dbg(&pdev->dev, "added to bus %s, index %d\n", + pci_name(pdev->bus->self), port->idx); + + err = of_map_id(port->pcie->dev->of_node, rid, "iommu-map", + "iommu-map-mask", NULL, &sid); + if (err) + return err; + + mutex_lock(&port->pcie->lock); + + idx = bitmap_find_free_region(port->sid_map, port->sid_map_sz, 0); + if (idx >= 0) { + apple_pcie_rid2sid_write(port, idx, + PORT_RID2SID_VALID | + (sid << PORT_RID2SID_SID_SHIFT) | rid); + + dev_dbg(&pdev->dev, "mapping RID%x to SID%x (index %d)\n", + rid, sid, idx); + } + + mutex_unlock(&port->pcie->lock); + + return idx >= 0 ? 0 : -ENOSPC; +} + +static void apple_pcie_release_device(struct apple_pcie_port *port, + struct pci_dev *pdev) +{ + u32 rid = PCI_DEVID(pdev->bus->number, pdev->devfn); + int idx; + + mutex_lock(&port->pcie->lock); + + for_each_set_bit(idx, port->sid_map, port->sid_map_sz) { + u32 val; + + val = readl_relaxed(port->base + PORT_RID2SID(idx)); + if ((val & 0xffff) == rid) { + apple_pcie_rid2sid_write(port, idx, 0); + bitmap_release_region(port->sid_map, idx, 0); + dev_dbg(&pdev->dev, "Released %x (%d)\n", val, idx); + break; + } + } + + mutex_unlock(&port->pcie->lock); +} + +static int apple_pcie_bus_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + struct apple_pcie_port *port; + int err; + + /* + * This is a bit ugly. We assume that if we get notified for + * any PCI device, we must be in charge of it, and that there + * is no other PCI controller in the whole system. It probably + * holds for now, but who knows for how long? 
+ */ + port = apple_pcie_get_port(pdev); + if (!port) + return NOTIFY_DONE; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + err = apple_pcie_add_device(port, pdev); + if (err) + return notifier_from_errno(err); + break; + case BUS_NOTIFY_DEL_DEVICE: + apple_pcie_release_device(port, pdev); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block apple_pcie_nb = { + .notifier_call = apple_pcie_bus_notifier, +}; + static int apple_pcie_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -622,6 +765,9 @@ static int apple_pcie_init(struct pci_config_window *cfg) if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); + cfg->priv = pcie; + INIT_LIST_HEAD(&pcie->ports); + for_each_child_of_node(dev->of_node, of_port) { ret = apple_pcie_setup_port(pcie, of_port); if (ret) { @@ -634,6 +780,21 @@ static int apple_pcie_init(struct pci_config_window *cfg) return apple_msi_init(pcie); } +static int apple_pcie_probe(struct platform_device *pdev) +{ + int ret; + + ret = bus_register_notifier(&pci_bus_type, &apple_pcie_nb); + if (ret) + return ret; + + ret = pci_host_common_probe(pdev); + if (ret) + bus_unregister_notifier(&pci_bus_type, &apple_pcie_nb); + + return ret; +} + static const struct pci_ecam_ops apple_pcie_cfg_ecam_ops = { .init = apple_pcie_init, .pci_ops = { @@ -650,7 +811,7 @@ static const struct of_device_id apple_pcie_of_match[] = { MODULE_DEVICE_TABLE(of, apple_pcie_of_match); static struct platform_driver apple_pcie_driver = { - .probe = pci_host_common_probe, + .probe = apple_pcie_probe, .driver = { .name = "pcie-apple", .of_match_table = apple_pcie_of_match, From 9339e8dd7665548684cb5299c1fd40195d36c392 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 25 Aug 2021 17:03:10 +0200 Subject: [PATCH 0249/1202] soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver PM_INIT_FINALIZE tells the PMU FW that Linux is able to handle the power management nodes that are provided by the PMU FW. Nodes that are not requested are shut down after this call. Calling PM_INIT_FINALIZE from the zynqmp_power driver is wrong. The PM node request mechanism is implemented in the zynqmp_pm_domains driver, which must also call PM_INIT_FINALIZE. Due to the behavior of the PMU FW, all devices must be powered up before PM_INIT_FINALIZE is called, because otherwise the devices might misbehave. Calling PM_INIT_FINALIZE from the sync_state device callback ensures that all users probed successfully before the PMU FW is allowed to power off unused domains. 
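For reference, sync_state boils down to a single callback that the driver core invokes once every consumer of a device has probed; a minimal sketch with hypothetical names:

static void example_sync_state(struct device *dev)
{
	/*
	 * Every consumer has probed; only now is it safe to let the
	 * firmware power off domains that nobody requested.
	 */
	dev_dbg(dev, "all consumers probed, finalizing PM handover\n");
}

static struct platform_driver example_driver = {
	.driver = {
		.name		= "example",
		.sync_state	= example_sync_state,
	},
};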
Signed-off-by: Michael Tretter Acked-by: Michal Simek Acked-by: Rajan Vaja Link: https://lore.kernel.org/r/20210825150313.4033156-2-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- drivers/soc/xilinx/zynqmp_pm_domains.c | 16 ++++++++++++++++ drivers/soc/xilinx/zynqmp_power.c | 1 - 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c index 226d343f0a6a5..81e8e10f10929 100644 --- a/drivers/soc/xilinx/zynqmp_pm_domains.c +++ b/drivers/soc/xilinx/zynqmp_pm_domains.c @@ -152,11 +152,17 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, struct device *dev) { + struct device_link *link; int ret; struct zynqmp_pm_domain *pd; pd = container_of(domain, struct zynqmp_pm_domain, gpd); + link = device_link_add(dev, &domain->dev, DL_FLAG_SYNC_STATE_ONLY); + if (!link) + dev_dbg(&domain->dev, "failed to create device link for %s\n", + dev_name(dev)); + /* If this is not the first device to attach there is nothing to do */ if (domain->device_count) return 0; @@ -299,9 +305,19 @@ static int zynqmp_gpd_remove(struct platform_device *pdev) return 0; } +static void zynqmp_gpd_sync_state(struct device *dev) +{ + int ret; + + ret = zynqmp_pm_init_finalize(); + if (ret) + dev_warn(dev, "failed to release power management to firmware\n"); +} + static struct platform_driver zynqmp_power_domain_driver = { .driver = { .name = "zynqmp_power_controller", + .sync_state = zynqmp_gpd_sync_state, }, .probe = zynqmp_gpd_probe, .remove = zynqmp_gpd_remove, diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c index c556623dae024..f8c301984d4f9 100644 --- a/drivers/soc/xilinx/zynqmp_power.c +++ b/drivers/soc/xilinx/zynqmp_power.c @@ -178,7 +178,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev) u32 pm_api_version; struct mbox_client *client; - zynqmp_pm_init_finalize(); zynqmp_pm_get_api_version(&pm_api_version); /* Check PM API version number */ From ceae494be052208102398979585a2ff9321ed4a0 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 25 Aug 2021 17:03:11 +0200 Subject: [PATCH 0250/1202] soc: xilinx: cleanup debug and error messages Use dev_err/dev_dbg instead of pr_err/pr_debug. Add the PM node ids to supplement the (arbitrary) power domain names to include information which PM nodes are requested by the driver. Drop function names from the messages, because they can easily be added with dynamic debug. Remove comments explaining that error messages are printed on errors. 
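To see why dropping __func__ from the messages loses nothing, compare the two forms; the dev_dbg() line is taken from the patch below, and the function name can be re-added at runtime through the standard dynamic debug control file:

/* Before: no device context, function name baked into the format string. */
pr_debug("%s() Powered on %s domain\n", __func__, domain->name);

/*
 * After: the message is tied to the domain's struct device, and dynamic
 * debug can prepend the function name on demand, e.g.:
 *   echo 'file zynqmp_pm_domains.c +pf' > /sys/kernel/debug/dynamic_debug/control
 */
dev_dbg(&domain->dev, "set requirement to 0x%x for PM node id %d\n",
	ZYNQMP_PM_CAPABILITY_ACCESS, pd->node_id);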
Signed-off-by: Michael Tretter Acked-by: Michal Simek Acked-by: Rajan Vaja Link: https://lore.kernel.org/r/20210825150313.4033156-3-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- drivers/soc/xilinx/zynqmp_pm_domains.c | 45 +++++++++++++------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c index 81e8e10f10929..27c46c68e5cc5 100644 --- a/drivers/soc/xilinx/zynqmp_pm_domains.c +++ b/drivers/soc/xilinx/zynqmp_pm_domains.c @@ -80,12 +80,15 @@ static int zynqmp_gpd_power_on(struct generic_pm_domain *domain) ZYNQMP_PM_MAX_QOS, ZYNQMP_PM_REQUEST_ACK_BLOCKING); if (ret) { - pr_err("%s() %s set requirement for node %d failed: %d\n", - __func__, domain->name, pd->node_id, ret); + dev_err(&domain->dev, + "failed to set requirement to 0x%x for PM node id %d: %d\n", + ZYNQMP_PM_CAPABILITY_ACCESS, pd->node_id, ret); return ret; } - pr_debug("%s() Powered on %s domain\n", __func__, domain->name); + dev_dbg(&domain->dev, "set requirement to 0x%x for PM node id %d\n", + ZYNQMP_PM_CAPABILITY_ACCESS, pd->node_id); + return 0; } @@ -110,8 +113,8 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) /* If domain is already released there is nothing to be done */ if (!(pd->flags & ZYNQMP_PM_DOMAIN_REQUESTED)) { - pr_debug("%s() %s domain is already released\n", - __func__, domain->name); + dev_dbg(&domain->dev, "PM node id %d is already released\n", + pd->node_id); return 0; } @@ -128,17 +131,16 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) ret = zynqmp_pm_set_requirement(pd->node_id, capabilities, 0, ZYNQMP_PM_REQUEST_ACK_NO); - /** - * If powering down of any node inside this domain fails, - * report and return the error - */ if (ret) { - pr_err("%s() %s set requirement for node %d failed: %d\n", - __func__, domain->name, pd->node_id, ret); + dev_err(&domain->dev, + "failed to set requirement to 0x%x for PM node id %d: %d\n", + capabilities, pd->node_id, ret); return ret; } - pr_debug("%s() Powered off %s domain\n", __func__, domain->name); + dev_dbg(&domain->dev, "set requirement to 0x%x for PM node id %d\n", + capabilities, pd->node_id); + return 0; } @@ -169,17 +171,17 @@ static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, ret = zynqmp_pm_request_node(pd->node_id, 0, 0, ZYNQMP_PM_REQUEST_ACK_BLOCKING); - /* If requesting a node fails print and return the error */ if (ret) { - pr_err("%s() %s request failed for node %d: %d\n", - __func__, domain->name, pd->node_id, ret); + dev_err(&domain->dev, "%s request failed for node %d: %d\n", + domain->name, pd->node_id, ret); return ret; } pd->flags |= ZYNQMP_PM_DOMAIN_REQUESTED; - pr_debug("%s() %s attached to %s domain\n", __func__, - dev_name(dev), domain->name); + dev_dbg(&domain->dev, "%s requested PM node id %d\n", + dev_name(dev), pd->node_id); + return 0; } @@ -201,17 +203,16 @@ static void zynqmp_gpd_detach_dev(struct generic_pm_domain *domain, return; ret = zynqmp_pm_release_node(pd->node_id); - /* If releasing a node fails print the error and return */ if (ret) { - pr_err("%s() %s release failed for node %d: %d\n", - __func__, domain->name, pd->node_id, ret); + dev_err(&domain->dev, "failed to release PM node id %d: %d\n", + pd->node_id, ret); return; } pd->flags &= ~ZYNQMP_PM_DOMAIN_REQUESTED; - pr_debug("%s() %s detached from %s domain\n", __func__, - dev_name(dev), domain->name); + dev_dbg(&domain->dev, "%s released PM node id %d\n", + dev_name(dev), pd->node_id); } static struct 
generic_pm_domain *zynqmp_gpd_xlate From 955ebc1a8c4c50345bac21aed94937654596241c Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 25 Aug 2021 17:03:12 +0200 Subject: [PATCH 0251/1202] soc: xilinx: use a properly named field instead of flags Instead of defining a flags field and a single bit in this field to signal that a PM node has been requested, use a boolean field with a descriptive name. No functional change, but using a proper name instead of flags makes the code easier to read. Signed-off-by: Michael Tretter Acked-by: Michal Simek Acked-by: Rajan Vaja Link: https://lore.kernel.org/r/20210825150313.4033156-4-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- drivers/soc/xilinx/zynqmp_pm_domains.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c index 27c46c68e5cc5..53b0041f9fe0e 100644 --- a/drivers/soc/xilinx/zynqmp_pm_domains.c +++ b/drivers/soc/xilinx/zynqmp_pm_domains.c @@ -20,8 +20,6 @@ #include #define ZYNQMP_NUM_DOMAINS (100) -/* Flag stating if PM nodes mapped to the PM domain has been requested */ -#define ZYNQMP_PM_DOMAIN_REQUESTED BIT(0) static int min_capability; @@ -29,12 +27,12 @@ static int min_capability; * struct zynqmp_pm_domain - Wrapper around struct generic_pm_domain * @gpd: Generic power domain * @node_id: PM node ID corresponding to device inside PM domain - * @flags: ZynqMP PM domain flags + * @requested: The PM node mapped to the PM domain has been requested */ struct zynqmp_pm_domain { struct generic_pm_domain gpd; u32 node_id; - u8 flags; + bool requested; }; /** @@ -112,7 +110,7 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) pd = container_of(domain, struct zynqmp_pm_domain, gpd); /* If domain is already released there is nothing to be done */ - if (!(pd->flags & ZYNQMP_PM_DOMAIN_REQUESTED)) { + if (!pd->requested) { dev_dbg(&domain->dev, "PM node id %d is already released\n", pd->node_id); return 0; @@ -177,7 +175,7 @@ static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, return ret; } - pd->flags |= ZYNQMP_PM_DOMAIN_REQUESTED; + pd->requested = true; dev_dbg(&domain->dev, "%s requested PM node id %d\n", dev_name(dev), pd->node_id); @@ -209,7 +207,7 @@ static void zynqmp_gpd_detach_dev(struct generic_pm_domain *domain, return; } - pd->flags &= ~ZYNQMP_PM_DOMAIN_REQUESTED; + pd->requested = false; dev_dbg(&domain->dev, "%s released PM node id %d\n", dev_name(dev), pd->node_id); From e2fee520fe5f3f2926fd7f44e6c55a847f7ebd8e Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Wed, 25 Aug 2021 17:03:13 +0200 Subject: [PATCH 0252/1202] soc: xilinx: add a to_zynqmp_pm_domain macro Replace container_of for converting a generic_pm_domain to a zynqmp_pm_domain with a macro definition to simplify the code. 
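This is the standard kernel idiom for a typed downcast from an embedded base object; in generic form (names hypothetical):

struct example_domain {
	struct generic_pm_domain gpd;	/* embedded base object */
	u32 node_id;			/* driver-private state */
};

#define to_example_domain(pm_domain) \
	container_of(pm_domain, struct example_domain, gpd)

The macro documents in one place which member embeds the generic object and removes the container_of() boilerplate from every call site.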
Signed-off-by: Michael Tretter Acked-by: Michal Simek Acked-by: Rajan Vaja Link: https://lore.kernel.org/r/20210825150313.4033156-5-m.tretter@pengutronix.de Signed-off-by: Michal Simek --- drivers/soc/xilinx/zynqmp_pm_domains.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c index 53b0041f9fe0e..fcce2433bd6d5 100644 --- a/drivers/soc/xilinx/zynqmp_pm_domains.c +++ b/drivers/soc/xilinx/zynqmp_pm_domains.c @@ -35,6 +35,9 @@ struct zynqmp_pm_domain { bool requested; }; +#define to_zynqmp_pm_domain(pm_domain) \ + container_of(pm_domain, struct zynqmp_pm_domain, gpd) + /** * zynqmp_gpd_is_active_wakeup_path() - Check if device is in wakeup source * path @@ -69,10 +72,9 @@ static int zynqmp_gpd_is_active_wakeup_path(struct device *dev, void *not_used) */ static int zynqmp_gpd_power_on(struct generic_pm_domain *domain) { + struct zynqmp_pm_domain *pd = to_zynqmp_pm_domain(domain); int ret; - struct zynqmp_pm_domain *pd; - pd = container_of(domain, struct zynqmp_pm_domain, gpd); ret = zynqmp_pm_set_requirement(pd->node_id, ZYNQMP_PM_CAPABILITY_ACCESS, ZYNQMP_PM_MAX_QOS, @@ -101,14 +103,12 @@ static int zynqmp_gpd_power_on(struct generic_pm_domain *domain) */ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) { + struct zynqmp_pm_domain *pd = to_zynqmp_pm_domain(domain); int ret; struct pm_domain_data *pdd, *tmp; - struct zynqmp_pm_domain *pd; u32 capabilities = min_capability; bool may_wakeup; - pd = container_of(domain, struct zynqmp_pm_domain, gpd); - /* If domain is already released there is nothing to be done */ if (!pd->requested) { dev_dbg(&domain->dev, "PM node id %d is already released\n", @@ -152,11 +152,9 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, struct device *dev) { + struct zynqmp_pm_domain *pd = to_zynqmp_pm_domain(domain); struct device_link *link; int ret; - struct zynqmp_pm_domain *pd; - - pd = container_of(domain, struct zynqmp_pm_domain, gpd); link = device_link_add(dev, &domain->dev, DL_FLAG_SYNC_STATE_ONLY); if (!link) @@ -191,10 +189,8 @@ static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, static void zynqmp_gpd_detach_dev(struct generic_pm_domain *domain, struct device *dev) { + struct zynqmp_pm_domain *pd = to_zynqmp_pm_domain(domain); int ret; - struct zynqmp_pm_domain *pd; - - pd = container_of(domain, struct zynqmp_pm_domain, gpd); /* If this is not the last device to detach there is nothing to do */ if (domain->device_count) @@ -220,7 +216,7 @@ static struct generic_pm_domain *zynqmp_gpd_xlate unsigned int i, idx = genpdspec->args[0]; struct zynqmp_pm_domain *pd; - pd = container_of(genpd_data->domains[0], struct zynqmp_pm_domain, gpd); + pd = to_zynqmp_pm_domain(genpd_data->domains[0]); if (genpdspec->args_count != 1) return ERR_PTR(-EINVAL); From 4e2dfd51f7ff81ebd2538243ee8c20bcba898864 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Wed, 6 Oct 2021 01:43:55 -0700 Subject: [PATCH 0253/1202] firmware: xilinx: check return value of zynqmp_pm_get_api_version() Currently return value of zynqmp_pm_get_api_version() is ignored. Because of that, API version is checked in case of error also. So add check for return value of zynqmp_pm_get_api_version(). 
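The failure mode being fixed is worth spelling out: on error the output parameter may be left unwritten, so the later version comparison would operate on garbage. The corrected call pattern, in sketch form:

u32 pm_api_version;
int ret;

ret = zynqmp_pm_get_api_version(&pm_api_version);
if (ret)
	return ret;	/* pm_api_version is not valid on failure */

/* Only past this point is pm_api_version safe to compare. */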
Signed-off-by: Rajan Vaja Reviewed-by: Michal Simek Link: https://lore.kernel.org/r/1633509835-31949-1-git-send-email-rajan.vaja@xilinx.com Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index a3cadbaf3cba7..070197ea978ca 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -1371,7 +1371,10 @@ static int zynqmp_firmware_probe(struct platform_device *pdev) return ret; /* Check PM API version number */ - zynqmp_pm_get_api_version(&pm_api_version); + ret = zynqmp_pm_get_api_version(&pm_api_version); + if (ret) + return ret; + if (pm_api_version < ZYNQMP_PM_VERSION) { panic("%s Platform Management API version error. Expected: v%d.%d - Found: v%d.%d\n", __func__, From 0cacde3cd46682bd1e9d8fd0e43fd8246a6ef096 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 18 Aug 2021 13:41:19 +0300 Subject: [PATCH 0254/1202] btrfs: rename btrfs_alloc_chunk to btrfs_create_chunk The user-facing function used to allocate new chunks is btrfs_chunk_alloc; unfortunately, there is yet another similar-sounding function, btrfs_alloc_chunk. This creates confusion, especially since the latter function can be considered "private" in the sense that it implements the first stage of chunk creation and as such is called by btrfs_chunk_alloc. To avoid the awkwardness of having two similarly named but functionally distinct functions, rename btrfs_alloc_chunk to btrfs_create_chunk, given that the main purpose of this function is to orchestrate the whole process of allocating a chunk - reserving space into devices, deciding on characteristics of the stripe size and creating the in-memory structures. Reviewed-by: Filipe Manana Reviewed-by: Anand Jain Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 6 +++--- fs/btrfs/volumes.c | 10 +++++----- fs/btrfs/volumes.h | 2 +- fs/btrfs/zoned.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a3b830b8410a8..1f8b06afbd03d 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3380,7 +3380,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) */ check_system_chunk(trans, flags); - bg = btrfs_alloc_chunk(trans, flags); + bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); goto out; @@ -3441,7 +3441,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); struct btrfs_block_group *sys_bg; - sys_bg = btrfs_alloc_chunk(trans, sys_flags); + sys_bg = btrfs_create_chunk(trans, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -3734,7 +3734,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) * could deadlock on an extent buffer since our caller may be * COWing an extent buffer from the chunk btree.
*/ - bg = btrfs_alloc_chunk(trans, flags); + bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2ec3b8ac8fa35..484834f303b31 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3096,7 +3096,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) const u64 sys_flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *sys_bg; - sys_bg = btrfs_alloc_chunk(trans, sys_flags); + sys_bg = btrfs_create_chunk(trans, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -4973,7 +4973,7 @@ static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type) } /* - * Structure used internally for __btrfs_alloc_chunk() function. + * Structure used internally for btrfs_create_chunk() function. * Wraps needed parameters. */ struct alloc_chunk_ctl { @@ -5377,7 +5377,7 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, return block_group; } -struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, +struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type) { struct btrfs_fs_info *info = trans->fs_info; @@ -5578,12 +5578,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) */ alloc_profile = btrfs_metadata_alloc_profile(fs_info); - meta_bg = btrfs_alloc_chunk(trans, alloc_profile); + meta_bg = btrfs_create_chunk(trans, alloc_profile); if (IS_ERR(meta_bg)) return PTR_ERR(meta_bg); alloc_profile = btrfs_system_alloc_profile(fs_info); - sys_bg = btrfs_alloc_chunk(trans, alloc_profile); + sys_bg = btrfs_create_chunk(trans, alloc_profile); if (IS_ERR(sys_bg)) return PTR_ERR(sys_bg); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2183361db614d..5314ce51b9273 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -450,7 +450,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map, struct btrfs_io_geometry *io_geom); int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); -struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans, +struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, u64 type); void btrfs_mapping_tree_free(struct extent_map_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 47af1ab3bf120..90d9df131fc16 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -585,7 +585,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) /* * stripe_size is always aligned to BTRFS_STRIPE_LEN in - * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size, + * btrfs_create_chunk(). Since we want stripe_len == zone_size, * check the alignment here. */ if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { From 3ee4f5a8280c2b00fc6aa44c3f05a1f2b871751d Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Sun, 1 Aug 2021 20:35:49 -0300 Subject: [PATCH 0255/1202] btrfs: send: simplify send_create_inode_if_needed The out label is being overused, we can simply return if the condition permits. No functional changes. 
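The idiom applied here, in generic form: when a function holds no resources, a pass-through label only adds indirection, so each outcome can return as soon as it is known (a sketch, names hypothetical):

static int example_create_if_needed(int found)
{
	if (found < 0)
		return found;		/* propagate the error */
	if (found > 0)
		return 0;		/* already exists, nothing to do */

	return example_create();	/* hand the result back directly */
}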
Reviewed-by: Su Yue Reviewed-by: Nikolay Borisov Signed-off-by: Marcos Paulo de Souza Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/send.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 72f9b865e8479..afdcbe7844e0e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2720,19 +2720,12 @@ static int send_create_inode_if_needed(struct send_ctx *sctx) if (S_ISDIR(sctx->cur_inode_mode)) { ret = did_create_dir(sctx, sctx->cur_ino); if (ret < 0) - goto out; - if (ret) { - ret = 0; - goto out; - } + return ret; + else if (ret > 0) + return 0; } - ret = send_create_inode(sctx, sctx->cur_ino); - if (ret < 0) - goto out; - -out: - return ret; + return send_create_inode(sctx, sctx->cur_ino); } struct recorded_ref { From 2cdafb85335a6b886bc58fa8c78dcac06b15e5e1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 28 Jul 2021 14:20:41 +0800 Subject: [PATCH 0256/1202] btrfs: drop unnecessary ret in ioctl_quota_rescan_status There is no need for the variable ret after d66105cfa873 ("btrfs: allocate btrfs_ioctl_quota_rescan_args on stack"), remove it. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index cc61813213d83..1c4b6247ca00c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4430,7 +4430,6 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info, void __user *arg) { struct btrfs_ioctl_quota_rescan_args qsa = {0}; - int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4441,9 +4440,9 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info, } if (copy_to_user(arg, &qsa, sizeof(qsa))) - ret = -EFAULT; + return -EFAULT; - return ret; + return 0; } static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info, From 3da2ec5637949b26c4c6c48a4b154655022cbf28 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Wed, 18 Aug 2021 12:15:48 +0800 Subject: [PATCH 0257/1202] btrfs: update comment for fs_devices::seed_list in btrfs_rm_device Update it since commit 944d3f9fac61 ("btrfs: switch seed device to list api") did conversion from fs_devices::seed to fs_devices::seed_list. Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 484834f303b31..a5e29a3f56d21 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2159,7 +2159,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, /* * In normal cases the cur_devices == fs_devices. But in case * of deleting a seed device, the cur_devices should point to - * its own fs_devices listed under the fs_devices->seed. + * its own fs_devices listed under the fs_devices->seed_list. */ cur_devices = device->fs_devices; mutex_lock(&fs_devices->device_list_mutex); From 8bc894170b6933e17cd3f988a33025e14260bbcd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Aug 2021 17:38:49 +0800 Subject: [PATCH 0258/1202] btrfs: subpage: only call btrfs_alloc_subpage() when sectorsize is smaller than PAGE_SIZE There are two call sites of btrfs_alloc_subpage(): - btrfs_attach_subpage() We have ensured sectorsize is smaller than PAGE_SIZE - alloc_extent_buffer() We call btrfs_alloc_subpage() unconditionally. 
The alloc_extent_buffer() forces us to check the sectorsize size against page size inside btrfs_alloc_subpage(). Since the function name, btrfs_alloc_subpage(), already indicates it should only get called for subpage cases, do the check in alloc_extent_buffer() and add an ASSERT() in btrfs_alloc_subpage(). Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 16 +++++++++------- fs/btrfs/subpage.c | 3 +-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aaddd72253481..19889dbfcf154 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6137,13 +6137,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, * page, but it may change in the future for 16K page size * support, so we still preallocate the memory in the loop. */ - ret = btrfs_alloc_subpage(fs_info, &prealloc, - BTRFS_SUBPAGE_METADATA); - if (ret < 0) { - unlock_page(p); - put_page(p); - exists = ERR_PTR(ret); - goto free_eb; + if (fs_info->sectorsize < PAGE_SIZE) { + ret = btrfs_alloc_subpage(fs_info, &prealloc, + BTRFS_SUBPAGE_METADATA); + if (ret < 0) { + unlock_page(p); + put_page(p); + exists = ERR_PTR(ret); + goto free_eb; + } } spin_lock(&mapping->private_lock); diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index cb10e56ee31e1..ff1c6ba34a4d0 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -104,8 +104,7 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, struct btrfs_subpage **ret, enum btrfs_subpage_type type) { - if (fs_info->sectorsize == PAGE_SIZE) - return 0; + ASSERT(fs_info->sectorsize < PAGE_SIZE); *ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS); if (!*ret) From 147a54886bfedf23dc425a93e02672d2b8d321c1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Aug 2021 17:38:50 +0800 Subject: [PATCH 0259/1202] btrfs: subpage: make btrfs_alloc_subpage() return btrfs_subpage directly The existing calling convention of btrfs_alloc_subpage() is pretty awful. Change it to a more common pattern by returning struct btrfs_subpage directly and let the caller to determine if the call succeeded. Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 +++--- fs/btrfs/subpage.c | 35 +++++++++++++++++++---------------- fs/btrfs/subpage.h | 5 ++--- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 19889dbfcf154..f7895b34011eb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6138,9 +6138,9 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, * support, so we still preallocate the memory in the loop. 
*/ if (fs_info->sectorsize < PAGE_SIZE) { - ret = btrfs_alloc_subpage(fs_info, &prealloc, - BTRFS_SUBPAGE_METADATA); - if (ret < 0) { + prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA); + if (IS_ERR(prealloc)) { + ret = PTR_ERR(prealloc); unlock_page(p); put_page(p); exists = ERR_PTR(ret); diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index ff1c6ba34a4d0..03dcc35d7d0b3 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -66,8 +66,7 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, struct page *page, enum btrfs_subpage_type type) { - struct btrfs_subpage *subpage = NULL; - int ret; + struct btrfs_subpage *subpage; /* * We have cases like a dummy extent buffer page, which is not mapped @@ -75,13 +74,15 @@ */ if (page->mapping) ASSERT(PageLocked(page)); + /* Either not subpage, or the page already has private attached */ if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page)) return 0; - ret = btrfs_alloc_subpage(fs_info, &subpage, type); - if (ret < 0) - return ret; + subpage = btrfs_alloc_subpage(fs_info, type); + if (IS_ERR(subpage)) + return PTR_ERR(subpage); + attach_page_private(page, subpage); return 0; } @@ -100,23 +101,25 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, btrfs_free_subpage(subpage); } -int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, - struct btrfs_subpage **ret, - enum btrfs_subpage_type type) +struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, + enum btrfs_subpage_type type) { + struct btrfs_subpage *ret; + ASSERT(fs_info->sectorsize < PAGE_SIZE); - *ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS); - if (!*ret) - return -ENOMEM; - spin_lock_init(&(*ret)->lock); + ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS); + if (!ret) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&ret->lock); if (type == BTRFS_SUBPAGE_METADATA) { - atomic_set(&(*ret)->eb_refs, 0); + atomic_set(&ret->eb_refs, 0); } else { - atomic_set(&(*ret)->readers, 0); - atomic_set(&(*ret)->writers, 0); + atomic_set(&ret->readers, 0); + atomic_set(&ret->writers, 0); } - return 0; + return ret; } void btrfs_free_subpage(struct btrfs_subpage *subpage) diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index 0120948f37a10..fdcadc5193c1d 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -59,9 +59,8 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct page *page); /* Allocate additional data where page represents more than one sector */ -int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, - struct btrfs_subpage **ret, - enum btrfs_subpage_type type); +struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, + enum btrfs_subpage_type type); void btrfs_free_subpage(struct btrfs_subpage *subpage); void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info, From bfe50ced42c1eb591d327d77cc25850bf153f965 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Aug 2021 17:38:51 +0800 Subject: [PATCH 0260/1202] btrfs: subpage: introduce btrfs_subpage_bitmap_info Currently we use a fixed-size u16 bitmap for each subpage bitmap. This is fine for 4K sectorsize with 64K page size. But for 4K sectorsize and larger page sizes, the bitmap is too small, while for smaller page sizes like 16K, u16 bitmaps waste too much space. Here we introduce a new helper structure, btrfs_subpage_bitmap_info, to record the proper bitmap size and where each bitmap starts.
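A quick worked example makes the layout concrete; assuming 16K pages, 4K sectors, and the five bitmap types the structure tracks:

/*
 * bitmap_nr_bits   = 16K / 4K = 4 bits per bitmap
 * uptodate_offset  = 0
 * error_offset     = 4
 * dirty_offset     = 8
 * writeback_offset = 12
 * ordered_offset   = 16
 * total_nr_bits    = 5 * 4 = 20 bits, fits in a single unsigned long
 */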
By this, we can later compact all subpage bitmaps into one u32 bitmap. This patch is the first step. Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 12 +++++++++--- fs/btrfs/subpage.c | 28 ++++++++++++++++++++++++++++ fs/btrfs/subpage.h | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c0cebcf745cef..8736347d1fa36 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -899,6 +899,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *scrub_workers; struct btrfs_workqueue *scrub_wr_completion_workers; struct btrfs_workqueue *scrub_parity_workers; + struct btrfs_subpage_info *subpage_info; struct btrfs_discard_ctl discard_ctl; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 355ea88d5c5f7..71aafe9606e04 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); + kfree(fs_info->subpage_info); kvfree(fs_info); } @@ -3392,12 +3393,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - if (sectorsize != PAGE_SIZE) { + if (sectorsize < PAGE_SIZE) { + struct btrfs_subpage_info *subpage_info; + btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); - } - if (sectorsize != PAGE_SIZE) { if (btrfs_super_incompat_flags(fs_info->super_copy) & BTRFS_FEATURE_INCOMPAT_RAID56) { btrfs_err(fs_info, @@ -3406,6 +3407,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device err = -EINVAL; goto fail_alloc; } + subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL); + if (!subpage_info) + goto fail_alloc; + btrfs_init_subpage_info(subpage_info, sectorsize); + fs_info->subpage_info = subpage_info; } ret = btrfs_init_workqueues(fs_info, fs_devices); diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 03dcc35d7d0b3..f127bac6d6101 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -63,6 +63,34 @@ * This means a slightly higher tree locking latency. */ +void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize) +{ + unsigned int cur = 0; + unsigned int nr_bits; + + ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize)); + + nr_bits = PAGE_SIZE / sectorsize; + subpage_info->bitmap_nr_bits = nr_bits; + + subpage_info->uptodate_offset = cur; + cur += nr_bits; + + subpage_info->error_offset = cur; + cur += nr_bits; + + subpage_info->dirty_offset = cur; + cur += nr_bits; + + subpage_info->writeback_offset = cur; + cur += nr_bits; + + subpage_info->ordered_offset = cur; + cur += nr_bits; + + subpage_info->total_nr_bits = cur; +} + int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, struct page *page, enum btrfs_subpage_type type) { diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index fdcadc5193c1d..386246a7770f3 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -11,6 +11,39 @@ */ #define BTRFS_SUBPAGE_BITMAP_SIZE 16 +/* + * Extra info for subpage bitmap. + * + * For subpage we pack all uptodate/error/dirty/writeback/ordered bitmaps into + * one larger bitmap.
+ * + * This structure records how they are organized in the bitmap: + * + * /- uptodate_offset /- error_offset /- dirty_offset + * | | | + * v v v + * |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o| + * |<- bitmap_nr_bits ->| + * |<--------------- total_nr_bits ---------------->| + */ +struct btrfs_subpage_info { + /* Number of bits for each bitmap */ + unsigned int bitmap_nr_bits; + + /* Total number of bits for the whole bitmap */ + unsigned int total_nr_bits; + + /* + * *_offset indicates where each bitmap starts; the length is always + * @bitmap_nr_bits, which is calculated from PAGE_SIZE / sectorsize. + */ + unsigned int uptodate_offset; + unsigned int error_offset; + unsigned int dirty_offset; + unsigned int writeback_offset; + unsigned int ordered_offset; +}; + /* * Structure to trace status of each sector inside a page, attached to * page::private for both data and metadata inodes. @@ -53,6 +86,7 @@ enum btrfs_subpage_type { BTRFS_SUBPAGE_DATA, }; +void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize); int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, struct page *page, enum btrfs_subpage_type type); void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, From 160e1fe9a72899944f1a1f5f21414a939f913a7d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 17 Aug 2021 17:38:52 +0800 Subject: [PATCH 0261/1202] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Currently we use a u16 bitmap to make 4K sectorsize work for 64K page size. But this u16 bitmap is not large enough for larger page sizes like 128K, nor is it space-efficient for 16K page size. To handle both cases, here we pack all subpage bitmaps into a larger bitmap, so btrfs_subpage::bitmaps[] becomes the single backing bitmap for all subpage state. Each sub-bitmap has its start bit recorded in btrfs_subpage_info::*_offset, and its length recorded in btrfs_subpage_info::bitmap_nr_bits. All subpage bitmap operations will be converted from direct u16 operations to bitmap operations, using the *_offset values calculated above. For 64K page size with 4K sectorsize, this should not cause much difference. While for 16K page size, we will only need 1 unsigned long (u32) to store all the bitmaps, which saves quite some space. Furthermore, this allows us to support larger page sizes like 128K and 256K.
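With the packed layout, every per-type operation reduces to offset arithmetic on one bitmap; a sketch of the addressing used throughout the patch below:

/*
 * For a byte range [start, start + len) within the page, the bits of a
 * given bitmap type (say, dirty) live at:
 *
 *   first_bit = dirty_offset + (offset_in_page(start) >> sectorsize_bits);
 *   nr_bits   = len >> sectorsize_bits;
 *
 * after which the standard bitmap helpers apply directly:
 */
bitmap_set(subpage->bitmaps, first_bit, nr_bits);
bitmap_clear(subpage->bitmaps, first_bit, nr_bits);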
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 61 ++++++++++++--------- fs/btrfs/subpage.c | 126 +++++++++++++++++++++++++++---------------- fs/btrfs/subpage.h | 19 ++----- 3 files changed, 121 insertions(+), 85 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f7895b34011eb..fdc066cac5722 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3854,12 +3854,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info, struct page *page, u64 *start, u64 *end) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + struct btrfs_subpage_info *spi = fs_info->subpage_info; u64 orig_start = *start; /* Declare as unsigned long so we can use bitmap ops */ - unsigned long dirty_bitmap; unsigned long flags; - int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits; - int range_start_bit = nbits; + int range_start_bit; int range_end_bit; /* @@ -3872,13 +3871,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info, return; } + range_start_bit = spi->dirty_offset + + (offset_in_page(orig_start) >> fs_info->sectorsize_bits); + /* We should have the page locked, but just in case */ spin_lock_irqsave(&subpage->lock, flags); - dirty_bitmap = subpage->dirty_bitmap; + bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit, + spi->dirty_offset + spi->bitmap_nr_bits); spin_unlock_irqrestore(&subpage->lock, flags); - bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit, - BTRFS_SUBPAGE_BITMAP_SIZE); + range_start_bit -= spi->dirty_offset; + range_end_bit -= spi->dirty_offset; + *start = page_offset(page) + range_start_bit * fs_info->sectorsize; *end = page_offset(page) + range_end_bit * fs_info->sectorsize; } @@ -4602,12 +4606,11 @@ static int submit_eb_subpage(struct page *page, int submitted = 0; u64 page_start = page_offset(page); int bit_start = 0; - const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE; int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits; int ret; /* Lock and write each dirty extent buffers in the range */ - while (bit_start < nbits) { + while (bit_start < fs_info->subpage_info->bitmap_nr_bits) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; struct extent_buffer *eb; unsigned long flags; @@ -4623,7 +4626,8 @@ static int submit_eb_subpage(struct page *page, break; } spin_lock_irqsave(&subpage->lock, flags); - if (!((1 << bit_start) & subpage->dirty_bitmap)) { + if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset, + subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock(&page->mapping->private_lock); bit_start++; @@ -7169,32 +7173,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst, } } +#define GANG_LOOKUP_SIZE 16 static struct extent_buffer *get_next_extent_buffer( struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) { - struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE]; + struct extent_buffer *gang[GANG_LOOKUP_SIZE]; struct extent_buffer *found = NULL; u64 page_start = page_offset(page); - int ret; - int i; + u64 cur = page_start; ASSERT(in_range(bytenr, page_start, PAGE_SIZE)); - ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE); lockdep_assert_held(&fs_info->buffer_lock); - ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang, - bytenr >> fs_info->sectorsize_bits, - PAGE_SIZE / fs_info->nodesize); - for (i = 0; i < ret; i++) { - /* Already beyond page end */ - if (gang[i]->start >= page_start + PAGE_SIZE) 
- break; - /* Found one */ - if (gang[i]->start >= bytenr) { - found = gang[i]; - break; + while (cur < page_start + PAGE_SIZE) { + int ret; + int i; + + ret = radix_tree_gang_lookup(&fs_info->buffer_radix, + (void **)gang, cur >> fs_info->sectorsize_bits, + min_t(unsigned int, GANG_LOOKUP_SIZE, + PAGE_SIZE / fs_info->nodesize)); + if (ret == 0) + goto out; + for (i = 0; i < ret; i++) { + /* Already beyond page end */ + if (gang[i]->start >= page_start + PAGE_SIZE) + goto out; + /* Found one */ + if (gang[i]->start >= bytenr) { + found = gang[i]; + goto out; + } } + cur = gang[ret - 1]->start + gang[ret - 1]->len; } +out: return found; } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index f127bac6d6101..51f873a680eaf 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -133,10 +133,13 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, enum btrfs_subpage_type type) { struct btrfs_subpage *ret; + unsigned int real_size; ASSERT(fs_info->sectorsize < PAGE_SIZE); - ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS); + real_size = struct_size(ret, bitmaps, + BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits)); + ret = kzalloc(real_size, GFP_NOFS); if (!ret) return ERR_PTR(-ENOMEM); @@ -319,37 +322,59 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info, unlock_page(page); } -/* - * Convert the [start, start + len) range into a u16 bitmap - * - * For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0. - */ -static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info, - struct page *page, u64 start, u32 len) +static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start, + unsigned int nbits) { - const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits; - const int nbits = len >> fs_info->sectorsize_bits; + unsigned int found_zero; - btrfs_subpage_assert(fs_info, page, start, len); + found_zero = find_next_zero_bit(addr, start + nbits, start); + if (found_zero == start + nbits) + return true; + return false; +} - /* - * Here nbits can be 16, thus can go beyond u16 range. We make the - * first left shift to be calculate in unsigned long (at least u32), - * then truncate the result to u16. 
- */ - return (u16)(((1UL << nbits) - 1) << bit_start); +static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start, + unsigned int nbits) +{ + unsigned int found_set; + + found_set = find_next_bit(addr, start + nbits, start); + if (found_set == start + nbits) + return true; + return false; } +#define subpage_calc_start_bit(fs_info, page, name, start, len) \ +({ \ + unsigned int start_bit; \ + \ + btrfs_subpage_assert(fs_info, page, start, len); \ + start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ + start_bit += fs_info->subpage_info->name##_offset; \ + start_bit; \ +}) + +#define subpage_test_bitmap_all_set(fs_info, subpage, name) \ + bitmap_test_range_all_set(subpage->bitmaps, \ + fs_info->subpage_info->name##_offset, \ + fs_info->subpage_info->bitmap_nr_bits) + +#define subpage_test_bitmap_all_zero(fs_info, subpage, name) \ + bitmap_test_range_all_zero(subpage->bitmaps, \ + fs_info->subpage_info->name##_offset, \ + fs_info->subpage_info->bitmap_nr_bits) + void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + uptodate, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->uptodate_bitmap |= tmp; - if (subpage->uptodate_bitmap == U16_MAX) + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate)) SetPageUptodate(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -358,11 +383,12 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + uptodate, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->uptodate_bitmap &= ~tmp; + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); ClearPageUptodate(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -371,11 +397,12 @@ void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + error, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->error_bitmap |= tmp; + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); SetPageError(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -384,12 +411,13 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + error, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->error_bitmap &= ~tmp; - if (subpage->error_bitmap == 0) + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, subpage, error)) 
ClearPageError(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -398,11 +426,12 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + dirty, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->dirty_bitmap |= tmp; + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); spin_unlock_irqrestore(&subpage->lock, flags); set_page_dirty(page); } @@ -421,13 +450,14 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + dirty, start, len); unsigned long flags; bool last = false; spin_lock_irqsave(&subpage->lock, flags); - subpage->dirty_bitmap &= ~tmp; - if (subpage->dirty_bitmap == 0) + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) last = true; spin_unlock_irqrestore(&subpage->lock, flags); return last; @@ -447,11 +477,12 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + writeback, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->writeback_bitmap |= tmp; + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); set_page_writeback(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -460,12 +491,13 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + writeback, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->writeback_bitmap &= ~tmp; - if (subpage->writeback_bitmap == 0) { + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) { ASSERT(PageWriteback(page)); end_page_writeback(page); } @@ -476,11 +508,12 @@ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + ordered, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->ordered_bitmap |= tmp; + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); SetPageOrdered(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -489,12 +522,13 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len) { struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; - const u16 tmp = 
btrfs_subpage_calc_bitmap(fs_info, page, start, len); + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + ordered, start, len); unsigned long flags; spin_lock_irqsave(&subpage->lock, flags); - subpage->ordered_bitmap &= ~tmp; - if (subpage->ordered_bitmap == 0) + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered)) ClearPageOrdered(page); spin_unlock_irqrestore(&subpage->lock, flags); } @@ -507,12 +541,14 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ struct page *page, u64 start, u32 len) \ { \ struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \ - const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); \ + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, \ + name, start, len); \ unsigned long flags; \ bool ret; \ \ spin_lock_irqsave(&subpage->lock, flags); \ - ret = ((subpage->name##_bitmap & tmp) == tmp); \ + ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \ + len >> fs_info->sectorsize_bits); \ spin_unlock_irqrestore(&subpage->lock, flags); \ return ret; \ } @@ -609,5 +645,5 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info, return; ASSERT(PagePrivate(page) && page->private); - ASSERT(subpage->dirty_bitmap == 0); + ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty)); } diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index 386246a7770f3..ac4dd64ed2572 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -5,12 +5,6 @@ #include -/* - * Maximum page size we support is 64K, minimum sector size is 4K, u16 bitmap - * is sufficient. Regular bitmap_* is not used due to size reasons. - */ -#define BTRFS_SUBPAGE_BITMAP_SIZE 16 - /* * Extra info for subpapge bitmap. * @@ -51,10 +45,6 @@ struct btrfs_subpage_info { struct btrfs_subpage { /* Common members for both data and metadata pages */ spinlock_t lock; - u16 uptodate_bitmap; - u16 error_bitmap; - u16 dirty_bitmap; - u16 writeback_bitmap; /* * Both data and metadata needs to track how many readers are for the * page. */ @@ -71,14 +61,11 @@ struct btrfs_subpage { * manages whether the subpage can be detached. */ atomic_t eb_refs; - /* Structures only used by data */ - struct { - atomic_t writers; - /* Tracke pending ordered extent in this sector */ - u16 ordered_bitmap; - }; + /* Structures only used by data */ + atomic_t writers; }; + unsigned long bitmaps[]; }; enum btrfs_subpage_type { From f3ecd10ae748b4eb937a7ede2f544d0394e1b63c Mon Sep 17 00:00:00 2001 From: Sidong Yang Date: Thu, 26 Aug 2021 14:44:36 +0000 Subject: [PATCH 0262/1202] btrfs: reflink: initialize return value to 0 in btrfs_extent_same() Fix a warning reported by smatch that ret could be returned uninitialized. The dedupe operations are supposed to return 0 for a 0 length range but the caller does not pass olen == 0. To keep this behaviour and also fix the warning, initialize ret to 0.
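The pattern smatch flags can be shown with a minimal standalone sketch (illustrative names only, not the btrfs code): the loop is the only place the return variable gets assigned, so a zero-iteration loop would otherwise return an indeterminate value.

static int process_chunks(unsigned long chunk_count)
{
	int ret = 0;	/* the fix: a zero-iteration loop now returns 0 */
	unsigned long i;

	for (i = 0; i < chunk_count; i++) {
		/* stand-in for deduplicating one chunk */
		ret = 0;
		if (ret < 0)
			break;
	}
	return ret;	/* without the initializer, garbage when chunk_count == 0 */
}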
Reviewed-by: Filipe Manana Signed-off-by: Sidong Yang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 9b0814318e726..c71e49782e86d 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -649,7 +649,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { - int ret; + int ret = 0; u64 i, tail_len, chunk_count; struct btrfs_root *root_dst = BTRFS_I(dst)->root; From 8ef278edd1a974d994a9799d05cea81599d66bc2 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 24 Aug 2021 13:27:42 +0800 Subject: [PATCH 0263/1202] btrfs: rename and switch to bool btrfs_chunk_readonly btrfs_chunk_readonly() checks whether the given chunk is writeable: it returns 1 for readonly and 0 for writeable, so a bool return type suffices instead of the current int. Also, rename btrfs_chunk_readonly() to btrfs_chunk_writeable(), as we check whether the block group is writeable; this also keeps the logic in the parent function simpler to understand. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 17 ++++++++++------- fs/btrfs/volumes.c | 17 ++++++++--------- fs/btrfs/volumes.h | 2 +- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1f8b06afbd03d..64c4685bd7706 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2062,15 +2062,18 @@ static int read_one_block_group(struct btrfs_fs_info *info, link_block_group(cache); set_avail_alloc_bits(info, cache->flags); - if (btrfs_chunk_readonly(info, cache->start)) { + if (btrfs_chunk_writeable(info, cache->start)) { + if (cache->used == 0) { + ASSERT(list_empty(&cache->bg_list)); + if (btrfs_test_opt(info, DISCARD_ASYNC)) + btrfs_discard_queue_work(&info->discard_ctl, cache); + else + btrfs_mark_bg_unused(cache); + } + } else { inc_block_group_ro(cache, 1); - } else if (cache->used == 0) { - ASSERT(list_empty(&cache->bg_list)); - if (btrfs_test_opt(info, DISCARD_ASYNC)) - btrfs_discard_queue_work(&info->discard_ctl, cache); - else - btrfs_mark_bg_unused(cache); } + return 0; error: btrfs_put_block_group(cache); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a5e29a3f56d21..161b2da02fe44 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5597,17 +5597,17 @@ static inline int btrfs_chunk_max_errors(struct map_lookup *map) return btrfs_raid_array[index].tolerated_failures; } -int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) +bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset) { struct extent_map *em; struct map_lookup *map; - int readonly = 0; int miss_ndevs = 0; int i; + bool ret = true; em = btrfs_get_chunk_map(fs_info, chunk_offset, 1); if (IS_ERR(em)) - return 1; + return false; map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { @@ -5618,21 +5618,20 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) } if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &map->stripes[i].dev->dev_state)) { - readonly = 1; + ret = false; goto end; } } /* - * If the number of missing devices is larger than max errors, - * we can not write the data into that chunk successfully, so - * set it readonly.
+ * If the number of missing devices is larger than max errors, we can + * not write the data into that chunk successfully. */ if (miss_ndevs > btrfs_chunk_max_errors(map)) - readonly = 1; + ret = false; end: free_extent_map(em); - return readonly; + return ret; } void btrfs_mapping_tree_free(struct extent_map_tree *tree) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5314ce51b9273..937a81f9f6e61 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -493,7 +493,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info); int btrfs_uuid_scan_kthread(void *data); -int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset); +bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset); int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *max_avail); void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); From 6777d3e398efc0ca8ba6ae3b3f131dd09cb8fa78 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:32 +0800 Subject: [PATCH 0264/1202] btrfs: defrag: pass file_ra_state instead of file to btrfs_defrag_file() Currently btrfs_defrag_file() accepts both "struct inode" and "struct file" as parameters. We can easily grab "struct inode" from "struct file" using the file_inode() helper. The reason why we need "struct file" is just to re-use its f_ra. Change this to pass a "struct file_ra_state" parameter, so that it's clearer what we really want. Since we're here, also add some comments on the function btrfs_defrag_file(). Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/ioctl.c | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8736347d1fa36..8f0174c62274f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3243,9 +3243,9 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns, int btrfs_ioctl_get_supported_features(void __user *arg); void btrfs_sync_inode_flags_to_i_flags(struct inode *inode); int __pure btrfs_is_empty_uuid(u8 *uuid); -int btrfs_defrag_file(struct inode *inode, struct file *file, +int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, - u64 newer_than, unsigned long max_pages); + u64 newer_than, unsigned long max_to_defrag); void btrfs_get_block_group_info(struct list_head *groups_list, struct btrfs_ioctl_space_info *space); void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1c4b6247ca00c..033726aed96e4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1403,13 +1403,22 @@ static int cluster_pages_for_defrag(struct inode *inode, } -int btrfs_defrag_file(struct inode *inode, struct file *file, +/* + * Entry point to file defragmentation. + * + * @inode: inode to be defragged + * @ra: readahead state (can be NULL) + * @range: defrag options including range and flags + * @newer_than: minimum transid to defrag + * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode + * will be defragged.
+ */ +int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, u64 newer_than, unsigned long max_to_defrag) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; - struct file_ra_state *ra = NULL; unsigned long last_index; u64 isize = i_size_read(inode); u64 last_len = 0; @@ -1427,6 +1436,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, u64 new_align = ~((u64)SZ_128K - 1); struct page **pages = NULL; bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS; + bool ra_allocated = false; if (isize == 0) return 0; @@ -1445,16 +1455,15 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, extent_thresh = SZ_256K; /* - * If we were not given a file, allocate a readahead context. As + * If we were not given a ra, allocate a readahead context. As * readahead is just an optimization, defrag will work without it so * we don't error out. */ - if (!file) { + if (!ra) { + ra_allocated = true; ra = kzalloc(sizeof(*ra), GFP_KERNEL); if (ra) file_ra_state_init(ra, inode->i_mapping); - } else { - ra = &file->f_ra; } pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL); @@ -1536,7 +1545,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra_index = max(i, ra_index); if (ra) page_cache_sync_readahead(inode->i_mapping, ra, - file, ra_index, cluster); + NULL, ra_index, cluster); ra_index += cluster; } @@ -1607,7 +1616,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; btrfs_inode_unlock(inode, 0); } - if (!file) + if (ra_allocated) kfree(ra); kfree(pages); return ret; @@ -3176,7 +3185,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) /* the rest are all set to zero by kzalloc */ range.len = (u64)-1; } - ret = btrfs_defrag_file(file_inode(file), file, + ret = btrfs_defrag_file(file_inode(file), &file->f_ra, &range, BTRFS_OLDEST_GENERATION, 0); if (ret > 0) ret = 0; From 057455043767a101b89db80c8eb5a93f83b4e172 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:33 +0800 Subject: [PATCH 0265/1202] btrfs: defrag: also check PagePrivate for subpage cases in cluster_pages_for_defrag() In cluster_pages_for_defrag() we have a window where we unlock the page to either start the ordered range or read the content from disk. When we re-lock the page, we need to make sure it still has the correct page->private for subpage. Thus add the extra PagePrivate check here to handle subpage cases properly. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 033726aed96e4..b8b30e11a2268 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1278,7 +1278,8 @@ static int cluster_pages_for_defrag(struct inode *inode, * we unlocked the page above, so we need check if * it was released or not.
*/ - if (page->mapping != inode->i_mapping) { + if (page->mapping != inode->i_mapping || + !PagePrivate(page)) { unlock_page(page); put_page(page); goto again; @@ -1296,7 +1297,7 @@ static int cluster_pages_for_defrag(struct inode *inode, } } - if (page->mapping != inode->i_mapping) { + if (page->mapping != inode->i_mapping || !PagePrivate(page)) { unlock_page(page); put_page(page); goto again; From 926cdd5070394fdc3a6974dafbc3457640f69296 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:34 +0800 Subject: [PATCH 0266/1202] btrfs: defrag: replace hard coded PAGE_SIZE with sectorsize When testing subpage defrag support, I always hit some strange inode nbytes errors. After a lot of debugging, it turned out that defrag_lookup_extent() is using PAGE_SIZE as the size for lookup_extent_mapping(). Since lookup_extent_mapping() is calling __lookup_extent_mapping() with @strict == 1, this means any extent map smaller than one page will be ignored, preventing subpage defrag from grabbing a correct extent map. There are quite a few PAGE_SIZE usages in ioctl.c, but most of them are correct and fall into one of the following cases: - ioctl structure size check We want the ioctl structure to be contained inside one page. - real page operations The remaining cases in defrag_lookup_extent() and check_defrag_in_cache() will be addressed in this patch. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b8b30e11a2268..8e9c51acc9c76 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -997,10 +997,11 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize; u64 end; read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE); + em = lookup_extent_mapping(em_tree, offset, sectorsize); read_unlock(&em_tree->lock); if (em) { @@ -1090,23 +1091,23 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em; - u64 len = PAGE_SIZE; + const u32 sectorsize = BTRFS_I(inode)->root->fs_info->sectorsize; /* * hopefully we have this extent in the tree already, try without * the full extent lock */ read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, len); + em = lookup_extent_mapping(em_tree, start, sectorsize); read_unlock(&em_tree->lock); if (!em) { struct extent_state *cached = NULL; - u64 end = start + len - 1; + u64 end = start + sectorsize - 1; /* get the big lock and read metadata off disk */ lock_extent_bits(io_tree, start, end, &cached); - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize); unlock_extent_cached(io_tree, start, end, &cached); if (IS_ERR(em)) From dc78c1aadedf2c9fbbdc15f1ff3271cbb0b3e063 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:35 +0800 Subject: [PATCH 0267/1202] btrfs: defrag: factor out page preparation into a helper In cluster_pages_for_defrag(), we have a complex code block inside one for() loop. The code block prepares one page for defrag and will ensure: - The page is locked and set up properly.
- No ordered extent exists in the page range. - The page is uptodate. This behavior is pretty common and will be reused by later defrag rework. So factor out the code into its own helper, defrag_prepare_one_page(), for later use, and clean up the code a little. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 148 ++++++++++++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 61 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8e9c51acc9c76..30456253ee597 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1197,6 +1197,88 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, return ret; } +/* + * Prepare one page to be defragged. + * + * This will ensure: + * + * - Returned page is locked and has been set up properly. + * - No ordered extent exists in the page. + * - The page is uptodate. + * + * NOTE: Caller should also wait for page writeback after the cluster is + * prepared, here we don't do writeback wait for each page. + */ +static struct page *defrag_prepare_one_page(struct btrfs_inode *inode, + pgoff_t index) +{ + struct address_space *mapping = inode->vfs_inode.i_mapping; + gfp_t mask = btrfs_alloc_write_mask(mapping); + u64 page_start = (u64)index << PAGE_SHIFT; + u64 page_end = page_start + PAGE_SIZE - 1; + struct extent_state *cached_state = NULL; + struct page *page; + int ret; + +again: + page = find_or_create_page(mapping, index, mask); + if (!page) + return ERR_PTR(-ENOMEM); + + ret = set_page_extent_mapped(page); + if (ret < 0) { + unlock_page(page); + put_page(page); + return ERR_PTR(ret); + } + + /* Wait for any existing ordered extent in the range */ + while (1) { + struct btrfs_ordered_extent *ordered; + + lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state); + ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); + unlock_extent_cached(&inode->io_tree, page_start, page_end, + &cached_state); + if (!ordered) + break; + + unlock_page(page); + btrfs_start_ordered_extent(ordered, 1); + btrfs_put_ordered_extent(ordered); + lock_page(page); + /* + * We unlocked the page above, so we need check if it was + * released or not. + */ + if (page->mapping != mapping || !PagePrivate(page)) { + unlock_page(page); + put_page(page); + goto again; + } + } + + /* + * Now the page range has no ordered extent any more. Read the page to + * make it uptodate. + */ + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (page->mapping != mapping || !PagePrivate(page)) { + unlock_page(page); + put_page(page); + goto again; + } + if (!PageUptodate(page)) { + unlock_page(page); + put_page(page); + return ERR_PTR(-EIO); + } + } + return page; +} + /* * it doesn't do much good to defrag one or two pages * at a time.
This pulls in a nice chunk of pages @@ -1224,11 +1306,8 @@ static int cluster_pages_for_defrag(struct inode *inode, int ret; int i; int i_done; - struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; - struct extent_io_tree *tree; struct extent_changeset *data_reserved = NULL; - gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); file_end = (isize - 1) >> PAGE_SHIFT; if (!isize || start_index > file_end) @@ -1241,69 +1320,16 @@ static int cluster_pages_for_defrag(struct inode *inode, if (ret) return ret; i_done = 0; - tree = &BTRFS_I(inode)->io_tree; /* step one, lock all the pages */ for (i = 0; i < page_cnt; i++) { struct page *page; -again: - page = find_or_create_page(inode->i_mapping, - start_index + i, mask); - if (!page) - break; - ret = set_page_extent_mapped(page); - if (ret < 0) { - unlock_page(page); - put_page(page); + page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i); + if (IS_ERR(page)) { + ret = PTR_ERR(page); break; } - - page_start = page_offset(page); - page_end = page_start + PAGE_SIZE - 1; - while (1) { - lock_extent_bits(tree, page_start, page_end, - &cached_state); - ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), - page_start); - unlock_extent_cached(tree, page_start, page_end, - &cached_state); - if (!ordered) - break; - - unlock_page(page); - btrfs_start_ordered_extent(ordered, 1); - btrfs_put_ordered_extent(ordered); - lock_page(page); - /* - * we unlocked the page above, so we need check if - * it was released or not. - */ - if (page->mapping != inode->i_mapping || - !PagePrivate(page)) { - unlock_page(page); - put_page(page); - goto again; - } - } - - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - put_page(page); - ret = -EIO; - break; - } - } - - if (page->mapping != inode->i_mapping || !PagePrivate(page)) { - unlock_page(page); - put_page(page); - goto again; - } - pages[i] = page; i_done++; } @@ -1314,8 +1340,8 @@ static int cluster_pages_for_defrag(struct inode *inode, goto out; /* - * so now we have a nice long stream of locked - * and up to date pages, lets wait on them + * Now we have a nice long stream of locked and up to date pages, let's + * wait on them. */ for (i = 0; i < i_done; i++) wait_on_page_writeback(pages[i]); From ebcaee5a45eaa9a64daf59a026b2828f041dc02b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:36 +0800 Subject: [PATCH 0268/1202] btrfs: defrag: introduce helper to collect target file extents Introduce a helper, defrag_collect_targets(), to collect all possible targets to be defragged. This function will not consider things like max_sectors_to_defrag, thus the caller is responsible for ensuring we don't exceed the limit. This function will be the first stage of later defrag rework. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 30456253ee597..07a70520819f0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1431,6 +1431,126 @@ static int cluster_pages_for_defrag(struct inode *inode, } +struct defrag_target_range { + struct list_head list; + u64 start; + u64 len; +}; + +/* + * Collect all valid target extents.
+ * + * @start: file offset to lookup + * @len: length to lookup + * @extent_thresh: file extent size threshold, any extent size >= this value + * will be ignored + * @newer_than: only defrag extents newer than this value + * @do_compress: whether the defrag is doing compression + * if true, @extent_thresh will be ignored and all regular + * file extents meeting @newer_than will be targets. + * @target_list: list of targets file extents + */ +static int defrag_collect_targets(struct btrfs_inode *inode, + u64 start, u64 len, u32 extent_thresh, + u64 newer_than, bool do_compress, + struct list_head *target_list) +{ + u64 cur = start; + int ret = 0; + + while (cur < start + len) { + struct extent_map *em; + struct defrag_target_range *new; + bool next_mergeable = true; + u64 range_len; + + em = defrag_lookup_extent(&inode->vfs_inode, cur); + if (!em) + break; + + /* Skip hole/inline/preallocated extents */ + if (em->block_start >= EXTENT_MAP_LAST_BYTE || + test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + goto next; + + /* Skip older extent */ + if (em->generation < newer_than) + goto next; + + /* + * For do_compress case, we want to compress all valid file + * extents, thus no @extent_thresh or mergeable check. + */ + if (do_compress) + goto add; + + /* Skip too large extent */ + if (em->len >= extent_thresh) + goto next; + + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em); + if (!next_mergeable) { + struct defrag_target_range *last; + + /* Empty target list, no way to merge with last entry */ + if (list_empty(target_list)) + goto next; + last = list_entry(target_list->prev, + struct defrag_target_range, list); + /* Not mergeable with last entry */ + if (last->start + last->len != cur) + goto next; + + /* Mergeable, fall through to add it to @target_list. */ + } + +add: + range_len = min(extent_map_end(em), start + len) - cur; + /* + * This one is a good target, check if it can be merged into + * last range of the target list. + */ + if (!list_empty(target_list)) { + struct defrag_target_range *last; + + last = list_entry(target_list->prev, + struct defrag_target_range, list); + ASSERT(last->start + last->len <= cur); + if (last->start + last->len == cur) { + /* Mergeable, enlarge the last entry */ + last->len += range_len; + goto next; + } + /* Fall through to allocate a new entry */ + } + + /* Allocate new defrag_target_range */ + new = kmalloc(sizeof(*new), GFP_NOFS); + if (!new) { + free_extent_map(em); + ret = -ENOMEM; + break; + } + new->start = cur; + new->len = range_len; + list_add_tail(&new->list, target_list); + +next: + cur = extent_map_end(em); + free_extent_map(em); + } + if (ret < 0) { + struct defrag_target_range *entry; + struct defrag_target_range *tmp; + + list_for_each_entry_safe(entry, tmp, target_list, list) { + list_del_init(&entry->list); + kfree(entry); + } + } + return ret; +} + /* * Entry point to file defragmentation. * From c619afc63aed477e0a2728ba2b176617bba36ecc Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:37 +0800 Subject: [PATCH 0269/1202] btrfs: defrag: introduce helper to defrag a contiguous prepared range A new helper, defrag_one_locked_target(), is introduced to do the real part of the defrag. The caller needs to ensure that both the pages and the extent bits are locked, no ordered extent exists for the range, and all writeback is finished.
The core defrag part is pretty straightforward: - Reserve space - Set extent bits to defrag - Update involved pages to be dirty Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 07a70520819f0..465ee00e29545 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,6 +48,7 @@ #include "space-info.h" #include "delalloc-space.h" #include "block-group.h" +#include "subpage.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI @@ -1551,6 +1552,60 @@ static int defrag_collect_targets(struct btrfs_inode *inode, return ret; } +#define CLUSTER_SIZE (SZ_256K) + +/* + * Defrag one contiguous target range. + * + * @inode: target inode + * @target: target range to defrag + * @pages: locked pages covering the defrag range + * @nr_pages: number of locked pages + * + * Caller should ensure: + * + * - Pages are prepared + * Pages should be locked, no ordered extent in the pages range, + * no writeback. + * + * - Extent bits are locked + */ +static int defrag_one_locked_target(struct btrfs_inode *inode, + struct defrag_target_range *target, + struct page **pages, int nr_pages, + struct extent_state **cached_state) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct extent_changeset *data_reserved = NULL; + const u64 start = target->start; + const u64 len = target->len; + unsigned long last_index = (start + len - 1) >> PAGE_SHIFT; + unsigned long start_index = start >> PAGE_SHIFT; + unsigned long first_index = page_index(pages[0]); + int ret = 0; + int i; + + ASSERT(last_index - first_index + 1 <= nr_pages); + + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, start, len); + if (ret < 0) + return ret; + clear_extent_bit(&inode->io_tree, start, start + len - 1, + EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | + EXTENT_DEFRAG, 0, 0, cached_state); + set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state); + + /* Update the page status */ + for (i = start_index - first_index; i <= last_index - first_index; i++) { + ClearPageChecked(pages[i]); + btrfs_page_clamp_set_dirty(fs_info, pages[i], start, len); + } + btrfs_delalloc_release_extents(inode, len); + extent_changeset_free(data_reserved); + + return ret; +} + /* * Entry point to file defragmentation. * From f29cedc7d1a19da96090b66c188056459e1ba53a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:38 +0800 Subject: [PATCH 0270/1202] btrfs: defrag: introduce helper to defrag a range A new helper, defrag_one_range(), is introduced to defrag one range. This function will mostly prepare the needed pages and extent status for defrag_one_locked_target(). As we can only have a consistent view of the extent map with the pages and extent bits locked, we need to re-check the range passed in to get a real target list for defrag_one_locked_target(). Since defrag_collect_targets() will call defrag_lookup_extent() and lock the extent range, we also need to teach those two functions to skip the extent lock. Thus a new parameter, @locked, is introduced to skip the extent lock if the caller has already locked the range.
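The @locked convention can be sketched in isolation (a hedged sketch with made-up names; the real code locks the extent io tree): the helper takes and drops the lock only when the caller does not already hold it, so a call from an already-locked context cannot deadlock against itself.

struct range_lock {
	int held;	/* stand-in for the extent io tree lock state */
};

static void range_lock_take(struct range_lock *l) { l->held = 1; }
static void range_lock_drop(struct range_lock *l) { l->held = 0; }

static int lookup_range(struct range_lock *l, int locked)
{
	int ret = 0;

	if (!locked)			/* lock only if the caller has not */
		range_lock_take(l);
	/* ... the actual lookup would happen here, under the lock ... */
	if (!locked)
		range_lock_drop(l);
	return ret;
}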
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 103 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 465ee00e29545..d05db6ce559c8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1087,7 +1087,8 @@ static int find_new_extents(struct btrfs_root *root, return -ENOENT; } -static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, + bool locked) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -1107,9 +1108,11 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) u64 end = start + sectorsize - 1; /* get the big lock and read metadata off disk */ - lock_extent_bits(io_tree, start, end, &cached); + if (!locked) + lock_extent_bits(io_tree, start, end, &cached); em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, sectorsize); - unlock_extent_cached(io_tree, start, end, &cached); + if (!locked) + unlock_extent_cached(io_tree, start, end, &cached); if (IS_ERR(em)) return NULL; @@ -1118,7 +1121,8 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) return em; } -static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, + bool locked) { struct extent_map *next; bool ret = true; @@ -1127,7 +1131,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) if (em->start + em->len >= i_size_read(inode)) return false; - next = defrag_lookup_extent(inode, em->start + em->len); + next = defrag_lookup_extent(inode, em->start + em->len, locked); if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) ret = false; else if ((em->block_start + em->block_len == next->block_start) && @@ -1156,7 +1160,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, *skip = 0; - em = defrag_lookup_extent(inode, start); + em = defrag_lookup_extent(inode, start, false); if (!em) return 0; @@ -1169,7 +1173,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, if (!*defrag_end) prev_mergeable = false; - next_mergeable = defrag_check_next_extent(inode, em); + next_mergeable = defrag_check_next_extent(inode, em, false); /* * we hit a real extent, if it is big or the next extent is not a * real extent, don't bother defragging it @@ -1449,12 +1453,13 @@ struct defrag_target_range { * @do_compress: whether the defrag is doing compression * if true, @extent_thresh will be ignored and all regular * file extents meeting @newer_than will be targets. 
+ * @locked: if the range has already held extent lock * @target_list: list of targets file extents */ static int defrag_collect_targets(struct btrfs_inode *inode, u64 start, u64 len, u32 extent_thresh, u64 newer_than, bool do_compress, - struct list_head *target_list) + bool locked, struct list_head *target_list) { u64 cur = start; int ret = 0; @@ -1465,7 +1470,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, bool next_mergeable = true; u64 range_len; - em = defrag_lookup_extent(&inode->vfs_inode, cur); + em = defrag_lookup_extent(&inode->vfs_inode, cur, locked); if (!em) break; @@ -1489,7 +1494,8 @@ static int defrag_collect_targets(struct btrfs_inode *inode, if (em->len >= extent_thresh) goto next; - next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em); + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, + locked); if (!next_mergeable) { struct defrag_target_range *last; @@ -1606,6 +1612,83 @@ static int defrag_one_locked_target(struct btrfs_inode *inode, return ret; } +static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, + u32 extent_thresh, u64 newer_than, bool do_compress) +{ + struct extent_state *cached_state = NULL; + struct defrag_target_range *entry; + struct defrag_target_range *tmp; + LIST_HEAD(target_list); + struct page **pages; + const u32 sectorsize = inode->root->fs_info->sectorsize; + u64 last_index = (start + len - 1) >> PAGE_SHIFT; + u64 start_index = start >> PAGE_SHIFT; + unsigned int nr_pages = last_index - start_index + 1; + int ret = 0; + int i; + + ASSERT(nr_pages <= CLUSTER_SIZE / PAGE_SIZE); + ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(len, sectorsize)); + + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); + if (!pages) + return -ENOMEM; + + /* Prepare all pages */ + for (i = 0; i < nr_pages; i++) { + pages[i] = defrag_prepare_one_page(inode, start_index + i); + if (IS_ERR(pages[i])) { + ret = PTR_ERR(pages[i]); + pages[i] = NULL; + goto free_pages; + } + } + for (i = 0; i < nr_pages; i++) + wait_on_page_writeback(pages[i]); + + /* Lock the pages range */ + lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT, + (last_index << PAGE_SHIFT) + PAGE_SIZE - 1, + &cached_state); + /* + * Now we have a consistent view about the extent map, re-check + * which range really needs to be defragged. + * + * And this time we have extent locked already, pass @locked = true + * so that we won't relock the extent range and cause deadlock. + */ + ret = defrag_collect_targets(inode, start, len, extent_thresh, + newer_than, do_compress, true, + &target_list); + if (ret < 0) + goto unlock_extent; + + list_for_each_entry(entry, &target_list, list) { + ret = defrag_one_locked_target(inode, entry, pages, nr_pages, + &cached_state); + if (ret < 0) + break; + } + + list_for_each_entry_safe(entry, tmp, &target_list, list) { + list_del_init(&entry->list); + kfree(entry); + } +unlock_extent: + unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT, + (last_index << PAGE_SHIFT) + PAGE_SIZE - 1, + &cached_state); +free_pages: + for (i = 0; i < nr_pages; i++) { + if (pages[i]) { + unlock_page(pages[i]); + put_page(pages[i]); + } + } + kfree(pages); + return ret; +} + /* * Entry point to file defragmentation. 
* From 2dd04bb3e709ed25052a1b13227efbf5007183cf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:39 +0800 Subject: [PATCH 0271/1202] btrfs: defrag: introduce helper to defrag one cluster This new helper, defrag_one_cluster(), will defrag one cluster (at most 256K): - Collect all initial targets - Kick in readahead when possible - Call defrag_one_range() on each initial target, with some extra range clamping - Update the @sectors_defragged parameter This involves one behavior change: the defragged sectors accounting is no longer as accurate as the old behavior, since the initial targets are not consistent. We can have new holes punched inside the initial target, and we will skip such holes later. But the defragged sectors accounting doesn't need to be that accurate anyway, thus I don't want to push that extra accounting burden into defrag_one_range(). Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d05db6ce559c8..777cb3a3413c9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1689,6 +1689,62 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len, return ret; } +static int defrag_one_cluster(struct btrfs_inode *inode, + struct file_ra_state *ra, + u64 start, u32 len, u32 extent_thresh, + u64 newer_than, bool do_compress, + unsigned long *sectors_defragged, + unsigned long max_sectors) +{ + const u32 sectorsize = inode->root->fs_info->sectorsize; + struct defrag_target_range *entry; + struct defrag_target_range *tmp; + LIST_HEAD(target_list); + int ret; + + BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); + ret = defrag_collect_targets(inode, start, len, extent_thresh, + newer_than, do_compress, false, + &target_list); + if (ret < 0) + goto out; + + list_for_each_entry(entry, &target_list, list) { + u32 range_len = entry->len; + + /* Reached the limit */ + if (max_sectors && max_sectors == *sectors_defragged) + break; + + if (max_sectors) + range_len = min_t(u32, range_len, + (max_sectors - *sectors_defragged) * sectorsize); + + if (ra) + page_cache_sync_readahead(inode->vfs_inode.i_mapping, + ra, NULL, entry->start >> PAGE_SHIFT, + ((entry->start + range_len - 1) >> PAGE_SHIFT) - + (entry->start >> PAGE_SHIFT) + 1); + /* + * Here we may not defrag any range if holes are punched before + * we locked the pages. + * But that's fine, it only affects the @sectors_defragged + * accounting. + */ + ret = defrag_one_range(inode, entry->start, range_len, + extent_thresh, newer_than, do_compress); + if (ret < 0) + break; + *sectors_defragged += range_len; + } +out: + list_for_each_entry_safe(entry, tmp, &target_list, list) { + list_del_init(&entry->list); + kfree(entry); + } + return ret; +} + /* * Entry point to file defragmentation. * From 84f585f9966fdbfebf6d6f1b6286a9a2b2aaa23b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 27 May 2021 20:33:22 +0800 Subject: [PATCH 0272/1202] btrfs: defrag: use defrag_one_cluster() to implement btrfs_defrag_file() The function defrag_one_cluster() is able to defrag one range well enough; we only need to do the preparation for it, including: - Clamp and align the defrag range - Exclude invalid cases - Proper inode locking The old infrastructure is not removed in this patch, as that would be too noisy to review.
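The shape of that preparation can be pictured with a self-contained sketch (simplified, assumed helpers; the real code uses the kernel's round_down()/round_up() and fs_info->sectorsize): clamp the range to i_size, align it to the sector size, then walk it one 256K cluster at a time.

#include <stdint.h>

#define CLUSTER_SIZE (256 * 1024ULL)

/* Count how many cluster-sized calls a range would take;
 * len == 0 means "until the end of the file". */
static unsigned long count_defrag_clusters(uint64_t start, uint64_t len,
					   uint64_t isize, uint32_t sectorsize)
{
	uint64_t end = (len == 0 || start + len > isize) ? isize : start + len;
	uint64_t cur = start - (start % sectorsize);	/* round down */
	unsigned long nr_clusters = 0;

	while (cur < end) {
		nr_clusters++;		/* one cluster handed to the defrag core */
		cur += CLUSTER_SIZE;
	}
	return nr_clusters;
}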
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 204 +++++++++++++---------------------------------- 1 file changed, 55 insertions(+), 149 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 777cb3a3413c9..d66fc025b8843 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1760,25 +1760,15 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, u64 newer_than, unsigned long max_to_defrag) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long last_index; + unsigned long sectors_defragged = 0; u64 isize = i_size_read(inode); - u64 last_len = 0; - u64 skip = 0; - u64 defrag_end = 0; - u64 newer_off = range->start; - unsigned long i; - unsigned long ra_index = 0; - int ret; - int defrag_count = 0; - int compress_type = BTRFS_COMPRESS_ZLIB; - u32 extent_thresh = range->extent_thresh; - unsigned long max_cluster = SZ_256K >> PAGE_SHIFT; - unsigned long cluster = max_cluster; - u64 new_align = ~((u64)SZ_128K - 1); - struct page **pages = NULL; + u64 cur; + u64 last_byte; bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS; bool ra_allocated = false; + int compress_type = BTRFS_COMPRESS_ZLIB; + int ret = 0; + u32 extent_thresh = range->extent_thresh; if (isize == 0) return 0; @@ -1796,6 +1786,14 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, if (extent_thresh == 0) extent_thresh = SZ_256K; + if (range->start + range->len > range->start) { + /* Got a specific range */ + last_byte = min(isize, range->start + range->len) - 1; + } else { + /* Defrag until file end */ + last_byte = isize - 1; + } + /* * If we were not given a ra, allocate a readahead context. As * readahead is just an optimization, defrag will work without it so @@ -1808,159 +1806,67 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, file_ra_state_init(ra, inode->i_mapping); } - pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - goto out_ra; - } + /* Align the range */ + cur = round_down(range->start, fs_info->sectorsize); + last_byte = round_up(last_byte, fs_info->sectorsize) - 1; - /* find the last page to defrag */ - if (range->start + range->len > range->start) { - last_index = min_t(u64, isize - 1, - range->start + range->len - 1) >> PAGE_SHIFT; - } else { - last_index = (isize - 1) >> PAGE_SHIFT; - } - - if (newer_than) { - ret = find_new_extents(root, inode, newer_than, - &newer_off, SZ_64K); - if (!ret) { - range->start = newer_off; - /* - * we always align our defrag to help keep - * the extents in the file evenly spaced - */ - i = (newer_off & new_align) >> PAGE_SHIFT; - } else - goto out_ra; - } else { - i = range->start >> PAGE_SHIFT; - } - if (!max_to_defrag) - max_to_defrag = last_index - i + 1; - - /* - * make writeback starts from i, so the defrag range can be - * written sequentially. 
- */ - if (i < inode->i_mapping->writeback_index) - inode->i_mapping->writeback_index = i; - - while (i <= last_index && defrag_count < max_to_defrag && - (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) { - /* - * make sure we stop running if someone unmounts - * the FS - */ - if (!(inode->i_sb->s_flags & SB_ACTIVE)) - break; - - if (btrfs_defrag_cancelled(fs_info)) { - btrfs_debug(fs_info, "defrag_file cancelled"); - ret = -EAGAIN; - goto error; - } - - if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT, - extent_thresh, &last_len, &skip, - &defrag_end, do_compress)){ - unsigned long next; - /* - * the should_defrag function tells us how much to skip - * bump our counter by the suggested amount - */ - next = DIV_ROUND_UP(skip, PAGE_SIZE); - i = max(i + 1, next); - continue; - } + while (cur < last_byte) { + u64 cluster_end; - if (!newer_than) { - cluster = (PAGE_ALIGN(defrag_end) >> - PAGE_SHIFT) - i; - cluster = min(cluster, max_cluster); - } else { - cluster = max_cluster; - } + /* The cluster size 256K should always be page aligned */ + BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); - if (i + cluster > ra_index) { - ra_index = max(i, ra_index); - if (ra) - page_cache_sync_readahead(inode->i_mapping, ra, - NULL, ra_index, cluster); - ra_index += cluster; - } + /* We want the cluster end at page boundary when possible */ + cluster_end = (((cur >> PAGE_SHIFT) + + (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1; + cluster_end = min(cluster_end, last_byte); btrfs_inode_lock(inode, 0); if (IS_SWAPFILE(inode)) { ret = -ETXTBSY; - } else { - if (do_compress) - BTRFS_I(inode)->defrag_compress = compress_type; - ret = cluster_pages_for_defrag(inode, pages, i, cluster); + btrfs_inode_unlock(inode, 0); + break; } - if (ret < 0) { + if (!(inode->i_sb->s_flags & SB_ACTIVE)) { btrfs_inode_unlock(inode, 0); - goto out_ra; + break; } - - defrag_count += ret; - balance_dirty_pages_ratelimited(inode->i_mapping); + if (do_compress) + BTRFS_I(inode)->defrag_compress = compress_type; + ret = defrag_one_cluster(BTRFS_I(inode), ra, cur, + cluster_end + 1 - cur, extent_thresh, + newer_than, do_compress, + &sectors_defragged, max_to_defrag); btrfs_inode_unlock(inode, 0); - - if (newer_than) { - if (newer_off == (u64)-1) - break; - - if (ret > 0) - i += ret; - - newer_off = max(newer_off + 1, - (u64)i << PAGE_SHIFT); - - ret = find_new_extents(root, inode, newer_than, - &newer_off, SZ_64K); - if (!ret) { - range->start = newer_off; - i = (newer_off & new_align) >> PAGE_SHIFT; - } else { - break; - } - } else { - if (ret > 0) { - i += ret; - last_len += ret << PAGE_SHIFT; - } else { - i++; - last_len = 0; - } - } + if (ret < 0) + break; + cur = cluster_end + 1; } - ret = defrag_count; -error: - if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { - filemap_flush(inode->i_mapping); - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) + if (ra_allocated) + kfree(ra); + if (sectors_defragged) { + /* + * We have defragged some sectors, for compression case they + * need to be written back immediately.
+ */ + if (range->flags & BTRFS_DEFRAG_RANGE_START_IO) { filemap_flush(inode->i_mapping); + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_flush(inode->i_mapping); + } + if (range->compress_type == BTRFS_COMPRESS_LZO) + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); + else if (range->compress_type == BTRFS_COMPRESS_ZSTD) + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + ret = sectors_defragged; } - - if (range->compress_type == BTRFS_COMPRESS_LZO) { - btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); - } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { - btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); - } - -out_ra: if (do_compress) { btrfs_inode_lock(inode, 0); BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; btrfs_inode_unlock(inode, 0); } - if (ra_allocated) - kfree(ra); - kfree(pages); return ret; } From 231a780925d5de49935872e0c12f7446c69c65ee Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:41 +0800 Subject: [PATCH 0273/1202] btrfs: defrag: remove the old infrastructure Now the old infrastructure can all be removed. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 313 ----------------------------------------------- 1 file changed, 313 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d66fc025b8843..502eba1081187 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -986,107 +986,6 @@ static noinline int btrfs_mksnapshot(const struct path *parent, return ret; } -/* - * When we're defragging a range, we don't want to kick it off again - * if it is really just waiting for delalloc to send it down. - * If we find a nice big extent or delalloc range for the bytes in the - * file you want to defrag, we return 0 to let you know to skip this - * part of the file - */ -static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) -{ - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map *em = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize; - u64 end; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, offset, sectorsize); - read_unlock(&em_tree->lock); - - if (em) { - end = extent_map_end(em); - free_extent_map(em); - if (end - offset > thresh) - return 0; - } - /* if we already have a nice delalloc here, just stop */ - thresh /= 2; - end = count_range_bits(io_tree, &offset, offset + thresh, - thresh, EXTENT_DELALLOC, 1); - if (end >= thresh) - return 0; - return 1; -} - -/* - * helper function to walk through a file and find extents - * newer than a specific transid, and smaller than thresh.
- * - * This is used by the defragging code to find new and small - * extents - */ -static int find_new_extents(struct btrfs_root *root, - struct inode *inode, u64 newer_than, - u64 *off, u32 thresh) -{ - struct btrfs_path *path; - struct btrfs_key min_key; - struct extent_buffer *leaf; - struct btrfs_file_extent_item *extent; - int type; - int ret; - u64 ino = btrfs_ino(BTRFS_I(inode)); - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - min_key.objectid = ino; - min_key.type = BTRFS_EXTENT_DATA_KEY; - min_key.offset = *off; - - while (1) { - ret = btrfs_search_forward(root, &min_key, path, newer_than); - if (ret != 0) - goto none; -process_slot: - if (min_key.objectid != ino) - goto none; - if (min_key.type != BTRFS_EXTENT_DATA_KEY) - goto none; - - leaf = path->nodes[0]; - extent = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - type = btrfs_file_extent_type(leaf, extent); - if (type == BTRFS_FILE_EXTENT_REG && - btrfs_file_extent_num_bytes(leaf, extent) < thresh && - check_defrag_in_cache(inode, min_key.offset, thresh)) { - *off = min_key.offset; - btrfs_free_path(path); - return 0; - } - - path->slots[0]++; - if (path->slots[0] < btrfs_header_nritems(leaf)) { - btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); - goto process_slot; - } - - if (min_key.offset == (u64)-1) - goto none; - - min_key.offset++; - btrfs_release_path(path); - } -none: - btrfs_free_path(path); - return -ENOENT; -} - static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start, bool locked) { @@ -1142,66 +1041,6 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, return ret; } -static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, - u64 *last_len, u64 *skip, u64 *defrag_end, - int compress) -{ - struct extent_map *em; - int ret = 1; - bool next_mergeable = true; - bool prev_mergeable = true; - - /* - * make sure that once we start defragging an extent, we keep on - * defragging it - */ - if (start < *defrag_end) - return 1; - - *skip = 0; - - em = defrag_lookup_extent(inode, start, false); - if (!em) - return 0; - - /* this will cover holes, and inline extents */ - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - ret = 0; - goto out; - } - - if (!*defrag_end) - prev_mergeable = false; - - next_mergeable = defrag_check_next_extent(inode, em, false); - /* - * we hit a real extent, if it is big or the next extent is not a - * real extent, don't bother defragging it - */ - if (!compress && (*last_len == 0 || *last_len >= thresh) && - (em->len >= thresh || (!next_mergeable && !prev_mergeable))) - ret = 0; -out: - /* - * last_len ends up being a counter of how many bytes we've defragged. - * every time we choose not to defrag an extent, we reset *last_len - * so that the next tiny extent will force a defrag. - * - * The end result of this is that tiny extents before a single big - * extent will force at least part of that big extent to be defragged. - */ - if (ret) { - *defrag_end = extent_map_end(em); - } else { - *last_len = 0; - *skip = extent_map_end(em); - *defrag_end = 0; - } - - free_extent_map(em); - return ret; -} - /* * Prepare one page to be defragged. * @@ -1284,158 +1123,6 @@ static struct page *defrag_prepare_one_page(struct btrfs_inode *inode, return page; } -/* - * it doesn't do much good to defrag one or two pages - * at a time. This pulls in a nice chunk of pages - * to COW and defrag. 
- * - * It also makes sure the delalloc code has enough - * dirty data to avoid making new small extents as part - * of the defrag - * - * It's a good idea to start RA on this range - * before calling this. - */ -static int cluster_pages_for_defrag(struct inode *inode, - struct page **pages, - unsigned long start_index, - unsigned long num_pages) -{ - unsigned long file_end; - u64 isize = i_size_read(inode); - u64 page_start; - u64 page_end; - u64 page_cnt; - u64 start = (u64)start_index << PAGE_SHIFT; - u64 search_start; - int ret; - int i; - int i_done; - struct extent_state *cached_state = NULL; - struct extent_changeset *data_reserved = NULL; - - file_end = (isize - 1) >> PAGE_SHIFT; - if (!isize || start_index > file_end) - return 0; - - page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); - - ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, - start, page_cnt << PAGE_SHIFT); - if (ret) - return ret; - i_done = 0; - - /* step one, lock all the pages */ - for (i = 0; i < page_cnt; i++) { - struct page *page; - - page = defrag_prepare_one_page(BTRFS_I(inode), start_index + i); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - break; - } - pages[i] = page; - i_done++; - } - if (!i_done || ret) - goto out; - - if (!(inode->i_sb->s_flags & SB_ACTIVE)) - goto out; - - /* - * Now we have a nice long stream of locked and up to date pages, let's - * wait on them. - */ - for (i = 0; i < i_done; i++) - wait_on_page_writeback(pages[i]); - - page_start = page_offset(pages[0]); - page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE; - - lock_extent_bits(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, &cached_state); - - /* - * When defragmenting we skip ranges that have holes or inline extents, - * (check should_defrag_range()), to avoid unnecessary IO and wasting - * space. At btrfs_defrag_file(), we check if a range should be defragged - * before locking the inode and then, if it should, we trigger a sync - * page cache readahead - we lock the inode only after that to avoid - * blocking for too long other tasks that possibly want to operate on - * other file ranges. But before we were able to get the inode lock, - * some other task may have punched a hole in the range, or we may have - * now an inline extent, in which case we should not defrag. So check - * for that here, where we have the inode and the range locked, and bail - * out if that happened. 
- */ - search_start = page_start; - while (search_start < page_end) { - struct extent_map *em; - - em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start, - page_end - search_start); - if (IS_ERR(em)) { - ret = PTR_ERR(em); - goto out_unlock_range; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - free_extent_map(em); - /* Ok, 0 means we did not defrag anything */ - ret = 0; - goto out_unlock_range; - } - search_start = extent_map_end(em); - free_extent_map(em); - } - - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, - page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 0, 0, &cached_state); - - if (i_done != page_cnt) { - spin_lock(&BTRFS_I(inode)->lock); - btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); - spin_unlock(&BTRFS_I(inode)->lock); - btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start, (page_cnt - i_done) << PAGE_SHIFT, true); - } - - - set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, - &cached_state); - - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, &cached_state); - - for (i = 0; i < i_done; i++) { - clear_page_dirty_for_io(pages[i]); - ClearPageChecked(pages[i]); - set_page_dirty(pages[i]); - unlock_page(pages[i]); - put_page(pages[i]); - } - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); - extent_changeset_free(data_reserved); - return i_done; - -out_unlock_range: - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, &cached_state); -out: - for (i = 0; i < i_done; i++) { - unlock_page(pages[i]); - put_page(pages[i]); - } - btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, - start, page_cnt << PAGE_SHIFT, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); - extent_changeset_free(data_reserved); - return ret; - -} - struct defrag_target_range { struct list_head list; u64 start; From 4808ec2622c48288cc52ae75c50f751d419632e6 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 16:12:42 +0800 Subject: [PATCH 0274/1202] btrfs: defrag: enable defrag for subpage case With the new infrastructure which has taken subpage into consideration, now we should be safe to allow defrag to work for the subpage case. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 502eba1081187..9eb0c1eb568e5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3080,12 +3080,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) goto out; } - /* Subpage defrag will be supported in later commits */ - if (root->fs_info->sectorsize < PAGE_SIZE) { - ret = -ENOTTY; - goto out; - } - switch (inode->i_mode & S_IFMT) { case S_IFDIR: if (!capable(CAP_SYS_ADMIN)) { From 90df065385abc73472387813af15d583a5c96ffa Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:08 +0900 Subject: [PATCH 0275/1202] btrfs: zoned: load zone capacity information from devices The ZNS specification introduces the concept of a Zone Capacity. A zone capacity is an additional per-zone attribute that indicates the number of usable logical blocks within each zone, starting from the first logical block of each zone. It is always smaller than or equal to the zone size. With the SINGLE profile, we can set a block group's "capacity" to the same value as the underlying zone's Zone Capacity. We will limit the allocation not to exceed that capacity in a following commit.
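As a minimal sketch of the capacity rule (standalone C model with hypothetical names, not kernel code): allocation is bounded by a zone's capacity, never by its size.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the blk_zone fields the patch reads. */
struct zone_model {
	uint64_t start;    /* first sector of the zone */
	uint64_t len;      /* zone size, in sectors */
	uint64_t capacity; /* usable sectors; always capacity <= len */
};

/* Usable bytes in a zone: capacity, not len, bounds allocation. */
static uint64_t zone_usable_bytes(const struct zone_model *z)
{
	return z->capacity << 9; /* SECTOR_SHIFT == 9 */
}

int main(void)
{
	/* A 2GiB zone exposing only 1GiB of capacity, as ZNS permits. */
	struct zone_model z = { 0, 2ULL << 21, 1ULL << 21 };

	printf("usable %llu of %llu bytes\n",
	       (unsigned long long)zone_usable_bytes(&z),
	       (unsigned long long)(z.len << 9));
	return 0;
}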
Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.h | 1 + fs/btrfs/zoned.c | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index c72a71efcb187..2db40a0055129 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -202,6 +202,7 @@ struct btrfs_block_group { */ u64 alloc_offset; u64 zone_unusable; + u64 zone_capacity; u64 meta_write_pointer; }; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 90d9df131fc16..10ea5c12c4bae 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1039,6 +1039,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) int i; unsigned int nofs_flag; u64 *alloc_offsets = NULL; + u64 *caps = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; @@ -1069,6 +1070,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) return -ENOMEM; } + caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS); + if (!caps) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < map->num_stripes; i++) { bool is_sequential; struct blk_zone zone; @@ -1131,6 +1138,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + caps[i] = (zone.capacity << SECTOR_SHIFT); + switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: @@ -1144,7 +1153,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) alloc_offsets[i] = 0; break; case BLK_ZONE_COND_FULL: - alloc_offsets[i] = fs_info->zone_size; + alloc_offsets[i] = caps[i]; break; default: /* Partially used zone */ @@ -1169,6 +1178,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) * calculate_alloc_pointer() which takes extent buffer * locks to avoid deadlock. */ + + /* Zone capacity is always zone size in emulation */ + cache->zone_capacity = cache->length; if (new) { cache->alloc_offset = 0; goto out; } @@ -1195,6 +1207,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } cache->alloc_offset = alloc_offsets[0]; + cache->zone_capacity = caps[0]; break; case BTRFS_BLOCK_GROUP_DUP: case BTRFS_BLOCK_GROUP_RAID1: @@ -1218,6 +1231,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = -EIO; } + if (cache->alloc_offset > cache->zone_capacity) { + btrfs_err(fs_info, +"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", + cache->alloc_offset, cache->zone_capacity, + cache->start); + ret = -EIO; + } + /* An extent is allocated after the write pointer */ if (!ret && num_conventional && last_alloc > cache->alloc_offset) { btrfs_err(fs_info, @@ -1229,6 +1250,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) if (!ret) cache->meta_write_pointer = cache->alloc_offset + cache->start; + kfree(caps); kfree(alloc_offsets); free_extent_map(em); From 77625a1c19535b53c4f256d25a5f420f278d549c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:09 +0900 Subject: [PATCH 0276/1202] btrfs: zoned: move btrfs_free_excluded_extents out of btrfs_calc_zone_unusable btrfs_free_excluded_extents() is not necessary for btrfs_calc_zone_unusable() and it makes btrfs_calc_zone_unusable() difficult to reuse. Move it out and call btrfs_free_excluded_extents() in the proper context.
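The shape of the refactor, sketched with hypothetical stand-ins (the btrfs types and helpers are not reproduced here): the cleanup call moves from inside the helper to the call site, so the helper can be reused by callers that manage excluded extents themselves.

#include <stdio.h>

struct bg_model { unsigned long zone_unusable; };

/* The helper now only computes; no cleanup hidden inside it. */
static void calc_zone_unusable(struct bg_model *bg)
{
	bg->zone_unusable = 0; /* placeholder computation */
}

/* Stand-in for btrfs_free_excluded_extents() in this sketch. */
static void free_excluded_extents(struct bg_model *bg)
{
	(void)bg;
}

static void read_one_block_group(struct bg_model *bg)
{
	calc_zone_unusable(bg);
	/* Should not have any excluded extents. Just in case, though. */
	free_excluded_extents(bg);
}

int main(void)
{
	struct bg_model bg;

	read_one_block_group(&bg);
	printf("%lu\n", bg.zone_unusable);
	return 0;
}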
Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 ++ fs/btrfs/zoned.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 64c4685bd7706..4f43825ac7545 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2035,6 +2035,8 @@ static int read_one_block_group(struct btrfs_fs_info *info, */ if (btrfs_is_zoned(info)) { btrfs_calc_zone_unusable(cache); + /* Should not have any excluded extents. Just in case, though. */ + btrfs_free_excluded_extents(cache); } else if (cache->length == cache->used) { cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 10ea5c12c4bae..dd545af225e86 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1273,9 +1273,6 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) cache->cached = BTRFS_CACHE_FINISHED; cache->free_space_ctl->free_space = free; cache->zone_unusable = unusable; - - /* Should not have any excluded extents. Just in case, though */ - btrfs_free_excluded_extents(cache); } void btrfs_redirty_list_add(struct btrfs_transaction *trans, From f173d668b2806f416ec63ba8447096ca67931e0d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:10 +0900 Subject: [PATCH 0277/1202] btrfs: zoned: calculate free space from zone capacity Now that we introduced capacity in a block group, we need to calculate free space using the capacity instead of the length. Thus, we account capacity - alloc_offset as free, and the bytes in [capacity, length] as zone unusable. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 7 +++++-- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/free-space-cache.c | 7 ++++++- fs/btrfs/zoned.c | 5 +++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 4f43825ac7545..e9295e3c2cb3a 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2484,7 +2484,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran */ trace_btrfs_add_block_group(fs_info, cache, 1); btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, - cache->bytes_super, 0, &cache->space_info); + cache->bytes_super, cache->zone_unusable, + &cache->space_info); btrfs_update_global_block_rsv(fs_info); link_block_group(cache); @@ -2599,7 +2600,9 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (!--cache->ro) { if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ - cache->zone_unusable = cache->alloc_offset - cache->used; + cache->zone_unusable = + (cache->alloc_offset - cache->used) + + (cache->length - cache->zone_capacity); sinfo->bytes_zone_unusable += cache->zone_unusable; sinfo->bytes_readonly -= cache->zone_unusable; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ab456cb4bf80..165acee66b072 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3796,7 +3796,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, goto out; } - avail = block_group->length - block_group->alloc_offset; + WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity); + avail = block_group->zone_capacity - block_group->alloc_offset; if (avail < num_bytes) { if (ffe_ctl->max_extent_size < avail) { /* diff --git a/fs/btrfs/free-space-cache.c
b/fs/btrfs/free-space-cache.c index da0eee7c9e5f3..b76b608b081f1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2539,10 +2539,15 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold); + bool initial = (size == block_group->length); + + WARN_ON(!initial && offset + size > block_group->zone_capacity); spin_lock(&ctl->tree_lock); if (!used) to_free = size; + else if (initial) + to_free = block_group->zone_capacity; else if (offset >= block_group->alloc_offset) to_free = size; else if (offset + size <= block_group->alloc_offset) @@ -2755,7 +2760,7 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group, */ if (btrfs_is_zoned(fs_info)) { btrfs_info(fs_info, "free space %llu", - block_group->length - block_group->alloc_offset); + block_group->zone_capacity - block_group->alloc_offset); return; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index dd545af225e86..a92d6e52321d5 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1265,8 +1265,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) return; WARN_ON(cache->bytes_super != 0); - unusable = cache->alloc_offset - cache->used; - free = cache->length - cache->alloc_offset; + unusable = (cache->alloc_offset - cache->used) + + (cache->length - cache->zone_capacity); + free = cache->zone_capacity - cache->alloc_offset; /* We only need ->free_space in ALLOC_SEQ block groups */ cache->last_byte_to_unpin = (u64)-1; From 0185bd389ee74f87462f2f33419e878e5f4b9eb6 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:11 +0900 Subject: [PATCH 0278/1202] btrfs: zoned: tweak reclaim threshold for zone capacity With the introduction of zone capacity, the range [capacity, length] is always zone unusable. Counting this region as a reclaim target will cause reclaiming too early. Reclaim block groups based on bytes that can be usable after resetting. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b76b608b081f1..9ce0f9b238128 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2540,6 +2540,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 to_free, to_unusable; const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold); bool initial = (size == block_group->length); + u64 reclaimable_unusable; WARN_ON(!initial && offset + size > block_group->zone_capacity); @@ -2570,12 +2571,15 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, spin_unlock(&block_group->lock); } + reclaimable_unusable = block_group->zone_unusable - + (block_group->length - block_group->zone_capacity); /* All the region is now unusable. 
Mark it as unused and reclaim */ if (block_group->zone_unusable == block_group->length) { btrfs_mark_bg_unused(block_group); } else if (bg_reclaim_threshold && - block_group->zone_unusable >= - div_factor_fine(block_group->length, bg_reclaim_threshold)) { + reclaimable_unusable >= + div_factor_fine(block_group->zone_capacity, + bg_reclaim_threshold)) { btrfs_mark_bg_to_reclaim(block_group); } From 079f2e5b7df5ad5ae00b39e3c5657e54004339f0 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:12 +0900 Subject: [PATCH 0279/1202] btrfs: zoned: consider zone as full when no more SB can be written We cannot write beyond zone capacity. So, we should consider a zone as "full" when the write pointer goes beyond capacity - the size of super info. Also, take this opportunity to replace a subtle duplicated code with a loop and fix a typo in comment. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index a92d6e52321d5..c7c63190fbd66 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -45,6 +45,14 @@ */ #define BTRFS_MAX_ZONE_SIZE SZ_8G +#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT) + +static inline bool sb_zone_is_full(const struct blk_zone *zone) +{ + return (zone->cond == BLK_ZONE_COND_FULL) || + (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); +} + static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *zones = data; @@ -60,14 +68,13 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, bool empty[BTRFS_NR_SB_LOG_ZONES]; bool full[BTRFS_NR_SB_LOG_ZONES]; sector_t sector; + int i; - ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL && - zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL); - - empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY); - empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY); - full[0] = (zones[0].cond == BLK_ZONE_COND_FULL); - full[1] = (zones[1].cond == BLK_ZONE_COND_FULL); + for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { + ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL); + empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY); + full[i] = sb_zone_is_full(&zones[i]); + } /* * Possible states of log buffer zones @@ -664,7 +671,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, reset = &zones[1]; if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { - ASSERT(reset->cond == BLK_ZONE_COND_FULL); + ASSERT(sb_zone_is_full(reset)); ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, reset->start, reset->len, From d489382e2470e95c79acbf8e07ba85a090cc0e38 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:13 +0900 Subject: [PATCH 0280/1202] btrfs: zoned: locate superblock position using zone capacity sb_write_pointer() returns the write position of next superblock. For READ, we need a previous location. When the pointer is at the head, the previous one is the last one of the other zone. Calculate the last one's position from zone capacity. 
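The position arithmetic, as a standalone model (assumes the 4KiB BTRFS_SUPER_INFO_SIZE; the helper names are hypothetical): the previous superblock ends at the largest superblock-aligned offset at or below the other zone's start + capacity, so its start is one superblock lower.

#include <stdint.h>
#include <stdio.h>

#define SUPER_INFO_SIZE 4096ULL /* BTRFS_SUPER_INFO_SIZE */

static uint64_t align_down(uint64_t x, uint64_t a)
{
	return x - (x % a);
}

/* zone_end_bytes = (start + capacity) of the other zone, in bytes */
static uint64_t last_sb_pos(uint64_t zone_end_bytes)
{
	return align_down(zone_end_bytes, SUPER_INFO_SIZE) - SUPER_INFO_SIZE;
}

int main(void)
{
	/* 64MiB of capacity from offset 0: the last superblock is at 64M - 4K. */
	printf("%llu\n", (unsigned long long)last_sb_pos(64ULL << 20));
	return 0;
}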
Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index c7c63190fbd66..9cfb5b741718d 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -683,9 +683,20 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, reset->wp = reset->start; } } else if (ret != -ENOENT) { - /* For READ, we want the precious one */ + /* + * For READ, we want the previous one. Move write pointer to + * the end of a zone, if it is at the head of a zone. + */ + u64 zone_end = 0; + if (wp == zones[0].start << SECTOR_SHIFT) - wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT; + zone_end = zones[1].start + zones[1].capacity; + else if (wp == zones[1].start << SECTOR_SHIFT) + zone_end = zones[0].start + zones[0].capacity; + if (zone_end) + wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT, + BTRFS_SUPER_INFO_SIZE); + wp -= BTRFS_SUPER_INFO_SIZE; } From 4849a8d73a767108eacca4f0cfd8746c5d345e2c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:14 +0900 Subject: [PATCH 0281/1202] btrfs: zoned: finish superblock zone once no space left for new SB If there is no more space left for a new superblock in a superblock zone, then it is better to ZONE_FINISH the zone and frees up the active zone count. Since btrfs_advance_sb_log() can now issue REQ_OP_ZONE_FINISH, we also need to convert it to return int for the error case. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 +++- fs/btrfs/zoned.c | 52 ++++++++++++++++++++++++++++++++-------------- fs/btrfs/zoned.h | 8 ++++--- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71aafe9606e04..f755d02dc0884 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3887,7 +3887,9 @@ static int write_dev_supers(struct btrfs_device *device, bio->bi_opf |= REQ_FUA; btrfsic_submit_bio(bio); - btrfs_advance_sb_log(device, i); + + if (btrfs_advance_sb_log(device, i)) + errors++; } return errors < i ? 0 : -1; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 9cfb5b741718d..17554caf8b86b 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -789,36 +789,56 @@ static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo, return true; } -void btrfs_advance_sb_log(struct btrfs_device *device, int mirror) +int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) { struct btrfs_zoned_device_info *zinfo = device->zone_info; struct blk_zone *zone; + int i; if (!is_sb_log_zone(zinfo, mirror)) - return; + return 0; zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; - if (zone->cond != BLK_ZONE_COND_FULL) { + for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { + /* Advance the next zone */ + if (zone->cond == BLK_ZONE_COND_FULL) { + zone++; + continue; + } + if (zone->cond == BLK_ZONE_COND_EMPTY) zone->cond = BLK_ZONE_COND_IMP_OPEN; - zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT); + zone->wp += SUPER_INFO_SECTORS; + + if (sb_zone_is_full(zone)) { + /* + * No room left to write new superblock. Since + * superblock is written with REQ_SYNC, it is safe to + * finish the zone now. + * + * If the write pointer is exactly at the capacity, + * explicit ZONE_FINISH is not necessary. 
+ */ + if (zone->wp != zone->start + zone->capacity) { + int ret; + + ret = blkdev_zone_mgmt(device->bdev, + REQ_OP_ZONE_FINISH, zone->start, + zone->len, GFP_NOFS); + if (ret) + return ret; + } - if (zone->wp == zone->start + zone->len) + zone->wp = zone->start + zone->len; zone->cond = BLK_ZONE_COND_FULL; - - return; + } + return 0; } - zone++; - ASSERT(zone->cond != BLK_ZONE_COND_FULL); - if (zone->cond == BLK_ZONE_COND_EMPTY) - zone->cond = BLK_ZONE_COND_IMP_OPEN; - - zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT); - - if (zone->wp == zone->start + zone->len) - zone->cond = BLK_ZONE_COND_FULL; + /* All the zones are FULL. Should not reach here. */ + ASSERT(0); + return -EIO; } int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 4b299705bb12b..4f30f3bf18864 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -40,7 +40,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, u64 *bytenr_ret); int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, u64 *bytenr_ret); -void btrfs_advance_sb_log(struct btrfs_device *device, int mirror); +int btrfs_advance_sb_log(struct btrfs_device *device, int mirror); int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror); u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, u64 hole_end, u64 num_bytes); @@ -113,8 +113,10 @@ static inline int btrfs_sb_log_location(struct btrfs_device *device, int mirror, return 0; } -static inline void btrfs_advance_sb_log(struct btrfs_device *device, int mirror) -{ } +static inline int btrfs_advance_sb_log(struct btrfs_device *device, int mirror) +{ + return 0; +} static inline int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) { From 13ee495854f72e2f3378191eb5d4181017155a08 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:15 +0900 Subject: [PATCH 0282/1202] btrfs: zoned: load active zone information from devices The ZNS specification defines a limit on the number of zones that can be in the implicit open, explicit open or closed conditions. Any zone with such a condition is defined as an active zone and corresponds to any zone that is being written or that has been only partially written. If the maximum number of active zones is reached, we must either reset or finish some active zones before being able to choose other zones for storing data. Load queue_max_active_zones() and track the number of active zones left on the device. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/zoned.h | 3 +++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 17554caf8b86b..2ccdc6e1f7937 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "ctree.h" #include "volumes.h" #include "zoned.h" @@ -38,6 +39,16 @@ /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* + * Minimum of active zones we need: + * + * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors + * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group + * - 1 zone for tree-log dedicated block group + * - 1 zone for relocation + */ +#define BTRFS_MIN_ACTIVE_ZONES (BTRFS_SUPER_MIRROR_MAX + 5) + /* * Maximum supported zone size. Currently, SMR disks have a zone size of * 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
We do not @@ -303,6 +314,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_zoned_device_info *zone_info = NULL; struct block_device *bdev = device->bdev; + struct request_queue *queue = bdev_get_queue(bdev); + unsigned int max_active_zones; + unsigned int nactive; sector_t nr_sectors; sector_t sector = 0; struct blk_zone *zones = NULL; @@ -358,6 +372,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; + max_active_zones = queue_max_active_zones(queue); + if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { + btrfs_err_in_rcu(fs_info, +"zoned: %s: max active zones %u is too small, need at least %u active zones", + rcu_str_deref(device->name), max_active_zones, + BTRFS_MIN_ACTIVE_ZONES); + ret = -EINVAL; + goto out; + } + zone_info->max_active_zones = max_active_zones; + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); if (!zone_info->seq_zones) { ret = -ENOMEM; @@ -370,6 +395,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) goto out; } + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->active_zones) { + ret = -ENOMEM; + goto out; + } + zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; @@ -377,6 +408,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) } /* Get zones type */ + nactive = 0; while (sector < nr_sectors) { nr_zones = BTRFS_REPORT_NR_ZONES; ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones, @@ -387,8 +419,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) for (i = 0; i < nr_zones; i++) { if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) __set_bit(nreported, zone_info->seq_zones); - if (zones[i].cond == BLK_ZONE_COND_EMPTY) + switch (zones[i].cond) { + case BLK_ZONE_COND_EMPTY: __set_bit(nreported, zone_info->empty_zones); + break; + case BLK_ZONE_COND_IMP_OPEN: + case BLK_ZONE_COND_EXP_OPEN: + case BLK_ZONE_COND_CLOSED: + __set_bit(nreported, zone_info->active_zones); + nactive++; + break; + } nreported++; } sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; @@ -403,6 +444,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) goto out; } + if (max_active_zones) { + if (nactive > max_active_zones) { + btrfs_err_in_rcu(device->fs_info, + "zoned: %u active zones on %s exceeds max_active_zones %u", + nactive, rcu_str_deref(device->name), + max_active_zones); + ret = -EIO; + goto out; + } + atomic_set(&zone_info->active_zones_left, + max_active_zones - nactive); + } + /* Validate superblock log */ nr_zones = BTRFS_NR_SB_LOG_ZONES; for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { @@ -485,6 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) out: kfree(zones); out_free_zone_info: + bitmap_free(zone_info->active_zones); bitmap_free(zone_info->empty_zones); bitmap_free(zone_info->seq_zones); kfree(zone_info); @@ -500,6 +555,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) if (!zone_info) return; + bitmap_free(zone_info->active_zones); bitmap_free(zone_info->seq_zones); bitmap_free(zone_info->empty_zones); kfree(zone_info); diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 4f30f3bf18864..48628782e4b87 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -23,8 +23,11 @@ struct btrfs_zoned_device_info { u64 zone_size; u8 zone_size_shift; u32 nr_zones; + unsigned int max_active_zones; + atomic_t 
active_zones_left; unsigned long *seq_zones; unsigned long *empty_zones; + unsigned long *active_zones; struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX]; }; From f8eaa4d454fd4c3510856b21053244c7f0d06d3d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:16 +0900 Subject: [PATCH 0283/1202] btrfs: zoned: introduce physical_map to btrfs_block_group We will use a block group's physical location to track active zones and finish fully written zones in the following commits. Since the zone activation is done in the extent allocation context, which is already holding the tree locks, we can't query the chunk tree for the physical locations. So, copy the location info into a block group and use it for activation. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 1 + fs/btrfs/block-group.h | 1 + fs/btrfs/zoned.c | 16 ++++++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e9295e3c2cb3a..4f8f04ed911e0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -144,6 +144,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache) */ WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root)); kfree(cache->free_space_ctl); + kfree(cache->physical_map); kfree(cache); } } diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 2db40a0055129..265db2c316d3d 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -204,6 +204,7 @@ struct btrfs_block_group { u64 zone_unusable; u64 zone_capacity; u64 meta_write_pointer; + struct map_lookup *physical_map; }; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 2ccdc6e1f7937..4c89ac02843eb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1158,10 +1158,18 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) map = em->map_lookup; + cache->physical_map = kmalloc(map_lookup_size(map->num_stripes), GFP_NOFS); + if (!cache->physical_map) { + ret = -ENOMEM; + goto out; + } + + memcpy(cache->physical_map, map, map_lookup_size(map->num_stripes)); + alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); if (!alloc_offsets) { - free_extent_map(em); - return -ENOMEM; + ret = -ENOMEM; + goto out; } caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS); @@ -1344,6 +1352,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) if (!ret) cache->meta_write_pointer = cache->alloc_offset + cache->start; + if (ret) { + kfree(cache->physical_map); + cache->physical_map = NULL; + } kfree(caps); kfree(alloc_offsets); free_extent_map(em); From 8bb6d4078852de4b02d2c0586af9520d348273b0 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:17 +0900 Subject: [PATCH 0284/1202] btrfs: zoned: implement active zone tracking Add zone_is_active flag to btrfs_block_group. This flag indicates the underlying zones are all active. Such zone active block groups are tracked by fs_info->active_bg_list. btrfs_dev_{set,clear}_active_zone() take responsibility for the underlying device part. They set/clear the bitmap to indicate zone activeness and count the number of zones we can still activate. btrfs_zone_{activate,finish}() take responsibility for the logical part and the list management. In addition, btrfs_zone_finish() waits for any writes on it and sends REQ_OP_ZONE_FINISH to the zone.
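A standalone model of the set/clear protocol (C11 atomics; approximates the kernel's atomic_dec_if_positive with a decrement-and-undo; not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 8

static atomic_int active_left = 2;    /* max_active_zones - nactive */
static atomic_bool active[NR_ZONES];  /* which zones are active */

static bool set_active_zone(int zno)
{
	if (!atomic_load(&active[zno])) {
		/* Reserve one activation; undo if none were left. */
		if (atomic_fetch_sub(&active_left, 1) <= 0) {
			atomic_fetch_add(&active_left, 1);
			return false;
		}
		if (atomic_exchange(&active[zno], true)) {
			/* Someone else set the bit first; return the slot. */
			atomic_fetch_add(&active_left, 1);
		}
	}
	return true;
}

static void clear_active_zone(int zno)
{
	if (atomic_exchange(&active[zno], false))
		atomic_fetch_add(&active_left, 1);
}

int main(void)
{
	/* Two slots: the third activation fails until a zone is cleared. */
	printf("%d %d %d\n", set_active_zone(0), set_active_zone(1),
	       set_active_zone(2));
	clear_active_zone(0);
	printf("%d\n", set_active_zone(2));
	return 0;
}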
Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 11 ++ fs/btrfs/block-group.h | 2 + fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 2 + fs/btrfs/free-space-cache.c | 5 +- fs/btrfs/zoned.c | 193 ++++++++++++++++++++++++++++++++++++ fs/btrfs/zoned.h | 12 +++ 7 files changed, 226 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 4f8f04ed911e0..8e7b74fa3fc82 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1896,6 +1896,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache( INIT_LIST_HEAD(&cache->discard_list); INIT_LIST_HEAD(&cache->dirty_list); INIT_LIST_HEAD(&cache->io_list); + INIT_LIST_HEAD(&cache->active_bg_list); btrfs_init_free_space_ctl(cache, cache->free_space_ctl); atomic_set(&cache->frozen, 0); mutex_init(&cache->free_space_lock); @@ -3842,6 +3843,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) } spin_unlock(&info->unused_bgs_lock); + spin_lock(&info->zone_active_bgs_lock); + while (!list_empty(&info->zone_active_bgs)) { + block_group = list_first_entry(&info->zone_active_bgs, + struct btrfs_block_group, + active_bg_list); + list_del_init(&block_group->active_bg_list); + btrfs_put_block_group(block_group); + } + spin_unlock(&info->zone_active_bgs_lock); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group, diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 265db2c316d3d..f751b802b173a 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -98,6 +98,7 @@ struct btrfs_block_group { unsigned int to_copy:1; unsigned int relocating_repair:1; unsigned int chunk_item_inserted:1; + unsigned int zone_is_active:1; int disk_cache_state; @@ -205,6 +206,7 @@ struct btrfs_block_group { u64 zone_capacity; u64 meta_write_pointer; struct map_lookup *physical_map; + struct list_head active_bg_list; }; static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f0174c62274f..bfba85d9f8624 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1018,6 +1018,9 @@ struct btrfs_fs_info { spinlock_t treelog_bg_lock; u64 treelog_bg; + spinlock_t zone_active_bgs_lock; + struct list_head zone_active_bgs; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f755d02dc0884..41ea50f48cfea 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2884,6 +2884,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); + spin_lock_init(&fs_info->zone_active_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); @@ -2897,6 +2898,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs); + INIT_LIST_HEAD(&fs_info->zone_active_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9ce0f9b238128..0d26819b1cf6c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2763,8 +2763,9 @@ void 
btrfs_dump_free_space(struct btrfs_block_group *block_group, * out the free space after the allocation offset. */ if (btrfs_is_zoned(fs_info)) { - btrfs_info(fs_info, "free space %llu", - block_group->zone_capacity - block_group->alloc_offset); + btrfs_info(fs_info, "free space %llu active %d", + block_group->zone_capacity - block_group->alloc_offset, + block_group->zone_is_active); return; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 4c89ac02843eb..614499a83e8c0 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -989,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, return pos; } +static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos) +{ + struct btrfs_zoned_device_info *zone_info = device->zone_info; + unsigned int zno = (pos >> zone_info->zone_size_shift); + + /* We can use any number of zones */ + if (zone_info->max_active_zones == 0) + return true; + + if (!test_bit(zno, zone_info->active_zones)) { + /* Active zone left? */ + if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) + return false; + if (test_and_set_bit(zno, zone_info->active_zones)) { + /* Someone already set the bit */ + atomic_inc(&zone_info->active_zones_left); + } + } + + return true; +} + +static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos) +{ + struct btrfs_zoned_device_info *zone_info = device->zone_info; + unsigned int zno = (pos >> zone_info->zone_size_shift); + + /* We can use any number of zones */ + if (zone_info->max_active_zones == 0) + return; + + if (test_and_clear_bit(zno, zone_info->active_zones)) + atomic_inc(&zone_info->active_zones_left); +} + int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, u64 length, u64 *bytes) { @@ -1004,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, *bytes = length; while (length) { btrfs_dev_set_zone_empty(device, physical); + btrfs_dev_clear_active_zone(device, physical); physical += device->zone_info->zone_size; length -= device->zone_info->zone_size; } @@ -1656,3 +1692,160 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, return device; } + +/** + * Activate block group and underlying device zones + * + * @block_group: the block group to activate + * + * Return: true on success, false otherwise + */ +bool btrfs_zone_activate(struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct map_lookup *map; + struct btrfs_device *device; + u64 physical; + bool ret; + + if (!btrfs_is_zoned(block_group->fs_info)) + return true; + + map = block_group->physical_map; + /* Currently support SINGLE profile only */ + ASSERT(map->num_stripes == 1); + device = map->stripes[0].dev; + physical = map->stripes[0].physical; + + if (device->zone_info->max_active_zones == 0) + return true; + + spin_lock(&block_group->lock); + + if (block_group->zone_is_active) { + ret = true; + goto out_unlock; + } + + /* No space left */ + if (block_group->alloc_offset == block_group->zone_capacity) { + ret = false; + goto out_unlock; + } + + if (!btrfs_dev_set_active_zone(device, physical)) { + /* Cannot activate the zone */ + ret = false; + goto out_unlock; + } + + /* Successfully activated all the zones */ + block_group->zone_is_active = 1; + + spin_unlock(&block_group->lock); + + /* For the active block group list */ + btrfs_get_block_group(block_group); + + spin_lock(&fs_info->zone_active_bgs_lock); + ASSERT(list_empty(&block_group->active_bg_list)); + 
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); + + return true; + +out_unlock: + spin_unlock(&block_group->lock); + return ret; +} + +int btrfs_zone_finish(struct btrfs_block_group *block_group) +{ + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct map_lookup *map; + struct btrfs_device *device; + u64 physical; + int ret = 0; + + if (!btrfs_is_zoned(fs_info)) + return 0; + + map = block_group->physical_map; + /* Currently support SINGLE profile only */ + ASSERT(map->num_stripes == 1); + + device = map->stripes[0].dev; + physical = map->stripes[0].physical; + + if (device->zone_info->max_active_zones == 0) + return 0; + + spin_lock(&block_group->lock); + if (!block_group->zone_is_active) { + spin_unlock(&block_group->lock); + return 0; + } + + /* Check if we have unwritten allocated space */ + if ((block_group->flags & + (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) && + block_group->alloc_offset > block_group->meta_write_pointer) { + spin_unlock(&block_group->lock); + return -EAGAIN; + } + spin_unlock(&block_group->lock); + + ret = btrfs_inc_block_group_ro(block_group, false); + if (ret) + return ret; + + /* Ensure all writes in this block group finish */ + btrfs_wait_block_group_reservations(block_group); + /* No need to wait for NOCOW writers. Zoned mode does not allow that. */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, + block_group->length); + + spin_lock(&block_group->lock); + + /* + * Bail out if someone already deactivated the block group, or + * allocated space is left in the block group. + */ + if (!block_group->zone_is_active) { + spin_unlock(&block_group->lock); + btrfs_dec_block_group_ro(block_group); + return 0; + } + + if (block_group->reserved) { + spin_unlock(&block_group->lock); + btrfs_dec_block_group_ro(block_group); + return -EAGAIN; + } + + block_group->zone_is_active = 0; + block_group->alloc_offset = block_group->zone_capacity; + block_group->free_space_ctl->free_space = 0; + btrfs_clear_treelog_bg(block_group); + spin_unlock(&block_group->lock); + + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + device->zone_info->zone_size >> SECTOR_SHIFT, + GFP_NOFS); + btrfs_dec_block_group_ro(block_group); + + if (!ret) { + btrfs_dev_clear_active_zone(device, physical); + + spin_lock(&fs_info->zone_active_bgs_lock); + ASSERT(!list_empty(&block_group->active_bg_list)); + list_del_init(&block_group->active_bg_list); + spin_unlock(&fs_info->zone_active_bgs_lock); + + /* For active_bg_list */ + btrfs_put_block_group(block_group); + } + + return ret; +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 48628782e4b87..2345ecfa18058 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -69,6 +69,8 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical, u64 physical_start, u64 physical_pos); struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, u64 logical, u64 length); +bool btrfs_zone_activate(struct btrfs_block_group *block_group); +int btrfs_zone_finish(struct btrfs_block_group *block_group); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -204,6 +206,16 @@ static inline struct btrfs_device *btrfs_zoned_get_device( return ERR_PTR(-EOPNOTSUPP); } +static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group) +{ + return true; +} + +static inline int btrfs_zone_finish(struct 
btrfs_block_group *block_group) +{ + return 0; +} + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) From 7ba930157fb7093d2489cc1669e064ec2c7ee34d Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:18 +0900 Subject: [PATCH 0285/1202] btrfs: zoned: load active zone info for block group Load activeness of underlying zones of a block group. When underlying zones are active, we add the block group to the fs_info->zone_active_bgs list. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 614499a83e8c0..942a347713835 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1170,6 +1170,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) unsigned int nofs_flag; u64 *alloc_offsets = NULL; u64 *caps = NULL; + unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; @@ -1214,6 +1215,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + active = bitmap_zalloc(map->num_stripes, GFP_NOFS); + if (!active) { + ret = -ENOMEM; + goto out; + } + for (i = 0; i < map->num_stripes; i++) { bool is_sequential; struct blk_zone zone; @@ -1297,8 +1304,16 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) /* Partially used zone */ alloc_offsets[i] = ((zone.wp - zone.start) << SECTOR_SHIFT); + __set_bit(i, active); break; } + + /* + * Consider a zone as active if we can allow any number of + * active zones. + */ + if (!device->zone_info->max_active_zones) + __set_bit(i, active); } if (num_sequential > 0) @@ -1346,6 +1361,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } cache->alloc_offset = alloc_offsets[0]; cache->zone_capacity = caps[0]; + cache->zone_is_active = test_bit(0, active); break; case BTRFS_BLOCK_GROUP_DUP: case BTRFS_BLOCK_GROUP_RAID1: @@ -1361,6 +1377,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + if (cache->zone_is_active) { + btrfs_get_block_group(cache); + spin_lock(&fs_info->zone_active_bgs_lock); + list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs); + spin_unlock(&fs_info->zone_active_bgs_lock); + } + out: if (cache->alloc_offset > fs_info->zone_size) { btrfs_err(fs_info, @@ -1392,6 +1415,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) kfree(cache->physical_map); cache->physical_map = NULL; } + bitmap_free(active); kfree(caps); kfree(alloc_offsets); free_extent_map(em); From 473f59744987b755ad003e09efb601c0dc01206a Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:19 +0900 Subject: [PATCH 0286/1202] btrfs: zoned: activate block group on allocation Activate a block group when trying to allocate an extent from it. We check read-only case and no space left case before trying to activate a block group not to consume the number of active zones uselessly. 
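The ordering, in a minimal sketch (hypothetical types; the return convention follows do_allocation_zoned(), where 1 means skip this block group): the cheap read-only and full checks run first, so an activation slot is never spent on a group that would be skipped anyway.

#include <stdbool.h>
#include <stdio.h>

struct group_model {
	bool ro;
	unsigned long long alloc_offset;
	unsigned long long zone_capacity;
};

/* Stand-in for btrfs_zone_activate() in this sketch. */
static bool zone_activate(struct group_model *g)
{
	(void)g;
	return true;
}

static int try_group(struct group_model *g)
{
	/* Reject before touching the active-zone budget. */
	if (g->ro || g->alloc_offset == g->zone_capacity)
		return 1;
	if (!zone_activate(g))
		return 1;
	return 0; /* go on allocating from this group */
}

int main(void)
{
	struct group_model full = { false, 100, 100 };
	struct group_model ok = { false, 0, 100 };

	printf("%d %d\n", try_group(&full), try_group(&ok)); /* 1 0 */
	return 0;
}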
Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 165acee66b072..e0932624c53c1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3773,6 +3773,18 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (skip) return 1; + /* Check RO and no space case before trying to activate it */ + spin_lock(&block_group->lock); + if (block_group->ro || + block_group->alloc_offset == block_group->zone_capacity) { + spin_unlock(&block_group->lock); + return 1; + } + spin_unlock(&block_group->lock); + + if (!btrfs_zone_activate(block_group)) + return 1; + spin_lock(&space_info->lock); spin_lock(&block_group->lock); spin_lock(&fs_info->treelog_bg_lock); From eee7340266921dae20e7ec7089860645dfe8b925 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:20 +0900 Subject: [PATCH 0287/1202] btrfs: zoned: activate new block group Activate a new block group at btrfs_make_block_group(). We do not check the return value; if activation fails, we can try again later at the actual extent allocation phase. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 8e7b74fa3fc82..1302bf8d0be1d 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2445,6 +2445,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran return ERR_PTR(ret); } + /* + * New block group is likely to be used soon. Try to activate it now. + * Failure is OK for now. + */ + btrfs_zone_activate(cache); + ret = exclude_super_stripes(cache); if (ret) { /* We may have excluded something, so call this just in case */ From 5e42bab77ea19bf7e7915a1286341bd2cfcd4a7e Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:21 +0900 Subject: [PATCH 0288/1202] btrfs: move ffe_ctl one level up We are passing too many variables as it is from btrfs_reserve_extent() to find_free_extent(). The next commit will add min_alloc_size to ffe_ctl, and that means another pass-through argument. Take this opportunity to move ffe_ctl one level up and drop the redundant arguments.
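The pattern in miniature (hypothetical names, not the btrfs definitions): the caller fills one control struct and the callee reads everything from it, so adding a field no longer grows every signature on the path.

#include <stdint.h>
#include <stdio.h>

struct ffe_ctl_model {
	uint64_t ram_bytes;
	uint64_t num_bytes;
	uint64_t empty_size;
	uint64_t flags;
	int delalloc;
	uint64_t hint_byte;
};

static int find_free_extent_model(const struct ffe_ctl_model *ctl)
{
	/* The callee consumes the struct; no pass-through parameters. */
	return ctl->num_bytes ? 0 : -1;
}

int main(void)
{
	struct ffe_ctl_model ctl = { .ram_bytes = 4096, .num_bytes = 4096 };

	printf("%d\n", find_free_extent_model(&ctl));
	return 0;
}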
Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 162 ++++++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 75 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e0932624c53c1..53697d942875e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3476,6 +3476,7 @@ enum btrfs_extent_allocation_policy { */ struct find_free_extent_ctl { /* Basic allocation info */ + u64 ram_bytes; u64 num_bytes; u64 empty_size; u64 flags; @@ -4130,65 +4131,62 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, * |- If not found, re-iterate all block groups */ static noinline int find_free_extent(struct btrfs_root *root, - u64 ram_bytes, u64 num_bytes, u64 empty_size, - u64 hint_byte_orig, struct btrfs_key *ins, - u64 flags, int delalloc) + struct btrfs_key *ins, + struct find_free_extent_ctl *ffe_ctl) { struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int cache_block_group_error = 0; struct btrfs_block_group *block_group = NULL; - struct find_free_extent_ctl ffe_ctl = {0}; struct btrfs_space_info *space_info; bool full_search = false; - bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); - WARN_ON(num_bytes < fs_info->sectorsize); - - ffe_ctl.num_bytes = num_bytes; - ffe_ctl.empty_size = empty_size; - ffe_ctl.flags = flags; - ffe_ctl.search_start = 0; - ffe_ctl.delalloc = delalloc; - ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags); - ffe_ctl.have_caching_bg = false; - ffe_ctl.orig_have_caching_bg = false; - ffe_ctl.found_offset = 0; - ffe_ctl.hint_byte = hint_byte_orig; - ffe_ctl.for_treelog = for_treelog; - ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; + WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize); + ffe_ctl->search_start = 0; + /* For clustered allocation */ + ffe_ctl->empty_cluster = 0; + ffe_ctl->last_ptr = NULL; + ffe_ctl->use_cluster = true; + ffe_ctl->have_caching_bg = false; + ffe_ctl->orig_have_caching_bg = false; + ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags); + ffe_ctl->loop = 0; /* For clustered allocation */ - ffe_ctl.retry_clustered = false; - ffe_ctl.retry_unclustered = false; - ffe_ctl.last_ptr = NULL; - ffe_ctl.use_cluster = true; + ffe_ctl->retry_clustered = false; + ffe_ctl->retry_unclustered = false; + ffe_ctl->cached = 0; + ffe_ctl->max_extent_size = 0; + ffe_ctl->total_free_space = 0; + ffe_ctl->found_offset = 0; + ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED; if (btrfs_is_zoned(fs_info)) - ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED; + ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED; ins->type = BTRFS_EXTENT_ITEM_KEY; ins->objectid = 0; ins->offset = 0; - trace_find_free_extent(root, num_bytes, empty_size, flags); + trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size, + ffe_ctl->flags); - space_info = btrfs_find_space_info(fs_info, flags); + space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags); if (!space_info) { - btrfs_err(fs_info, "No space info for %llu", flags); + btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags); return -ENOSPC; } - ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins); + ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins); if (ret < 0) return ret; - ffe_ctl.search_start = max(ffe_ctl.search_start, - first_logical_byte(fs_info, 0)); - ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte); - if (ffe_ctl.search_start == ffe_ctl.hint_byte) { + ffe_ctl->search_start = max(ffe_ctl->search_start, + first_logical_byte(fs_info, 0)); + 
ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte); + if (ffe_ctl->search_start == ffe_ctl->hint_byte) { block_group = btrfs_lookup_block_group(fs_info, - ffe_ctl.search_start); + ffe_ctl->search_start); /* * we don't want to use the block group if it doesn't match our * allocation bits, or if its not cached. @@ -4196,7 +4194,7 @@ static noinline int find_free_extent(struct btrfs_root *root, * However if we are re-searching with an ideal block group * picked out then we don't care that the block group is cached. */ - if (block_group && block_group_bits(block_group, flags) && + if (block_group && block_group_bits(block_group, ffe_ctl->flags) && block_group->cached != BTRFS_CACHE_NO) { down_read(&space_info->groups_sem); if (list_empty(&block_group->list) || @@ -4210,9 +4208,10 @@ static noinline int find_free_extent(struct btrfs_root *root, btrfs_put_block_group(block_group); up_read(&space_info->groups_sem); } else { - ffe_ctl.index = btrfs_bg_flags_to_raid_index( - block_group->flags); - btrfs_lock_block_group(block_group, delalloc); + ffe_ctl->index = btrfs_bg_flags_to_raid_index( + block_group->flags); + btrfs_lock_block_group(block_group, + ffe_ctl->delalloc); goto have_block_group; } } else if (block_group) { @@ -4220,31 +4219,31 @@ static noinline int find_free_extent(struct btrfs_root *root, } } search: - ffe_ctl.have_caching_bg = false; - if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) || - ffe_ctl.index == 0) + ffe_ctl->have_caching_bg = false; + if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) || + ffe_ctl->index == 0) full_search = true; down_read(&space_info->groups_sem); list_for_each_entry(block_group, - &space_info->block_groups[ffe_ctl.index], list) { + &space_info->block_groups[ffe_ctl->index], list) { struct btrfs_block_group *bg_ret; /* If the block group is read-only, we can skip it entirely. */ if (unlikely(block_group->ro)) { - if (for_treelog) + if (ffe_ctl->for_treelog) btrfs_clear_treelog_bg(block_group); continue; } - btrfs_grab_block_group(block_group, delalloc); - ffe_ctl.search_start = block_group->start; + btrfs_grab_block_group(block_group, ffe_ctl->delalloc); + ffe_ctl->search_start = block_group->start; /* * this can happen if we end up cycling through all the * raid types, but we want to make sure we only allocate * for the proper type. */ - if (!block_group_bits(block_group, flags)) { + if (!block_group_bits(block_group, ffe_ctl->flags)) { u64 extra = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID56_MASK | @@ -4255,7 +4254,7 @@ static noinline int find_free_extent(struct btrfs_root *root, * doesn't provide them, bail. This does allow us to * fill raid0 from raid1. */ - if ((flags & extra) && !(block_group->flags & extra)) + if ((ffe_ctl->flags & extra) && !(block_group->flags & extra)) goto loop; /* @@ -4263,14 +4262,14 @@ static noinline int find_free_extent(struct btrfs_root *root, * It's possible that we have MIXED_GROUP flag but no * block group is mixed. Just skip such block group. 
*/ - btrfs_release_block_group(block_group, delalloc); + btrfs_release_block_group(block_group, ffe_ctl->delalloc); continue; } have_block_group: - ffe_ctl.cached = btrfs_block_group_done(block_group); - if (unlikely(!ffe_ctl.cached)) { - ffe_ctl.have_caching_bg = true; + ffe_ctl->cached = btrfs_block_group_done(block_group); + if (unlikely(!ffe_ctl->cached)) { + ffe_ctl->have_caching_bg = true; ret = btrfs_cache_block_group(block_group, 0); /* @@ -4293,10 +4292,11 @@ static noinline int find_free_extent(struct btrfs_root *root, goto loop; bg_ret = NULL; - ret = do_allocation(block_group, &ffe_ctl, &bg_ret); + ret = do_allocation(block_group, ffe_ctl, &bg_ret); if (ret == 0) { if (bg_ret && bg_ret != block_group) { - btrfs_release_block_group(block_group, delalloc); + btrfs_release_block_group(block_group, + ffe_ctl->delalloc); block_group = bg_ret; } } else if (ret == -EAGAIN) { @@ -4306,46 +4306,49 @@ static noinline int find_free_extent(struct btrfs_root *root, } /* Checks */ - ffe_ctl.search_start = round_up(ffe_ctl.found_offset, - fs_info->stripesize); + ffe_ctl->search_start = round_up(ffe_ctl->found_offset, + fs_info->stripesize); /* move on to the next group */ - if (ffe_ctl.search_start + num_bytes > + if (ffe_ctl->search_start + ffe_ctl->num_bytes > block_group->start + block_group->length) { btrfs_add_free_space_unused(block_group, - ffe_ctl.found_offset, num_bytes); + ffe_ctl->found_offset, + ffe_ctl->num_bytes); goto loop; } - if (ffe_ctl.found_offset < ffe_ctl.search_start) + if (ffe_ctl->found_offset < ffe_ctl->search_start) btrfs_add_free_space_unused(block_group, - ffe_ctl.found_offset, - ffe_ctl.search_start - ffe_ctl.found_offset); + ffe_ctl->found_offset, + ffe_ctl->search_start - ffe_ctl->found_offset); - ret = btrfs_add_reserved_bytes(block_group, ram_bytes, - num_bytes, delalloc); + ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes, + ffe_ctl->num_bytes, + ffe_ctl->delalloc); if (ret == -EAGAIN) { btrfs_add_free_space_unused(block_group, - ffe_ctl.found_offset, num_bytes); + ffe_ctl->found_offset, + ffe_ctl->num_bytes); goto loop; } btrfs_inc_block_group_reservations(block_group); /* we are all good, lets return */ - ins->objectid = ffe_ctl.search_start; - ins->offset = num_bytes; + ins->objectid = ffe_ctl->search_start; + ins->offset = ffe_ctl->num_bytes; - trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start, - num_bytes); - btrfs_release_block_group(block_group, delalloc); + trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start, + ffe_ctl->num_bytes); + btrfs_release_block_group(block_group, ffe_ctl->delalloc); break; loop: - release_block_group(block_group, &ffe_ctl, delalloc); + release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc); cond_resched(); } up_read(&space_info->groups_sem); - ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search); + ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search); if (ret > 0) goto search; @@ -4354,12 +4357,12 @@ static noinline int find_free_extent(struct btrfs_root *root, * Use ffe_ctl->total_free_space as fallback if we can't find * any contiguous hole. 
*/ - if (!ffe_ctl.max_extent_size) - ffe_ctl.max_extent_size = ffe_ctl.total_free_space; + if (!ffe_ctl->max_extent_size) + ffe_ctl->max_extent_size = ffe_ctl->total_free_space; spin_lock(&space_info->lock); - space_info->max_extent_size = ffe_ctl.max_extent_size; + space_info->max_extent_size = ffe_ctl->max_extent_size; spin_unlock(&space_info->lock); - ins->offset = ffe_ctl.max_extent_size; + ins->offset = ffe_ctl->max_extent_size; } else if (ret == -ENOSPC) { ret = cache_block_group_error; } @@ -4417,6 +4420,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, struct btrfs_key *ins, int is_data, int delalloc) { struct btrfs_fs_info *fs_info = root->fs_info; + struct find_free_extent_ctl ffe_ctl = {}; bool final_tried = num_bytes == min_alloc_size; u64 flags; int ret; @@ -4425,8 +4429,16 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, flags = get_alloc_profile_by_root(root, is_data); again: WARN_ON(num_bytes < fs_info->sectorsize); - ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, - hint_byte, ins, flags, delalloc); + + ffe_ctl.ram_bytes = ram_bytes; + ffe_ctl.num_bytes = num_bytes; + ffe_ctl.empty_size = empty_size; + ffe_ctl.flags = flags; + ffe_ctl.delalloc = delalloc; + ffe_ctl.hint_byte = hint_byte; + ffe_ctl.for_treelog = for_treelog; + + ret = find_free_extent(root, ins, &ffe_ctl); if (!ret && !is_data) { btrfs_dec_block_group_reservations(fs_info, ins->objectid); } else if (ret == -ENOSPC) { From 997034c2b0ce4ac1e4fa476d89e2da756602e6dc Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:22 +0900 Subject: [PATCH 0289/1202] btrfs: zoned: avoid chunk allocation if active block group has enough space The current extent allocator tries to allocate a new block group when the existing block groups do not have enough space. On a ZNS device, a new block group means a new active zone. If the number of active zones has already reached the max_active_zones, activating a new zone needs to finish an existing zone, leading to wasting the free space there. So, instead, it should reuse the existing active block groups as much as possible when we can't activate any other zones without sacrificing an already activated block group. While at it, I converted find_free_extent_update_loop() to check the found_extent() case early and made the other conditions simpler. 
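For illustration, the retry contract referenced by the new comment works as
follows: when find_free_extent() returns -ENOSPC while reporting a non-zero
best-found hole size (max_extent_size), btrfs_reserve_extent() halves the
requested size, rounds it down to the sector size, clamps it to
min_alloc_size and searches again. Below is a stand-alone user-space model
of that retry loop; the constants and the find_free_extent_model() helper
are made up for the example and are not kernel code:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE 4096ULL

/* Pretend the active zones can only satisfy up to this many bytes. */
static const uint64_t max_extent_size = 1ULL << 20;	/* 1 MiB left */

static int find_free_extent_model(uint64_t num_bytes, uint64_t *found)
{
	if (num_bytes > max_extent_size) {
		*found = max_extent_size;	/* best hole we saw */
		return -ENOSPC;
	}
	*found = num_bytes;
	return 0;
}

int main(void)
{
	uint64_t num_bytes = 8ULL << 20;	/* ask for 8 MiB */
	const uint64_t min_alloc_size = SECTORSIZE;
	uint64_t found = 0;
	int ret;

	while ((ret = find_free_extent_model(num_bytes, &found)) != 0) {
		bool final_tried = num_bytes == min_alloc_size;

		if (final_tried || found == 0)
			break;	/* give up: genuine -ENOSPC */
		/* Halve, round down to a sector, clamp to the minimum. */
		num_bytes = found < num_bytes / 2 ? found : num_bytes / 2;
		num_bytes &= ~(SECTORSIZE - 1);
		if (num_bytes < min_alloc_size)
			num_bytes = min_alloc_size;
		printf("retrying with %llu bytes\n",
		       (unsigned long long)num_bytes);
	}
	if (!ret)
		printf("allocated %llu bytes\n", (unsigned long long)found);
	return 0;
}

The early -ENOSPC return added by this patch feeds exactly that loop,
steering subsequent, smaller allocations back into the already active
block groups instead of activating a new zone.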
Signed-off-by: Naohiro Aota
Signed-off-by: David Sterba
---
 fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++-------
 fs/btrfs/zoned.c       | 31 +++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  8 ++++++++
 3 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 53697d942875e..9aa429a9a235c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3478,6 +3478,7 @@ struct find_free_extent_ctl {
 	/* Basic allocation info */
 	u64 ram_bytes;
 	u64 num_bytes;
+	u64 min_alloc_size;
 	u64 empty_size;
 	u64 flags;
 	int delalloc;
@@ -3946,18 +3947,30 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
 	    ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
 		ffe_ctl->orig_have_caching_bg = true;
 
-	if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
-	    ffe_ctl->have_caching_bg)
-		return 1;
-
-	if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
-		return 1;
-
 	if (ins->objectid) {
 		found_extent(ffe_ctl, ins);
 		return 0;
 	}
 
+	if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+	    !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->index)) {
+		/*
+		 * If we have enough free space left in an already active block
+		 * group and we can't activate any other zone now, retry the
+		 * active ones with a smaller allocation size. Returning early
+		 * from here will tell btrfs_reserve_extent() to halve the
+		 * size.
+		 */
+		return -ENOSPC;
+	}
+
+	if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
+		return 1;
+
+	ffe_ctl->index++;
+	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
+		return 1;
+
 	/*
 	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
 	 * caching kthreads as we move along
@@ -4432,6 +4445,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 
 	ffe_ctl.ram_bytes = ram_bytes;
 	ffe_ctl.num_bytes = num_bytes;
+	ffe_ctl.min_alloc_size = min_alloc_size;
 	ffe_ctl.empty_size = empty_size;
 	ffe_ctl.flags = flags;
 	ffe_ctl.delalloc = delalloc;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 942a347713835..7980694840548 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1873,3 +1873,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 
 	return ret;
 }
+
+bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index)
+{
+	struct btrfs_device *device;
+	bool ret = false;
+
+	if (!btrfs_is_zoned(fs_devices->fs_info))
+		return true;
+
+	/* Non-single profiles are not supported yet */
+	if (raid_index != BTRFS_RAID_SINGLE)
+		return false;
+
+	/* Check if there is a device with active zones left */
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		struct btrfs_zoned_device_info *zinfo = device->zone_info;
+
+		if (!device->bdev)
+			continue;
+
+		if (!zinfo->max_active_zones ||
+		    atomic_read(&zinfo->active_zones_left)) {
+			ret = true;
+			break;
+		}
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	return ret;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 2345ecfa18058..ade6588c4ccdf 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -71,6 +71,8 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
 bool btrfs_zone_activate(struct btrfs_block_group *block_group);
 int btrfs_zone_finish(struct btrfs_block_group *block_group);
+bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+			     int raid_index);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -216,6 +218,12 @@ static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	return 0;
 }
 
+static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+					   int raid_index)
+{
+	return true;
+}
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
From 1c3946390eff488354ea48ac1f9229bf25968dcd Mon Sep 17 00:00:00 2001
From: Naohiro Aota
Date: Thu, 19 Aug 2021 21:19:23 +0900
Subject: [PATCH 0290/1202] btrfs: zoned: finish fully written block group

If we have written to the zone capacity, the device automatically
deactivates the zone. Sync up the block group side (the active BG list
and the zone_is_active flag) with it.

We need to do it both on data BGs and metadata BGs. On the data side, we
add a hook to btrfs_finish_ordered_io(). On the metadata side, we use
end_extent_buffer_writeback().

To reduce excess lookups of a block group, we mark the last extent buffer
in a block group with the EXTENT_BUFFER_ZONE_FINISH flag. This cannot be
done for data (ordered_extent), because the address may change due to
REQ_OP_ZONE_APPEND.

Signed-off-by: Naohiro Aota
Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c | 10 ++++++++-
 fs/btrfs/extent_io.h |  1 +
 fs/btrfs/inode.c     |  6 +++++-
 fs/btrfs/zoned.c     | 50 ++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h     |  5 +++++
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fdc066cac5722..5ad749e19ff33 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4159,6 +4159,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4760,8 +4763,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		free_extent_buffer(eb);
 		return ret;
 	}
-	if (cache)
+	if (cache) {
+		/* Implies write in zoned mode */
 		btrfs_put_block_group(cache);
+		/* Mark the last eb in a block group */
+		if (cache->seq_zone &&
+		    eb->start + eb->len == cache->start + cache->zone_capacity)
+			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+	}
 	ret = write_one_eb(eb, wbc, epd);
 	free_extent_buffer(eb);
 	if (ret < 0)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 53abdc280451b..9f3e0a45a5e4b 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -32,6 +32,7 @@ enum {
 	/* write IO error */
 	EXTENT_BUFFER_WRITE_ERR,
 	EXTENT_BUFFER_NO_CHECK,
+	EXTENT_BUFFER_ZONE_FINISH,
 };
 
 /* these are flags for __process_pages_contig */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 487533c35ddb6..10efab2e3bd9e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3010,8 +3010,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		goto out;
 	}
 
-	if (ordered_extent->bdev)
+	/* A valid bdev implies a write on a sequential zone */
+	if (ordered_extent->bdev) {
 		btrfs_rewrite_logical_zoned(ordered_extent);
+		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+					ordered_extent->disk_num_bytes);
+	}
 
 	btrfs_free_io_failure_record(inode, start, end);
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 7980694840548..28a06c2d80adb 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1904,3 +1904,53 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int
raid_index return ret; } + +void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length) +{ + struct btrfs_block_group *block_group; + struct map_lookup *map; + struct btrfs_device *device; + u64 physical; + + if (!btrfs_is_zoned(fs_info)) + return; + + block_group = btrfs_lookup_block_group(fs_info, logical); + ASSERT(block_group); + + if (logical + length < block_group->start + block_group->zone_capacity) + goto out; + + spin_lock(&block_group->lock); + + if (!block_group->zone_is_active) { + spin_unlock(&block_group->lock); + goto out; + } + + block_group->zone_is_active = 0; + /* We should have consumed all the free space */ + ASSERT(block_group->alloc_offset == block_group->zone_capacity); + ASSERT(block_group->free_space_ctl->free_space == 0); + btrfs_clear_treelog_bg(block_group); + spin_unlock(&block_group->lock); + + map = block_group->physical_map; + device = map->stripes[0].dev; + physical = map->stripes[0].physical; + + if (!device->zone_info->max_active_zones) + goto out; + + btrfs_dev_clear_active_zone(device, physical); + + spin_lock(&fs_info->zone_active_bgs_lock); + ASSERT(!list_empty(&block_group->active_bg_list)); + list_del_init(&block_group->active_bg_list); + spin_unlock(&fs_info->zone_active_bgs_lock); + + btrfs_put_block_group(block_group); + +out: + btrfs_put_block_group(block_group); +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index ade6588c4ccdf..9c512402d7f42 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -73,6 +73,8 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group); int btrfs_zone_finish(struct btrfs_block_group *block_group); bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index); +void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, + u64 length); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -224,6 +226,9 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, return true; } +static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) { } + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) From e01495b3c2df71a1d7dcb56ece4a89aae6bc2d8c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Thu, 19 Aug 2021 21:19:24 +0900 Subject: [PATCH 0291/1202] btrfs: zoned: finish relocating block group We will no longer write to a relocating block group. So, we can finish it now. Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 914d403b4415d..63d2b22cf4380 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -25,6 +25,7 @@ #include "backref.h" #include "misc.h" #include "subpage.h" +#include "zoned.h" /* * Relocation overview @@ -4063,6 +4064,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) rc->block_group->start, rc->block_group->length); + ret = btrfs_zone_finish(rc->block_group); + WARN_ON(ret && ret != -EAGAIN); + while (1) { int finishes_stage; From d0f4b7b7e669af6fadaf9496847c1fff108ae37a Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 24 Aug 2021 13:05:19 +0800 Subject: [PATCH 0292/1202] btrfs: convert latest_bdev type to btrfs_device and rename In preparation to fix a bug in btrfs_show_devname(). 
Convert fs_devices::latest_bdev type from struct block_device to struct
btrfs_device and rename the member to fs_devices::latest_dev, so that
btrfs_show_devname() can use fs_devices::latest_dev::name.

Tested-by: Su Yue
Signed-off-by: Anand Jain
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/disk-io.c   |  6 +++---
 fs/btrfs/extent_io.c |  2 +-
 fs/btrfs/inode.c     |  2 +-
 fs/btrfs/super.c     |  2 +-
 fs/btrfs/volumes.c   | 10 +++++-----
 fs/btrfs/volumes.h   |  6 +++++-
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41ea50f48cfea..7d80e5b22d32b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3231,12 +3231,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 	btrfs_init_btree_inode(fs_info);
 
-	invalidate_bdev(fs_devices->latest_bdev);
+	invalidate_bdev(fs_devices->latest_dev->bdev);
 
 	/*
 	 * Read super block and check the signature bytes only
 	 */
-	disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
+	disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
 	if (IS_ERR(disk_super)) {
 		err = PTR_ERR(disk_super);
 		goto fail_alloc;
@@ -3473,7 +3473,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	 * below in btrfs_init_dev_replace().
 	 */
 	btrfs_free_extra_devids(fs_devices);
-	if (!fs_devices->latest_bdev) {
+	if (!fs_devices->latest_dev->bdev) {
 		btrfs_err(fs_info, "failed to read devices");
 		goto fail_tree_roots;
 	}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5ad749e19ff33..470108f07553b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3327,7 +3327,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 	if (wbc) {
 		struct block_device *bdev;
 
-		bdev = fs_info->fs_devices->latest_bdev;
+		bdev = fs_info->fs_devices->latest_dev->bdev;
 		bio_set_dev(bio, bdev);
 		wbc_init_bio(wbc, bio);
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 10efab2e3bd9e..e6e87dd80a025 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7965,7 +7965,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 		iomap->type = IOMAP_MAPPED;
 	}
 	iomap->offset = start;
-	iomap->bdev = fs_info->fs_devices->latest_bdev;
+	iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
 	iomap->length = len;
 
 	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 537d90bf5d844..e4963da4dd081 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1705,7 +1705,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 		goto error_close_devices;
 	}
 
-	bdev = fs_devices->latest_bdev;
+	bdev = fs_devices->latest_dev->bdev;
 	s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
 		 fs_info);
 	if (IS_ERR(s)) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 161b2da02fe44..3f0aff6bae4a3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1091,7 +1091,7 @@ void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
 	list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
 		__btrfs_free_extra_devids(seed_dev, &latest_dev);
 
-	fs_devices->latest_bdev = latest_dev->bdev;
+	fs_devices->latest_dev = latest_dev;
 
 	mutex_unlock(&uuid_mutex);
 }
@@ -1222,7 +1222,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 		return -EINVAL;
 
 	fs_devices->opened = 1;
-	fs_devices->latest_bdev = latest_dev->bdev;
+	fs_devices->latest_dev = latest_dev;
 	fs_devices->total_rw_bytes = 0;
 	fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
 	fs_devices->read_policy = BTRFS_READ_POLICY_PID;
@@ -1986,7 +1986,7 @@ static struct btrfs_device * btrfs_find_next_active_device(
 }
 
 /*
- * Helper function to check if the given device is part of s_bdev / latest_bdev
+ * Helper function to check if the given device is part of s_bdev / latest_dev
 * and replace it with the provided or the next active device, in the context
 * where this function called, there should be always be another device (or
 * this_dev) which is active.
 */
@@ -2005,8 +2005,8 @@ void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
 	    (fs_info->sb->s_bdev == device->bdev))
 		fs_info->sb->s_bdev = next_device->bdev;
 
-	if (fs_info->fs_devices->latest_bdev == device->bdev)
-		fs_info->fs_devices->latest_bdev = next_device->bdev;
+	if (fs_info->fs_devices->latest_dev->bdev == device->bdev)
+		fs_info->fs_devices->latest_dev = next_device;
 }
 
 /*
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 937a81f9f6e61..7e8f205978d9e 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -246,7 +246,11 @@ struct btrfs_fs_devices {
 	/* Highest generation number of seen devices */
 	u64 latest_generation;
 
-	struct block_device *latest_bdev;
+	/*
+	 * The mount device or a device with highest generation after removal
+	 * or replace.
+	 */
+	struct btrfs_device *latest_dev;
 
 	/* all of the devices in the FS, protected by a mutex
 	 * so we can safely walk it to write out the supers without
From 8473c4db80356b2de5df36a2cd2e9760c61e1d93 Mon Sep 17 00:00:00 2001
From: Anand Jain
Date: Tue, 24 Aug 2021 13:05:20 +0800
Subject: [PATCH 0293/1202] btrfs: use latest_dev in btrfs_show_devname

The test case btrfs/238 reports the warning below:

 WARNING: CPU: 3 PID: 481 at fs/btrfs/super.c:2509 btrfs_show_devname+0x104/0x1e8 [btrfs]
 CPU: 2 PID: 1 Comm: systemd Tainted: G W O 5.14.0-rc1-custom #72
 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
 Call trace:
  btrfs_show_devname+0x108/0x1b4 [btrfs]
  show_mountinfo+0x234/0x2c4
  m_show+0x28/0x34
  seq_read_iter+0x12c/0x3c4
  vfs_read+0x29c/0x2c8
  ksys_read+0x80/0xec
  __arm64_sys_read+0x28/0x34
  invoke_syscall+0x50/0xf8
  do_el0_svc+0x88/0x138
  el0_svc+0x2c/0x8c
  el0t_64_sync_handler+0x84/0xe4
  el0t_64_sync+0x198/0x19c

Reason:
While btrfs_prepare_sprout() moves fs_devices::devices onto
fs_devices::seed_list, btrfs_show_devname() searches the device list,
finds none, and triggers the warning above.

Fix:
latest_dev is updated according to the changes to the device list, so
we can use latest_dev->name to show the device name in
/proc/self/mounts. The pointer is always valid because it is assigned
before the device is deleted from the list in remove or replace, and
the RCU protection is sufficient as the device structure is freed only
after synchronization.

Reported-by: Su Yue
Tested-by: Su Yue
Signed-off-by: Anand Jain
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/super.c | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e4963da4dd081..7f91d62c2225a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2463,30 +2463,16 @@ static int btrfs_unfreeze(struct super_block *sb)
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
-	struct btrfs_device *dev, *first_dev = NULL;
 
 	/*
-	 * Lightweight locking of the devices. We should not need
-	 * device_list_mutex here as we only read the device data and the list
-	 * is protected by RCU. Even if a device is deleted during the list
-	 * traversals, we'll get valid data, the freeing callback will wait at
-	 * least until the rcu_read_unlock.
+	 * There should always be a valid pointer in latest_dev, it may be stale
+	 * for a short moment in case it's being deleted but still valid until
+	 * the end of the RCU grace period.
 	 */
 	rcu_read_lock();
-	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
-		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
-			continue;
-		if (!dev->name)
-			continue;
-		if (!first_dev || dev->devid < first_dev->devid)
-			first_dev = dev;
-	}
-
-	if (first_dev)
-		seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
-	else
-		WARN_ON(1);
+	seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
 	rcu_read_unlock();
+
 	return 0;
 }
From 75a753c509d8a63b59d3832a59df8e0a3dede446 Mon Sep 17 00:00:00 2001
From: Anand Jain
Date: Tue, 24 Aug 2021 13:05:21 +0800
Subject: [PATCH 0294/1202] btrfs: update latest_dev when we create a sprout device

When we add a device to a seed filesystem (sprouting), a new filesystem
(and fsid) is created on the added device. Update the latest_dev so that
/proc/self/mounts shows the correct device.

Example:

 $ btrfstune -S1 /dev/vg/seed
 $ mount /dev/vg/seed /btrfs
 mount: /btrfs: WARNING: device write-protected, mounted read-only.

 $ cat /proc/self/mounts | grep btrfs
 /dev/mapper/vg-seed /btrfs btrfs ro,relatime,space_cache,subvolid=5,subvol=/ 0 0

 $ btrfs dev add -f /dev/vg/new /btrfs

 Before:
 $ cat /proc/self/mounts | grep btrfs
 /dev/mapper/vg-seed /btrfs btrfs ro,relatime,space_cache,subvolid=5,subvol=/ 0 0

 After:
 $ cat /proc/self/mounts | grep btrfs
 /dev/mapper/vg-new /btrfs btrfs ro,relatime,space_cache,subvolid=5,subvol=/ 0 0

Tested-by: Su Yue
Signed-off-by: Anand Jain
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3f0aff6bae4a3..05eedab5ff156 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2627,6 +2627,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 			btrfs_abort_transaction(trans, ret);
 			goto error_trans;
 		}
+		btrfs_assign_next_active_device(fs_info->fs_devices->latest_dev,
+						device);
 	}
 
 	device->fs_devices = fs_devices;
From f8b82c303db18b4fca4700c9b42e3a55d98c0aec Mon Sep 17 00:00:00 2001
From: Anand Jain
Date: Tue, 24 Aug 2021 13:05:22 +0800
Subject: [PATCH 0295/1202] btrfs: remove stale comment about the btrfs_show_devname

There were a few lockdep warnings because btrfs_show_devname() was using
device_list_mutex, as recorded in the commits:

  0ccd05285e7f ("btrfs: fix a possible umount deadlock")
  779bf3fefa83 ("btrfs: fix lock dep warning, move scratch dev out of device_list_mutex and uuid_mutex")

And finally, commit 88c14590cdd6 ("btrfs: use RCU in btrfs_show_devname
for device list traversal") removed the device_list_mutex from
btrfs_show_devname for performance reasons.

This patch removes a stale comment about the function
btrfs_show_devname and device_list_mutex.
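For reference, the btrfs_show_devname() patches above settle on the classic
RCU publish-then-retire discipline: a writer publishes the replacement
latest_dev before retiring the old one and frees the old structure only
after a grace period, so a reader holding rcu_read_lock() always sees a
valid pointer. A minimal user-space sketch of that discipline, using the
userspace RCU library (liburcu, legacy rcu_* API assumed; the struct and
helper names below are invented for the example and are not btrfs code):

#include <urcu.h>	/* liburcu, link with -lurcu */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct device {
	char name[32];
};

static struct device *latest_dev;	/* RCU-protected pointer */

static void show_devname(void)
{
	struct device *dev;

	rcu_read_lock();
	dev = rcu_dereference(latest_dev);
	if (dev)
		printf("%s\n", dev->name);
	rcu_read_unlock();
}

static void replace_device(const char *name)
{
	struct device *new = malloc(sizeof(*new));
	struct device *old;

	strncpy(new->name, name, sizeof(new->name) - 1);
	new->name[sizeof(new->name) - 1] = '\0';
	old = latest_dev;			/* single writer: plain read is fine */
	rcu_assign_pointer(latest_dev, new);	/* publish before retiring old */
	synchronize_rcu();			/* wait for readers of 'old' */
	free(old);				/* free(NULL) is a no-op */
}

int main(void)
{
	rcu_register_thread();
	replace_device("vg-seed");
	show_devname();
	replace_device("vg-new");
	show_devname();
	rcu_unregister_thread();
	return 0;
}

The ordering is the whole point: rcu_assign_pointer() before
synchronize_rcu() guarantees a reader never dereferences a freed pointer,
which is exactly why no device_list_mutex is needed on the read side.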
Signed-off-by: Anand Jain
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/volumes.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 05eedab5ff156..2101a5bd4eba8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2305,13 +2305,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
 
 	mutex_unlock(&fs_devices->device_list_mutex);
 
-	/*
-	 * The update_dev_time() with in btrfs_scratch_superblocks()
-	 * may lead to a call to btrfs_show_devname() which will try
-	 * to hold device_list_mutex. And here this device
-	 * is already out of device list, so we don't have to hold
-	 * the device_list_mutex lock.
-	 */
 	btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
 				  tgtdev->name->str);
From 7a42fe4500dbbd0edd0e107f630dee2ba2b8a3d4 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:31 +0100
Subject: [PATCH 0296/1202] btrfs: check if a log tree exists at inode_logged()

In case an inode was never logged since it was loaded from disk and was
modified in the current transaction (its ->last_trans matches the ID of
the current transaction), inode_logged() returns true even if there's no
existing log tree. In this case we can simply check if a log tree exists
and return false if it does not. This avoids a caller of inode_logged()
doing some unnecessary, but harmless, work.

For btrfs_log_new_name() it avoids logging an inode in case it was never
logged since it was loaded from disk and there is currently no log tree
for the inode's root. For the remaining callers of inode_logged(),
btrfs_del_dir_entries_in_log() and btrfs_del_inode_ref_in_log(), it has
no effect since they already check if a log tree exists through their
calls to join_running_log_trans().

So just add a check to inode_logged() to verify if a log tree exists, and
return false if it does not.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 1/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b415c5ec03ea0..56a91abec85e2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3452,6 +3452,9 @@ static bool inode_logged(struct btrfs_trans_handle *trans,
 	if (inode->logged_trans == trans->transid)
 		return true;
 
+	if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
+		return false;
+
 	/*
 	 * The inode's logged_trans is always 0 when we load it (because it is
 	 * not persisted in the inode item or elsewhere).
So if it is 0, the From 87726a0c15a1326995b2386054ca396df5d569f0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 31 Aug 2021 15:30:32 +0100 Subject: [PATCH 0297/1202] btrfs: remove no longer needed checks for NULL log context Since commit 75b463d2b47aef ("btrfs: do not commit logs and transactions during link and rename operations"), we always pass a non-NULL log context to btrfs_log_inode_parent() and therefore to all the functions that it calls. So remove the checks we have all over the place that test for a NULL log context, making the code shorter and easier to read, as well as reducing the size of the generated code. This patch is part of a patch set comprised of the following patches: btrfs: check if a log tree exists at inode_logged() btrfs: remove no longer needed checks for NULL log context btrfs: do not log new dentries when logging that a new name exists btrfs: always update the logged transaction when logging new names btrfs: avoid expensive search when dropping inode items from log btrfs: add helper to truncate inode items when logging inode btrfs: avoid expensive search when truncating inode items from the log btrfs: avoid search for logged i_size when logging inode if possible btrfs: avoid attempt to drop extents when logging inode for the first time btrfs: do not commit delayed inode when logging a file in full sync mode This is patch 2/10 and test results are listed in the change log of the last patch in the set. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 56a91abec85e2..4d4cab8fd6fa5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -207,7 +207,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, } atomic_inc(&root->log_writers); - if (ctx && !ctx->logging_new_name) { + if (!ctx->logging_new_name) { int index = root->log_transid % 2; list_add_tail(&ctx->list, &root->log_ctxs[index]); ctx->log_transid = root->log_transid; @@ -3037,9 +3037,6 @@ static void wait_for_writer(struct btrfs_root *root) static inline void btrfs_remove_log_ctx(struct btrfs_root *root, struct btrfs_log_ctx *ctx) { - if (!ctx) - return; - mutex_lock(&root->log_mutex); list_del_init(&ctx->list); mutex_unlock(&root->log_mutex); @@ -3782,8 +3779,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ di = btrfs_item_ptr(src, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(src, di, &tmp); - if (ctx && - (btrfs_dir_transid(src, di) == trans->transid || + if ((btrfs_dir_transid(src, di) == trans->transid || btrfs_dir_type(src, di) == BTRFS_FT_DIR) && tmp.type != BTRFS_ROOT_ITEM_KEY) ctx->log_new_dentries = true; @@ -5242,7 +5238,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, &other_ino, &other_parent); if (ret < 0) { return ret; - } else if (ret > 0 && ctx && + } else if (ret > 0 && other_ino != btrfs_ino(BTRFS_I(ctx->inode))) { if (ins_nr > 0) { ins_nr++; @@ -5583,8 +5579,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * So keep it simple for this case and just don't flag the ancestors as * logged. 
 	 */
-	if (!ctx ||
-	    !(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
+	if (!(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name &&
 	    &inode->vfs_inode != ctx->inode)) {
 		spin_lock(&inode->lock);
 		inode->logged_trans = trans->transid;
@@ -5937,11 +5932,10 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 			continue;
 		}
 
-		if (ctx)
-			ctx->log_new_dentries = false;
+		ctx->log_new_dentries = false;
 		ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode),
 				      LOG_INODE_ALL, ctx);
-		if (!ret && ctx && ctx->log_new_dentries)
+		if (!ret && ctx->log_new_dentries)
 			ret = log_new_dir_dentries(trans, root,
 						   BTRFS_I(dir_inode), ctx);
 		btrfs_add_delayed_iput(dir_inode);
@@ -6202,7 +6196,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		goto end_trans;
 	}
 
-	if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries)
+	if (S_ISDIR(inode->vfs_inode.i_mode) && ctx->log_new_dentries)
 		log_dentries = true;
 
 	/*
From f446c5627b23311d278d37831f06b1c9c16b2ff7 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:33 +0100
Subject: [PATCH 0298/1202] btrfs: do not log new dentries when logging that a new name exists

When logging a new name for an inode, due to a link or rename operation,
we don't need to log all new dentries of the parent directories and their
subdirectories. We only want to log the names of the inode and that any
new parent directories exist. So in this case don't trigger logging of
the new dentries; that is only needed when doing an explicit fsync on a
directory or on a file which requires logging its parent directories.

This avoids unnecessary work and reduces contention on the extent buffers
of a log tree.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 3/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4d4cab8fd6fa5..ccce82538b77c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5712,6 +5712,14 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
 	struct btrfs_dir_list *dir_elem;
 	int ret = 0;
 
+	/*
+	 * If we are logging a new name, as part of a link or rename operation,
+	 * don't bother logging new dentries, as we just want to log the names
+	 * of an inode and that any new parents exist.
+ */ + if (ctx->logging_new_name) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; From fbd492bf16a7e9cc62674a2e48a98b334a7d65c1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 31 Aug 2021 15:30:34 +0100 Subject: [PATCH 0299/1202] btrfs: always update the logged transaction when logging new names When we are logging a new name for an inode, due to a link or rename operation, if the inode has ancestor inodes that are new, created in the current transaction, we need to log that these inodes exist. To ensure that a subsequent explicit fsync on one of these ancestor inodes does sync the log, we don't set the logged_trans field of these inodes. This was done in commit 75b463d2b47aef ("btrfs: do not commit logs and transactions during link and rename operations"), to avoid syncing a log after a rename or link operation. In order to allow for future changes to do some optimizations, change this behaviour to always update the logged_trans of any logged inode and don't update the last_log_commit of the inode if we are logging that it exists. This accomplishes that same objective with simpler logic, allowing for some optimizations in the next patches. So just do that simplification. This patch is part of a patch set comprised of the following patches: btrfs: check if a log tree exists at inode_logged() btrfs: remove no longer needed checks for NULL log context btrfs: do not log new dentries when logging that a new name exists btrfs: always update the logged transaction when logging new names btrfs: avoid expensive search when dropping inode items from log btrfs: add helper to truncate inode items when logging inode btrfs: avoid expensive search when truncating inode items from the log btrfs: avoid search for logged i_size when logging inode if possible btrfs: avoid attempt to drop extents when logging inode for the first time btrfs: do not commit delayed inode when logging a file in full sync mode This is patch 4/10 and test results are listed in the change log of the last patch in the set. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 73 +++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ccce82538b77c..fc22cd81ea910 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5569,47 +5569,42 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } } + spin_lock(&inode->lock); + inode->logged_trans = trans->transid; /* - * If we are logging that an ancestor inode exists as part of logging a - * new name from a link or rename operation, don't mark the inode as - * logged - otherwise if an explicit fsync is made against an ancestor, - * the fsync considers the inode in the log and doesn't sync the log, - * resulting in the ancestor missing after a power failure unless the - * log was synced as part of an fsync against any other unrelated inode. - * So keep it simple for this case and just don't flag the ancestors as - * logged. + * Don't update last_log_commit if we logged that an inode exists. + * We do this for three reasons: + * + * 1) We might have had buffered writes to this inode that were + * flushed and had their ordered extents completed in this + * transaction, but we did not previously log the inode with + * LOG_INODE_ALL. Later the inode was evicted and after that + * it was loaded again and this LOG_INODE_EXISTS log operation + * happened. 
We must make sure that if an explicit fsync against + * the inode is performed later, it logs the new extents, an + * updated inode item, etc, and syncs the log. The same logic + * applies to direct IO writes instead of buffered writes. + * + * 2) When we log the inode with LOG_INODE_EXISTS, its inode item + * is logged with an i_size of 0 or whatever value was logged + * before. If later the i_size of the inode is increased by a + * truncate operation, the log is synced through an fsync of + * some other inode and then finally an explicit fsync against + * this inode is made, we must make sure this fsync logs the + * inode with the new i_size, the hole between old i_size and + * the new i_size, and syncs the log. + * + * 3) If we are logging that an ancestor inode exists as part of + * logging a new name from a link or rename operation, don't update + * its last_log_commit - otherwise if an explicit fsync is made + * against an ancestor, the fsync considers the inode in the log + * and doesn't sync the log, resulting in the ancestor missing after + * a power failure unless the log was synced as part of an fsync + * against any other unrelated inode. */ - if (!(S_ISDIR(inode->vfs_inode.i_mode) && ctx->logging_new_name && - &inode->vfs_inode != ctx->inode)) { - spin_lock(&inode->lock); - inode->logged_trans = trans->transid; - /* - * Don't update last_log_commit if we logged that an inode exists. - * We do this for two reasons: - * - * 1) We might have had buffered writes to this inode that were - * flushed and had their ordered extents completed in this - * transaction, but we did not previously log the inode with - * LOG_INODE_ALL. Later the inode was evicted and after that - * it was loaded again and this LOG_INODE_EXISTS log operation - * happened. We must make sure that if an explicit fsync against - * the inode is performed later, it logs the new extents, an - * updated inode item, etc, and syncs the log. The same logic - * applies to direct IO writes instead of buffered writes. - * - * 2) When we log the inode with LOG_INODE_EXISTS, its inode item - * is logged with an i_size of 0 or whatever value was logged - * before. If later the i_size of the inode is increased by a - * truncate operation, the log is synced through an fsync of - * some other inode and then finally an explicit fsync against - * this inode is made, we must make sure this fsync logs the - * inode with the new i_size, the hole between old i_size and - * the new i_size, and syncs the log. - */ - if (inode_only != LOG_INODE_EXISTS) - inode->last_log_commit = inode->last_sub_trans; - spin_unlock(&inode->lock); - } + if (inode_only != LOG_INODE_EXISTS) + inode->last_log_commit = inode->last_sub_trans; + spin_unlock(&inode->lock); out_unlock: mutex_unlock(&inode->log_mutex); From 15028f480d8b4e6b4d00e4a038f8719d2cb10a2d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 31 Aug 2021 15:30:35 +0100 Subject: [PATCH 0300/1202] btrfs: avoid expensive search when dropping inode items from log Whenever we are logging a directory inode, logging that an inode exists or logging an inode that has changes in its references or xattrs, we attempt to delete items of this inode we may have previously logged (through calls to drop_objectid_items()). That attempt does a btree search for deletion, which is expensive because it always acquires write locks for extent buffers at levels 2, 1 and 0, and it balances any node that is less than half full. 
Acquiring the write locks can block the task if the extent buffers are
already locked or block other tasks attempting to lock them, which is
especially bad in case of log trees since they are small due to their
short life, with a root node at a level typically not greater than level 2.

If we know that we are logging the inode for the first time in the current
transaction, we can skip the search. This change does that.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 5/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fc22cd81ea910..39648f4048bc7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3881,17 +3881,21 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
  * This cannot be run for file data extents because it does not
  * free the extents they point to.
  */
-static int drop_objectid_items(struct btrfs_trans_handle *trans,
+static int drop_inode_items(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *log,
 			    struct btrfs_path *path,
-			    u64 objectid, int max_key_type)
+			    struct btrfs_inode *inode,
+			    int max_key_type)
 {
 	int ret;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	int start_slot;
 
-	key.objectid = objectid;
+	if (!inode_logged(trans, inode))
+		return 0;
+
+	key.objectid = btrfs_ino(inode);
 	key.type = max_key_type;
 	key.offset = (u64)-1;
 
@@ -3908,7 +3912,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
 				      path->slots[0]);
 
-		if (found_key.objectid != objectid)
+		if (found_key.objectid != key.objectid)
 			break;
 
 		found_key.offset = 0;
@@ -5442,7 +5446,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		clear_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags);
 		if (inode_only == LOG_INODE_EXISTS)
 			max_key_type = BTRFS_XATTR_ITEM_KEY;
-		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
+		ret = drop_inode_items(trans, log, path, inode, max_key_type);
 	} else {
 		if (inode_only == LOG_INODE_EXISTS) {
 			/*
@@ -5466,8 +5470,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			       &inode->runtime_flags)) {
 			if (inode_only == LOG_INODE_EXISTS) {
 				max_key.type = BTRFS_XATTR_ITEM_KEY;
-				ret = drop_objectid_items(trans, log, path, ino,
-							  max_key.type);
+				ret = drop_inode_items(trans, log, path, inode,
+						       max_key.type);
 			} else {
 				clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 					  &inode->runtime_flags);
@@ -5486,8 +5490,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			if (inode_only == LOG_INODE_ALL)
 				fast_search = true;
 			max_key.type = BTRFS_XATTR_ITEM_KEY;
-			ret = drop_objectid_items(trans, log, path, ino,
-						  max_key.type);
+			ret = drop_inode_items(trans, log, path, inode,
+					       max_key.type);
 		} else {
 			if (inode_only == LOG_INODE_ALL)
 				fast_search = true;
From fb65062f649392067c765c1f37638713700bc4c8 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:36 +0100
Subject: [PATCH 0301/1202] btrfs: add helper to truncate inode items when logging inode

Move the call to btrfs_truncate_inode_items(), and the surrounding retry
loop, into a local helper function. This avoids some repetition and keeps
the next change from becoming awkward due to excessive indentation.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 6/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 39648f4048bc7..2d92187a12737 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3937,6 +3937,21 @@ static int drop_inode_items(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+static int truncate_inode_items(struct btrfs_trans_handle *trans,
+				struct btrfs_root *log_root,
+				struct btrfs_inode *inode,
+				u64 new_size, u32 min_type)
+{
+	int ret;
+
+	do {
+		ret = btrfs_truncate_inode_items(trans, log_root, inode,
+						 new_size, min_type, NULL);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
 static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct extent_buffer *leaf,
 			    struct btrfs_inode_item *item,
@@ -4525,13 +4540,9 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 				 * Avoid logging extent items logged in past fsync calls
 				 * and leading to duplicate keys in the log tree.
 				 */
-				do {
-					ret = btrfs_truncate_inode_items(trans,
-							 root->log_root,
-							 inode, truncate_offset,
-							 BTRFS_EXTENT_DATA_KEY,
-							 NULL);
-				} while (ret == -EAGAIN);
+				ret = truncate_inode_items(trans, root->log_root, inode,
+							   truncate_offset,
+							   BTRFS_EXTENT_DATA_KEY);
 				if (ret)
 					goto out;
 				dropped_extents = true;
@@ -5477,12 +5488,7 @@
 				  &inode->runtime_flags);
 			clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 				  &inode->runtime_flags);
-			while(1) {
-				ret = btrfs_truncate_inode_items(trans,
-					log, inode, 0, 0, NULL);
-				if (ret != -EAGAIN)
-					break;
-			}
+			ret = truncate_inode_items(trans, log, inode, 0, 0);
 		}
 	} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 				      &inode->runtime_flags) ||
From ab61ea9870d8c51202960776739690d5aa24acff Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:37 +0100
Subject: [PATCH 0302/1202] btrfs: avoid expensive search when truncating inode items from the log

Whenever we are logging a file inode in full sync mode we call
btrfs_truncate_inode_items() to delete items of the inode we may have
previously logged.
That results in doing a btree search for deletion, which is expensive
because it always acquires write locks for extent buffers at levels 2, 1
and 0, and it balances any node that is less than half full. Acquiring
the write locks can block the task if the extent buffers are already
locked by another task or block other tasks attempting to lock them,
which is especially bad in case of log trees since they are small due to
their short life, with a root node at a level typically not greater than
level 2.

If we know that we are logging the inode for the first time in the
current transaction, we can skip the call to
btrfs_truncate_inode_items(), avoiding the deletion search. This change
does that.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 7/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2d92187a12737..94964e62e430e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5488,7 +5488,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 				  &inode->runtime_flags);
-			ret = truncate_inode_items(trans, log, inode, 0, 0);
+			if (inode_logged(trans, inode))
+				ret = truncate_inode_items(trans, log,
+							   inode, 0, 0);
 		}
 	} else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
 				      &inode->runtime_flags) ||
From b21f49310910aabec325d1c9fe7712afda590729 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:38 +0100
Subject: [PATCH 0303/1202] btrfs: avoid search for logged i_size when logging inode if possible

If we are logging that an inode exists and the inode was not logged
before, we can avoid searching in the log tree for the inode item since
we know it does not exist. The search wastes time and adds more lock
contention on the extent buffers of the log tree when there are other
tasks logging other inodes.
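Viewed together with patch 1/10 ("check if a log tree exists at
inode_logged()"), the gate that this and the neighbouring patches rely on
reduces to two cheap in-memory tests before any tree search is attempted.
A condensed user-space model follows; the struct layouts, the *_model
names and the final one-line heuristic are simplifications for
illustration, not the kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct root  { bool has_log_tree; };
struct inode { uint64_t logged_trans; uint64_t last_trans; struct root *root; };

/* Hypothetical condensed model of inode_logged(). */
static bool inode_logged_model(uint64_t transid, const struct inode *inode)
{
	/* Logged earlier in this transaction: certainly in the log. */
	if (inode->logged_trans == transid)
		return true;
	/* Patch 1/10: no log tree for this root, nothing can be logged. */
	if (!inode->root->has_log_tree)
		return false;
	/*
	 * logged_trans is 0 after the inode is loaded from disk (it is not
	 * persisted); the real function then falls back to a heuristic on
	 * last_trans, condensed here to a single test.
	 */
	return inode->logged_trans == 0 && inode->last_trans == transid;
}

int main(void)
{
	struct root  r  = { .has_log_tree = false };
	struct inode in = { .logged_trans = 0, .last_trans = 42, .root = &r };

	/* Modified in transaction 42, but the root has no log tree yet. */
	printf("%d\n", inode_logged_model(42, &in));	/* 0: skip log work */
	r.has_log_tree = true;
	printf("%d\n", inode_logged_model(42, &in));	/* 1: may be logged */
	return 0;
}

Patches 5/10 through 9/10 then wrap each expensive log-tree operation in
inode_logged() (or its negation), so the first log of an inode in a
transaction costs a couple of comparisons instead of a btree deletion
search.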
This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 8/10 and test results are listed in the change log of the
last patch in the set.

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 94964e62e430e..6684fccee2369 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5459,7 +5459,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 			max_key_type = BTRFS_XATTR_ITEM_KEY;
 		ret = drop_inode_items(trans, log, path, inode, max_key_type);
 	} else {
-		if (inode_only == LOG_INODE_EXISTS) {
+		if (inode_only == LOG_INODE_EXISTS && inode_logged(trans, inode)) {
 			/*
 			 * Make sure the new inode item we write to the log has
 			 * the same isize as the current one (if it exists).
From d12bb4adddfb249a723bb8d7680957638208636b Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:39 +0100
Subject: [PATCH 0304/1202] btrfs: avoid attempt to drop extents when logging inode for the first time

When logging an extent, in the fast fsync path, we always attempt to drop
or trim any existing extents with a range that matches or overlaps the
range of the extent we are about to log. We do that through a call to
btrfs_drop_extents().

However this is not needed when we are logging the inode for the first
time in the current transaction, since we have no inode items of the
inode in the log tree. Calling btrfs_drop_extents() does a deletion search
on the log tree, which is expensive when we have concurrent tasks
accessing the log tree because a deletion search always acquires a write
lock on the extent buffers at levels 2, 1 and 0, adding significant lock
contention, especially taking into account that the height of a log tree
rarely (if ever) goes beyond 2 or 3, due to its short life.

So skip the call to btrfs_drop_extents() when the inode was not previously
logged in the current transaction.

This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 9/10 and test results are listed in the change log of the
last patch in the set.
Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6684fccee2369..951f7b8f85b70 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4375,14 +4375,25 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
-	drop_args.path = path;
-	drop_args.start = em->start;
-	drop_args.end = em->start + em->len;
-	drop_args.replace_extent = true;
-	drop_args.extent_item_size = sizeof(*fi);
-	ret = btrfs_drop_extents(trans, log, inode, &drop_args);
-	if (ret)
-		return ret;
+	/*
+	 * If this is the first time we are logging the inode in the current
+	 * transaction, we can avoid btrfs_drop_extents(), which is expensive
+	 * because it does a deletion search, which always acquires write locks
+	 * for extent buffers at levels 2, 1 and 0. This not only wastes time
+	 * but also adds significant contention in a log tree, since log trees
+	 * are small, with a root at level 2 or 3 at most, due to their short
+	 * life span.
+	 */
+	if (inode_logged(trans, inode)) {
+		drop_args.path = path;
+		drop_args.start = em->start;
+		drop_args.end = em->start + em->len;
+		drop_args.replace_extent = true;
+		drop_args.extent_item_size = sizeof(*fi);
+		ret = btrfs_drop_extents(trans, log, inode, &drop_args);
+		if (ret)
+			return ret;
+	}
 
 	if (!drop_args.extent_inserted) {
 		key.objectid = btrfs_ino(inode);
From e636fd469bc8026ff12ac3f00f6c857e6ab68fd3 Mon Sep 17 00:00:00 2001
From: Filipe Manana
Date: Tue, 31 Aug 2021 15:30:40 +0100
Subject: [PATCH 0305/1202] btrfs: do not commit delayed inode when logging a file in full sync mode

When logging a regular file in full sync mode, we are currently committing
its delayed inode item. This is to ensure that we never miss copying the
inode item, with its most up to date data, into the log tree.

However that is not necessary since commit e4545de5b035 ("Btrfs: fix fsync
data loss after append write"), because even if we don't find the leaf
with the inode item when looking for leaves that changed in the current
transaction, we end up logging the inode item later using the in-memory
content. In case we find the leaf containing the inode item, we already
end up using the in-memory inode for filling the inode item in the log
tree, and not the inode item that is in the fs/subvolume tree, as it
might not be up to date (copy_items() -> fill_inode_item()).

So don't commit the delayed inode item, which brings a couple of benefits:

1) Avoid writing the inode item to the fs/subvolume btree, saving time and
   reducing lock contention on the btree;

2) In case no other item for the inode was changed, added or deleted in
   the same leaf where the inode item is located, we ended up copying all
   the items in that leaf to the log tree - it's harmless from a
   functional point of view, but it wastes time and log tree space.
This patch is part of a patch set comprised of the following patches:

  btrfs: check if a log tree exists at inode_logged()
  btrfs: remove no longer needed checks for NULL log context
  btrfs: do not log new dentries when logging that a new name exists
  btrfs: always update the logged transaction when logging new names
  btrfs: avoid expensive search when dropping inode items from log
  btrfs: add helper to truncate inode items when logging inode
  btrfs: avoid expensive search when truncating inode items from the log
  btrfs: avoid search for logged i_size when logging inode if possible
  btrfs: avoid attempt to drop extents when logging inode for the first time
  btrfs: do not commit delayed inode when logging a file in full sync mode

This is patch 10/10 and the following test results compare a branch with the whole patch set applied versus a branch without any of the patches applied.

The following script was used to test dbench with 8 and 16 jobs on a machine with 12 cores, 64G of RAM, an NVMe device and using a non-debug kernel config (Debian's default):

  $ cat test.sh
  #!/bin/bash

  if [ $# -ne 1 ]; then
      echo "Use $0 NUM_JOBS"
      exit 1
  fi

  NUM_JOBS=$1
  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1
  MOUNT_OPTIONS="-o ssd"
  MKFS_OPTIONS="-m single -d single"

  echo "performance" | \
      tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

  mkfs.btrfs -f $MKFS_OPTIONS $DEV
  mount $MOUNT_OPTIONS $DEV $MNT

  dbench -D $MNT -t 120 $NUM_JOBS

  umount $MNT

The results were the following:

8 jobs, before patchset:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    4113896     0.009   238.665
 Close        3021699     0.001     0.590
 Rename        174215     0.082   238.733
 Unlink        830977     0.049   238.642
 Deltree           96     2.232     8.022
 Mkdir             48     0.003     0.005
 Qpathinfo    3729013     0.005     2.672
 Qfileinfo     653206     0.001     0.152
 Qfsinfo       683866     0.002     0.526
 Sfileinfo     335055     0.004     1.571
 Find         1441800     0.016     4.288
 WriteX       2049644     0.010     3.982
 ReadX        6449786     0.003     0.969
 LockX          13400     0.002     0.043
 UnlockX        13400     0.001     0.075
 Flush         288349     2.521   245.516

Throughput 1075.73 MB/sec  8 clients  8 procs  max_latency=245.520 ms

8 jobs, after patchset:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    4154282     0.009   156.675
 Close        3051450     0.001     0.843
 Rename        175912     0.072     4.444
 Unlink        839067     0.048    66.050
 Deltree           96     2.131     5.979
 Mkdir             48     0.002     0.004
 Qpathinfo    3765575     0.005     3.079
 Qfileinfo     659582     0.001     0.099
 Qfsinfo       690474     0.002     0.155
 Sfileinfo     338366     0.004     1.419
 Find         1455816     0.016     3.423
 WriteX       2069538     0.010     4.328
 ReadX        6512429     0.003     0.840
 LockX          13530     0.002     0.078
 UnlockX        13530     0.001     0.051
 Flush         291158     2.500   163.468

Throughput 1105.45 MB/sec  8 clients  8 procs  max_latency=163.474 ms

+2.7% throughput, -33.4% max latency

16 jobs, before patchset:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    5457602     0.033   337.098
 Close        4008979     0.002     2.018
 Rename        231051     0.323   254.054
 Unlink       1102209     0.202   337.243
 Deltree          160     6.521    31.720
 Mkdir             80     0.003     0.007
 Qpathinfo    4946147     0.014     6.988
 Qfileinfo     867440     0.001     1.642
 Qfsinfo       907081     0.003     1.821
 Sfileinfo     444433     0.005     2.053
 Find         1912506     0.067     7.854
 WriteX       2724852     0.018     7.428
 ReadX        8553883     0.003     2.059
 LockX          17770     0.003     0.350
 UnlockX        17770     0.002     0.627
 Flush         382533     2.810   353.691

Throughput 1413.09 MB/sec  16 clients  16 procs  max_latency=353.696 ms

16 jobs, after patchset:

 Operation      Count    AvgLat    MaxLat
 ----------------------------------------
 NTCreateX    5393156     0.034   303.181
 Close        3961986     0.002     1.502
 Rename        228359     0.320   253.379
 Unlink       1088920     0.206   303.409
 Deltree          160     6.419    30.088
 Mkdir             80     0.003     0.004
 Qpathinfo    4887967     0.015     7.722
 Qfileinfo     857408     0.001     1.651
 Qfsinfo       896343     0.002     2.147
 Sfileinfo     439317     0.005     4.298
 Find         1890018     0.073     8.347
 WriteX       2693356     0.018     6.373
 ReadX        8453485     0.003     3.836
 LockX          17562     0.003     0.486
 UnlockX        17562     0.002     0.635
 Flush         378023     2.802   315.904

Throughput 1454.46 MB/sec  16 clients  16 procs  max_latency=315.910 ms

+2.9% throughput, -10.7% max latency

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 951f7b8f85b70..db5ead891e244 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5414,22 +5414,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	 * Only run delayed items if we are a directory. We want to make sure
 	 * all directory indexes hit the fs/subvolume tree so we can find them
 	 * and figure out which index ranges have to be logged.
-	 *
-	 * Otherwise commit the delayed inode only if the full sync flag is set,
-	 * as we want to make sure an up to date version is in the subvolume
-	 * tree so copy_inode_items_to_log() / copy_items() can find it and copy
-	 * it to the log tree. For a non full sync, we always log the inode item
-	 * based on the in-memory struct btrfs_inode which is always up to date.
 	 */
-	if (S_ISDIR(inode->vfs_inode.i_mode))
-		ret = btrfs_commit_inode_delayed_items(trans, inode);
-	else if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
-		ret = btrfs_commit_inode_delayed_inode(inode);
-
-	if (ret) {
-		btrfs_free_path(path);
-		btrfs_free_path(dst_path);
-		return ret;
+	if (S_ISDIR(inode->vfs_inode.i_mode)) {
+		err = btrfs_commit_inode_delayed_items(trans, inode);
+		if (err)
+			goto out;
 	}

 	if (inode_only == LOG_OTHER_INODE || inode_only == LOG_OTHER_INODE_ALL) {
@@ -5630,7 +5619,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	spin_unlock(&inode->lock);
 out_unlock:
 	mutex_unlock(&inode->log_mutex);
-
+out:
 	btrfs_free_path(path);
 	btrfs_free_path(dst_path);
 	return err;

From 72ab4215583b80f714876ec8c2523bcd4cf7325e Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Fri, 3 Sep 2021 20:45:14 +0800
Subject: [PATCH 0306/1202] btrfs: unexport repair_io_failure()

Function repair_io_failure() is no longer used outside of extent_io.c since commit 8b9b6f255485 ("btrfs: scrub: cleanup the remaining nodatasum fixup code"), which removes the last external caller.

Reviewed-by: Nikolay Borisov
Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c | 6 +++---
 fs/btrfs/extent_io.h | 3 ---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 470108f07553b..8959ac580f466 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2282,9 +2282,9 @@ int free_io_failure(struct extent_io_tree *failure_tree,
  * currently, there can be no more than two copies of every data bit. thus,
  * exactly one rewrite is required.
*/ -int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, - u64 length, u64 logical, struct page *page, - unsigned int pg_offset, int mirror_num) +static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, + u64 length, u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num) { struct bio *bio; struct btrfs_device *dev; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9f3e0a45a5e4b..ba471f2063a72 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -283,9 +283,6 @@ struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio); struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size); -int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, - u64 length, u64 logical, struct page *page, - unsigned int pg_offset, int mirror_num); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num); From c64b9b8a58b1cc136b6927556f27a0c917c3697b Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:25 +0900 Subject: [PATCH 0307/1202] btrfs: introduce btrfs_is_data_reloc_root There are several places in our codebase where we check if a root is the root of the data reloc tree and subsequent patches will introduce more. Factor out the check into a small helper function instead of open coding it multiple times. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 19 ++++++++----------- fs/btrfs/relocation.c | 3 +-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index bfba85d9f8624..f6e624098d531 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3846,6 +3846,11 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) return fs_info->zoned != 0; } +static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) +{ + return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; +} + /* * We use page status Private2 to indicate there is an ordered extent with * unfinished IO. diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7d80e5b22d32b..d63c5e776a964 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) goto fail; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + !btrfs_is_data_reloc_root(root)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9aa429a9a235c..74ac37c5f2181 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2376,7 +2376,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, out: btrfs_free_path(path); - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) WARN_ON(ret > 0); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e6e87dd80a025..7d27d6626888f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1150,7 +1150,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, * fails during the stage where it updates the bytenr of file extent * items. 
*/ - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) min_alloc_size = num_bytes; else min_alloc_size = fs_info->sectorsize; @@ -1186,8 +1186,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (ret) goto out_drop_extent_cache; - if (root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID) { + if (btrfs_is_data_reloc_root(root)) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); /* @@ -1503,8 +1502,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, int *page_started, unsigned long *nr_written) { const bool is_space_ino = btrfs_is_free_space_inode(inode); - const bool is_reloc_ino = (inode->root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID); + const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root); const u64 range_bytes = end + 1 - start; struct extent_io_tree *io_tree = &inode->io_tree; u64 range_start = start; @@ -1866,8 +1864,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, btrfs_dec_nocow_writers(fs_info, disk_bytenr); nocow = false; - if (root->root_key.objectid == - BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) /* * Error handled later, as we must prevent * extent_clear_unlock_delalloc() in error handler @@ -2206,7 +2203,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, if (btrfs_is_testing(fs_info)) return; - if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && + if (!btrfs_is_data_reloc_root(root) && do_list && !(state->state & EXTENT_NORESERVE) && (*bits & EXTENT_CLEAR_DATA_RESV)) btrfs_free_reserved_data_space_noquota(fs_info, len); @@ -2531,7 +2528,7 @@ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, goto mapit; } else if (async && !skip_sum) { /* csum items have already been cloned */ - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + if (btrfs_is_data_reloc_root(root)) goto mapit; /* we're doing a write, do the async checksumming */ ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags, @@ -3307,7 +3304,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, u64 file_offset = pg_off + page_offset(page); int ret; - if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && + if (btrfs_is_data_reloc_root(root) && test_range_bit(io_tree, file_offset, file_offset + sectorsize - 1, EXTENT_NODATASUM, 1, NULL)) { @@ -4008,7 +4005,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * without delay */ if (!btrfs_is_free_space_inode(inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID + && !btrfs_is_data_reloc_root(root) && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { btrfs_update_root_times(trans, root); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 63d2b22cf4380..e9e748925cc94 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4390,8 +4390,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, if (!rc) return 0; - BUG_ON(rc->stage == UPDATE_DATA_PTRS && - root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); + BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root)); level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= From 28d22c4a66a5153f05c334fefe5cbbe3e0648f4d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:26 +0900 Subject: [PATCH 0308/1202] btrfs: zoned: add a dedicated data relocation block group Relocation in a zoned filesystem can 
fail with a transaction abort with error -22 (EINVAL). This happens because the relocation code assumes that the extents we relocated the data to have the same size as the source extents, and ensures this by preallocating the extents. But in a zoned filesystem we currently can't preallocate the extents, as this would break the sequential-write-required rule. Therefore it can happen that the writeback process kicks in while we're still adding pages to a delalloc range and starts writing out dirty pages. This then creates destination extents that are smaller than the source extents, triggering the following safety check in get_new_location(): 1034 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1035 ret = -EINVAL; 1036 goto out; 1037 } Temporarily create a dedicated block group for the relocation process, so no non-relocation data writes can interfere with the relocation writes. This is needed so that we can switch the relocation process on a zoned filesystem from the REQ_OP_ZONE_APPEND writing we use for data to a scheme like in a non-zoned filesystem using REQ_OP_WRITE and preallocation. Fixes: 32430c614844 ("btrfs: zoned: enable relocation on a zoned filesystem") Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 1 + fs/btrfs/ctree.h | 7 ++++++ fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 51 ++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/zoned.c | 10 +++++++++ fs/btrfs/zoned.h | 3 +++ 6 files changed, 71 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1302bf8d0be1d..46fdef7bbe20c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -903,6 +903,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cluster->refill_lock); btrfs_clear_treelog_bg(block_group); + btrfs_clear_data_reloc_bg(block_group); path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f6e624098d531..41f1718a83df9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1018,6 +1018,13 @@ struct btrfs_fs_info { spinlock_t treelog_bg_lock; u64 treelog_bg; + /* + * Start of the dedicated data relocation block group, protected by + * relocation_bg_lock.
+ */ + spinlock_t relocation_bg_lock; + u64 data_reloc_bg; + spinlock_t zone_active_bgs_lock; struct list_head zone_active_bgs; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d63c5e776a964..be382276d24f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2885,6 +2885,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); spin_lock_init(&fs_info->zone_active_bgs_lock); + spin_lock_init(&fs_info->relocation_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 74ac37c5f2181..9b7cbb669a589 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3497,6 +3497,9 @@ struct find_free_extent_ctl { /* Allocation is called for tree-log */ bool for_treelog; + /* Allocation is called for data relocation */ + bool for_data_reloc; + /* RAID index, converted from flags */ int index; @@ -3758,6 +3761,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, u64 avail; u64 bytenr = block_group->start; u64 log_bytenr; + u64 data_reloc_bytenr; int ret = 0; bool skip; @@ -3775,6 +3779,19 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (skip) return 1; + /* + * Do not allow non-relocation blocks in the dedicated relocation block + * group, and vice versa. + */ + spin_lock(&fs_info->relocation_bg_lock); + data_reloc_bytenr = fs_info->data_reloc_bg; + if (data_reloc_bytenr && + ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) || + (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr))) + skip = true; + spin_unlock(&fs_info->relocation_bg_lock); + if (skip) + return 1; /* Check RO and no space case before trying to activate it */ spin_lock(&block_group->lock); if (block_group->ro || @@ -3790,10 +3807,14 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, spin_lock(&space_info->lock); spin_lock(&block_group->lock); spin_lock(&fs_info->treelog_bg_lock); + spin_lock(&fs_info->relocation_bg_lock); ASSERT(!ffe_ctl->for_treelog || block_group->start == fs_info->treelog_bg || fs_info->treelog_bg == 0); + ASSERT(!ffe_ctl->for_data_reloc || + block_group->start == fs_info->data_reloc_bg || + fs_info->data_reloc_bg == 0); if (block_group->ro) { ret = 1; @@ -3810,6 +3831,16 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, goto out; } + /* + * Do not allow currently used block group to be the data relocation + * dedicated block group. 
+ */ + if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg && + (block_group->used || block_group->reserved)) { + ret = 1; + goto out; + } + WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity); avail = block_group->zone_capacity - block_group->alloc_offset; if (avail < num_bytes) { @@ -3828,6 +3859,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, if (ffe_ctl->for_treelog && !fs_info->treelog_bg) fs_info->treelog_bg = block_group->start; + if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg) + fs_info->data_reloc_bg = block_group->start; + ffe_ctl->found_offset = start + block_group->alloc_offset; block_group->alloc_offset += num_bytes; spin_lock(&ctl->tree_lock); @@ -3844,6 +3878,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, out: if (ret && ffe_ctl->for_treelog) fs_info->treelog_bg = 0; + if (ret && ffe_ctl->for_data_reloc) + fs_info->data_reloc_bg = 0; + spin_unlock(&fs_info->relocation_bg_lock); spin_unlock(&fs_info->treelog_bg_lock); spin_unlock(&block_group->lock); spin_unlock(&space_info->lock); @@ -4112,6 +4149,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info, ffe_ctl->hint_byte = fs_info->treelog_bg; spin_unlock(&fs_info->treelog_bg_lock); } + if (ffe_ctl->for_data_reloc) { + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg) + ffe_ctl->hint_byte = fs_info->data_reloc_bg; + spin_unlock(&fs_info->relocation_bg_lock); + } return 0; default: BUG(); @@ -4245,6 +4288,8 @@ static noinline int find_free_extent(struct btrfs_root *root, if (unlikely(block_group->ro)) { if (ffe_ctl->for_treelog) btrfs_clear_treelog_bg(block_group); + if (ffe_ctl->for_data_reloc) + btrfs_clear_data_reloc_bg(block_group); continue; } @@ -4438,6 +4483,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 flags; int ret; bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); + bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data); flags = get_alloc_profile_by_root(root, is_data); again: @@ -4451,6 +4497,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, ffe_ctl.delalloc = delalloc; ffe_ctl.hint_byte = hint_byte; ffe_ctl.for_treelog = for_treelog; + ffe_ctl.for_data_reloc = for_data_reloc; ret = find_free_extent(root, ins, &ffe_ctl); if (!ret && !is_data) { @@ -4470,8 +4517,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, sinfo = btrfs_find_space_info(fs_info, flags); btrfs_err(fs_info, - "allocation failed flags %llu, wanted %llu tree-log %d", - flags, num_bytes, for_treelog); + "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d", + flags, num_bytes, for_treelog, for_data_reloc); if (sinfo) btrfs_dump_space_info(fs_info, sinfo, num_bytes, 1); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 28a06c2d80adb..c7fe3e11e6853 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1954,3 +1954,13 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len out: btrfs_put_block_group(block_group); } + +void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + + spin_lock(&fs_info->relocation_bg_lock); + if (fs_info->data_reloc_bg == bg->start) + fs_info->data_reloc_bg = 0; + spin_unlock(&fs_info->relocation_bg_lock); +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 9c512402d7f42..e53ab7b96437e 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -75,6 +75,7 @@ bool btrfs_can_activate_zone(struct 
btrfs_fs_devices *fs_devices, int raid_index); void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length); +void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -229,6 +230,8 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length) { } +static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { } + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) From 3a993b3870aab0a462bb8fbba1bc90fd049a9e92 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:27 +0900 Subject: [PATCH 0309/1202] btrfs: zoned: only allow one process to add pages to a relocation inode Don't allow more than one process to add pages to a relocation inode on a zoned filesystem, otherwise we cannot guarantee the sequential write rule once we're filling preallocated extents. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8959ac580f466..cb366f6bcaede 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5132,6 +5132,9 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { + struct inode *inode = mapping->host; + const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root); + const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info); int ret = 0; struct extent_page_data epd = { .bio_ctrl = { 0 }, @@ -5139,7 +5142,15 @@ int extent_writepages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; + /* + * Allow only a single thread to do the reloc work in zoned mode to + * protect the write pointer updates. + */ + if (data_reloc && zoned) + btrfs_inode_lock(inode, 0); ret = extent_write_cache_pages(mapping, wbc, &epd); + if (data_reloc && zoned) + btrfs_inode_unlock(inode, 0); ASSERT(ret <= 0); if (ret < 0) { end_write_bio(&epd, ret); From 2d31c34f46452034e1f8307d2ed904927f4efc53 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:28 +0900 Subject: [PATCH 0310/1202] btrfs: zoned: use regular writes for relocation Now that we have a dedicated block group for relocation, we can use REQ_OP_WRITE instead of REQ_OP_ZONE_APPEND for writing out the data on relocation. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index c7fe3e11e6853..1433ee220c94c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1490,6 +1490,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start) if (!is_data_inode(&inode->vfs_inode)) return false; + /* + * Using REQ_OP_ZONE_APPEND for relocation can break assumptions on the + * extent layout the relocation code has.
+ * Furthermore we have set aside our own block group from which only the + * relocation "process" can allocate and make sure only one process at a + * time can add pages to an extent that gets relocated, so it's safe to + * use regular REQ_OP_WRITE for this special case. + */ + if (btrfs_is_data_reloc_root(inode->root)) + return false; + cache = btrfs_lookup_block_group(fs_info, start); ASSERT(cache); if (!cache) From 80351124fe14c17359122b8156bc5ab953d4e9e8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:29 +0900 Subject: [PATCH 0311/1202] btrfs: check for relocation inodes on zoned btrfs in should_nocow Prepare for allowing preallocation for relocation inodes. Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d27d6626888f..9b05af75d9109 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1944,7 +1944,15 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page const bool zoned = btrfs_is_zoned(inode->root->fs_info); if (should_nocow(inode, start, end)) { - ASSERT(!zoned); + /* + * Normally on a zoned device we're only doing COW writes, but + * in case of relocation on a zoned filesystem we have taken + * the precaution that we're only writing sequentially. It's + * safe to use run_delalloc_nocow() here, like for regular + * preallocated inodes. + */ + ASSERT(!zoned || (zoned && btrfs_is_data_reloc_root(inode->root))); ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, nr_written); } else if (!inode_can_compress(inode) || From 1a1ea608bfa2d1c18cb0bb49f8244d77450e74ac Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:30 +0900 Subject: [PATCH 0312/1202] btrfs: zoned: allow preallocation for relocation inodes Now that we use a dedicated block group and regular writes for data relocation, we can preallocate the space needed for a relocated inode, just like we do in regular mode. Essentially this reverts commit 32430c614844 ("btrfs: zoned: enable relocation on a zoned filesystem") as it is not needed anymore. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e9e748925cc94..c33fe522aaaad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2853,31 +2853,6 @@ static noinline_for_stack int prealloc_file_extent_cluster( if (ret) return ret; - /* - * On a zoned filesystem, we cannot preallocate the file region. - * Instead, we dirty and fiemap_write the region.
- */ - if (btrfs_is_zoned(inode->root->fs_info)) { - struct btrfs_root *root = inode->root; - struct btrfs_trans_handle *trans; - - end = cluster->end - offset + 1; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode); - i_size_write(&inode->vfs_inode, end); - ret = btrfs_update_inode(trans, root, inode); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - return ret; - } - - return btrfs_end_transaction(trans); - } - btrfs_inode_lock(&inode->vfs_inode, 0); for (nr = 0; nr < cluster->nr; nr++) { start = cluster->boundary[nr] - offset; @@ -3085,7 +3060,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, static int relocate_file_extent_cluster(struct inode *inode, struct file_extent_cluster *cluster) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 offset = BTRFS_I(inode)->index_cnt; unsigned long index; unsigned long last_index; @@ -3115,8 +3089,6 @@ static int relocate_file_extent_cluster(struct inode *inode, for (index = (cluster->start - offset) >> PAGE_SHIFT; index <= last_index && !ret; index++) ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index); - if (btrfs_is_zoned(fs_info) && !ret) - ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (ret == 0) WARN_ON(cluster_nr != cluster->nr); out: @@ -3771,12 +3743,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_inode_item *item; struct extent_buffer *leaf; - u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; int ret; - if (btrfs_is_zoned(trans->fs_info)) - flags &= ~BTRFS_INODE_PREALLOC; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -3791,7 +3759,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, flags); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | + BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); From 7b3abd1b77707d44d196ee645bb2b1b0f4c08eb3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:31 +0900 Subject: [PATCH 0313/1202] btrfs: rename setup_extent_mapping in relocation code In btrfs code we have two functions called setup_extent_mapping, one in the extent_map code and one in the relocation code. While both are private to their respective implementation, this can still be confusing for the reader. So rename the version in relocation.c to setup_relocation_extent_mapping. No functional changes. 
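For context, C's internal linkage is what made the duplicate name legal in the first place: a static function's name is private to its translation unit, so the compiler and linker never object, only readers do. A minimal userspace sketch of the situation, split across two compilation units plus a driver (file and function names here are hypothetical stand-ins, not the btrfs code):

  /* file: extent_map_sim.c -- first translation unit */
  #include <stdio.h>

  /* Internal linkage: this name is invisible outside this file. */
  static void setup_extent_mapping(void)
  {
          puts("extent_map_sim.c version");
  }

  void use_extent_map(void)
  {
          setup_extent_mapping();
  }

  /* file: relocation_sim.c -- second translation unit, same static name */
  #include <stdio.h>

  static void setup_extent_mapping(void)
  {
          puts("relocation_sim.c version");
  }

  void use_relocation(void)
  {
          setup_extent_mapping();
  }

  /* file: main.c -- build: cc extent_map_sim.c relocation_sim.c main.c */
  void use_extent_map(void);
  void use_relocation(void);

  int main(void)
  {
          use_extent_map();  /* prints "extent_map_sim.c version" */
          use_relocation();  /* prints "relocation_sim.c version" */
          return 0;
  }

Both definitions coexist without any linker error, which is why a rename like the one in this patch is purely for the reader.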
Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c33fe522aaaad..5e5066ee03e6f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2879,9 +2879,8 @@ static noinline_for_stack int prealloc_file_extent_cluster( return ret; } -static noinline_for_stack -int setup_extent_mapping(struct inode *inode, u64 start, u64 end, - u64 block_start) +static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode, + u64 start, u64 end, u64 block_start) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; @@ -3080,7 +3079,7 @@ static int relocate_file_extent_cluster(struct inode *inode, file_ra_state_init(ra, inode->i_mapping); - ret = setup_extent_mapping(inode, cluster->start - offset, + ret = setup_relocation_extent_mapping(inode, cluster->start - offset, cluster->end - offset, cluster->start); if (ret) goto out; From b8bddf9a98f3d173479cd27ca0b78c843c827f00 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 9 Sep 2021 01:19:32 +0900 Subject: [PATCH 0314/1202] btrfs: zoned: let the for_treelog test in the allocator stand out The statement which decides if an extent allocation on a zoned device is for the dedicated tree-log block group or not, and if we can use the block group we picked for this allocation, is not easy to read, but it is an important part of the allocator. Rewrite it into an if condition instead of a plain boolean test to make it stand out more, like the version which tests for the dedicated data-relocation block group. Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9b7cbb669a589..ea651b62d3360 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3763,7 +3763,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, u64 log_bytenr; u64 data_reloc_bytenr; int ret = 0; - bool skip; + bool skip = false; ASSERT(btrfs_is_zoned(block_group->fs_info)); @@ -3773,8 +3773,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group, */ spin_lock(&fs_info->treelog_bg_lock); log_bytenr = fs_info->treelog_bg; - skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) || (!ffe_ctl->for_treelog && bytenr == log_bytenr)); + if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) || (!ffe_ctl->for_treelog && bytenr == log_bytenr))) + skip = true; spin_unlock(&fs_info->treelog_bg_lock); if (skip) return 1; From 5cf29b8c4d71ba3e42b3d7e6d1cb46af9e2c4341 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Sep 2021 11:32:10 +0100 Subject: [PATCH 0315/1202] btrfs: remove root argument from btrfs_log_inode() and its callees The root argument passed to btrfs_log_inode() is unnecessary, as it is always the root of the inode we are going to log. This root also gets unnecessarily propagated to several functions called by btrfs_log_inode(), and all of them take the inode as an argument as well. So just remove the root argument from these functions and have them get the root from the inode where needed.
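To make the shape of the refactor concrete outside the kernel tree, here is a compilable userspace sketch of the before/after pattern; the struct definitions are toy stand-ins for the btrfs types, not the real ones. Passing a pointer that the callee can already reach through another argument only creates the risk of the two disagreeing:

  #include <stdio.h>

  struct root { int id; };
  struct inode { struct root *root; };

  /* Before: 'root' duplicates information already reachable from
   * 'inode', and nothing stops a caller from passing a mismatched pair. */
  static int log_inode_before(struct root *root, struct inode *inode)
  {
          return root->id;
  }

  /* After: derive the root from the inode where needed. */
  static int log_inode_after(struct inode *inode)
  {
          struct root *root = inode->root;

          return root->id;
  }

  int main(void)
  {
          struct root r = { .id = 5 };
          struct inode ino = { .root = &r };

          printf("%d %d\n", log_inode_before(&r, &ino), log_inode_after(&ino));
          return 0;
  }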
This patch is part of a patchset comprised of the following 5 patches: btrfs: remove root argument from btrfs_log_inode() and its callees btrfs: remove redundant log root assignment from log_dir_items() btrfs: factor out the copying loop of dir items from log_dir_items() btrfs: insert items in batches when logging a directory when possible btrfs: keep track of the last logged keys when logging a directory This is patch 1/5. The change log of the last patch (5/5) has performance results. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 52 +++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index db5ead891e244..50f6b8480f708 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -94,7 +94,7 @@ enum { }; static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, + struct btrfs_inode *inode, int inode_only, struct btrfs_log_ctx *ctx); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, @@ -3638,13 +3638,14 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, * to replay anything deleted before the fsync */ static noinline int log_dir_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, int key_type, struct btrfs_log_ctx *ctx, u64 min_offset, u64 *last_offset_ret) { struct btrfs_key min_key; + struct btrfs_root *root = inode->root; struct btrfs_root *log = root->log_root; struct extent_buffer *src; int err = 0; @@ -3845,7 +3846,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * key logged by this transaction. 
*/ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, + struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path, struct btrfs_log_ctx *ctx) @@ -3859,7 +3860,7 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, min_key = 0; max_key = 0; while (1) { - ret = log_dir_items(trans, root, inode, path, dst_path, key_type, + ret = log_dir_items(trans, inode, path, dst_path, key_type, ctx, min_key, &max_key); if (ret) return ret; @@ -4356,13 +4357,13 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, } static int log_one_extent(struct btrfs_trans_handle *trans, - struct btrfs_inode *inode, struct btrfs_root *root, + struct btrfs_inode *inode, const struct extent_map *em, struct btrfs_path *path, struct btrfs_log_ctx *ctx) { struct btrfs_drop_extents_args drop_args = { 0 }; - struct btrfs_root *log = root->log_root; + struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_map_token token; @@ -4580,7 +4581,6 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_log_ctx *ctx) @@ -4645,7 +4645,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, write_unlock(&tree->lock); - ret = log_one_extent(trans, inode, root, em, path, ctx); + ret = log_one_extent(trans, inode, em, path, ctx); write_lock(&tree->lock); clear_em_logging(tree, em); free_extent_map(em); @@ -4734,11 +4734,11 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, * with a journal, ext3/4, xfs, f2fs, etc). */ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path, struct btrfs_path *dst_path) { + struct btrfs_root *root = inode->root; int ret; struct btrfs_key key; const u64 ino = btrfs_ino(inode); @@ -4812,10 +4812,10 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, * truncate operation that changes the inode's size. */ static int btrfs_log_holes(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, struct btrfs_path *path) { + struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; const u64 ino = btrfs_ino(inode); @@ -5092,7 +5092,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, if (IS_ERR(inode)) { ret = PTR_ERR(inode); } else { - ret = btrfs_log_inode(trans, root, + ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_OTHER_INODE_ALL, ctx); @@ -5152,8 +5152,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * well because during a rename we pin the log and update the * log with the new name before we unpin it. */ - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), - LOG_OTHER_INODE, ctx); + ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_OTHER_INODE, ctx); if (ret) { btrfs_add_delayed_iput(inode); continue; @@ -5364,7 +5363,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, * This handles both files and directories. 
*/ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_inode *inode, + struct btrfs_inode *inode, int inode_only, struct btrfs_log_ctx *ctx) { @@ -5372,7 +5371,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_path *dst_path; struct btrfs_key min_key; struct btrfs_key max_key; - struct btrfs_root *log = root->log_root; + struct btrfs_root *log = inode->root->log_root; int err = 0; int ret = 0; bool fast_search = false; @@ -5522,14 +5521,14 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + err = btrfs_log_all_xattrs(trans, inode, path, dst_path); if (err) goto out_unlock; xattrs_logged = true; if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_holes(trans, root, inode, path); + err = btrfs_log_holes(trans, inode, path); if (err) goto out_unlock; } @@ -5549,16 +5548,14 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * BTRFS_INODE_COPY_EVERYTHING set. */ if (!xattrs_logged && inode->logged_trans < trans->transid) { - err = btrfs_log_all_xattrs(trans, root, inode, path, - dst_path); + err = btrfs_log_all_xattrs(trans, inode, path, dst_path); if (err) goto out_unlock; btrfs_release_path(path); } } if (fast_search) { - ret = btrfs_log_changed_extents(trans, root, inode, dst_path, - ctx); + ret = btrfs_log_changed_extents(trans, inode, dst_path, ctx); if (ret) { err = ret; goto out_unlock; @@ -5573,8 +5570,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->vfs_inode.i_mode)) { - ret = log_directory_changes(trans, root, inode, path, dst_path, - ctx); + ret = log_directory_changes(trans, inode, path, dst_path, ctx); if (ret) { err = ret; goto out_unlock; @@ -5803,7 +5799,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, ctx->log_new_dentries = false; if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK) log_mode = LOG_INODE_ALL; - ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode), + ret = btrfs_log_inode(trans, BTRFS_I(di_inode), log_mode, ctx); btrfs_add_delayed_iput(di_inode); if (ret) @@ -5948,7 +5944,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, } ctx->log_new_dentries = false; - ret = btrfs_log_inode(trans, root, BTRFS_I(dir_inode), + ret = btrfs_log_inode(trans, BTRFS_I(dir_inode), LOG_INODE_ALL, ctx); if (!ret && ctx->log_new_dentries) ret = log_new_dir_dentries(trans, root, @@ -5996,7 +5992,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, if (BTRFS_I(inode)->generation >= trans->transid && need_log_inode(trans, BTRFS_I(inode))) - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), + ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_INODE_EXISTS, ctx); btrfs_add_delayed_iput(inode); if (ret) @@ -6051,7 +6047,7 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans, if (inode->generation >= trans->transid && need_log_inode(trans, inode)) { - ret = btrfs_log_inode(trans, root, inode, + ret = btrfs_log_inode(trans, inode, LOG_INODE_EXISTS, ctx); if (ret) break; @@ -6194,7 +6190,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only, ctx); + ret = btrfs_log_inode(trans, inode, inode_only, ctx); if (ret) goto end_trans; From 
dd3353aa607db319ecd1dca63ecdb024de4a4188 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Sep 2021 11:32:11 +0100 Subject: [PATCH 0316/1202] btrfs: remove redundant log root assignment from log_dir_items() At log_dir_items() we are assigning the exact same value to the local variable 'log', once when it's declared and once again shortly after. Remove the second assignment, as it's pointless. This patch is part of a patchset comprised of the following 5 patches: btrfs: remove root argument from btrfs_log_inode() and its callees btrfs: remove redundant log root assignment from log_dir_items() btrfs: factor out the copying loop of dir items from log_dir_items() btrfs: insert items in batches when logging a directory when possible btrfs: keep track of the last logged keys when logging a directory This is patch 2/5. The change log of the last patch (5/5) has performance results. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 50f6b8480f708..a7018eb3c8ec5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3656,8 +3656,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, u64 last_offset = (u64)-1; u64 ino = btrfs_ino(inode); - log = root->log_root; - min_key.objectid = ino; min_key.type = key_type; min_key.offset = min_offset; From 5d06e5785236d23868eebe03155aef68fc39954b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Sep 2021 11:32:12 +0100 Subject: [PATCH 0317/1202] btrfs: factor out the copying loop of dir items from log_dir_items() In preparation for the next change, move the loop that processes a leaf and copies its directory items to the log into a separate helper function. This makes the next change simpler and it also helps make log_dir_items() a bit shorter (especially after the next change). This patch is part of a patchset comprised of the following 5 patches: btrfs: remove root argument from btrfs_log_inode() and its callees btrfs: remove redundant log root assignment from log_dir_items() btrfs: factor out the copying loop of dir items from log_dir_items() btrfs: insert items in batches when logging a directory when possible btrfs: keep track of the last logged keys when logging a directory This is patch 3/5. The change log of the last patch (5/5) has performance results.
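The extraction follows the same return-value convention the btrfs code uses for the helper: negative for an error, positive for "stop iterating", zero to continue, which keeps the caller's control flow flat. A compilable toy version of that convention (all names hypothetical, not the btrfs functions):

  #include <stdio.h>

  /* Returns <0 on error, >0 when iteration should stop, 0 to go on. */
  static int process_leaf(int leaf)
  {
          if (leaf < 0)
                  return -1;      /* error */
          if (leaf > 100)
                  return 1;       /* past the range we care about: stop */
          printf("copied leaf %d\n", leaf);
          return 0;
  }

  static int log_items(const int *leaves, int n)
  {
          for (int i = 0; i < n; i++) {
                  int ret = process_leaf(leaves[i]);

                  if (ret != 0)
                          return ret < 0 ? ret : 0; /* error or clean stop */
          }
          return 0;
  }

  int main(void)
  {
          const int leaves[] = { 1, 2, 300, 4 };

          return log_items(leaves, 4);
  }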
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 135 ++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 60 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a7018eb3c8ec5..21445334efdf8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3632,6 +3632,66 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, return 0; } +static int process_dir_items_leaf(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path, + int key_type, + struct btrfs_log_ctx *ctx) +{ + struct btrfs_root *log = inode->root->log_root; + struct extent_buffer *src = path->nodes[0]; + const int nritems = btrfs_header_nritems(src); + const u64 ino = btrfs_ino(inode); + int i; + + for (i = path->slots[0]; i < nritems; i++) { + struct btrfs_key key; + struct btrfs_dir_item *di; + int ret; + + btrfs_item_key_to_cpu(src, &key, i); + + if (key.objectid != ino || key.type != key_type) + return 1; + + ret = overwrite_item(trans, log, dst_path, src, i, &key); + if (ret < 0) + return ret; + + /* + * We must make sure that when we log a directory entry, the + * corresponding inode, after log replay, has a matching link + * count. For example: + * + * touch foo + * mkdir mydir + * sync + * ln foo mydir/bar + * xfs_io -c "fsync" mydir + * + * + * + * Would result in a fsync log that when replayed, our file inode + * would have a link count of 1, but we get two directory entries + * pointing to the same inode. After removing one of the names, + * it would not be possible to remove the other name, which + * resulted always in stale file handle errors, and would not be + * possible to rmdir the parent directory, since its i_size could + * never be decremented to the value BTRFS_EMPTY_DIR_SIZE, + * resulting in -ENOTEMPTY errors. + */ + di = btrfs_item_ptr(src, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(src, di, &key); + if ((btrfs_dir_transid(src, di) == trans->transid || + btrfs_dir_type(src, di) == BTRFS_FT_DIR) && + key.type != BTRFS_ROOT_ITEM_KEY) + ctx->log_new_dentries = true; + } + + return 0; +} + /* * log all the items included in the current transaction for a given * directory. 
This also creates the range items in the log tree required @@ -3647,11 +3707,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_root *root = inode->root; struct btrfs_root *log = root->log_root; - struct extent_buffer *src; int err = 0; int ret; - int i; - int nritems; u64 first_offset = min_offset; u64 last_offset = (u64)-1; u64 ino = btrfs_ino(inode); @@ -3729,61 +3786,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * from our directory */ while (1) { - struct btrfs_key tmp; - src = path->nodes[0]; - nritems = btrfs_header_nritems(src); - for (i = path->slots[0]; i < nritems; i++) { - struct btrfs_dir_item *di; - - btrfs_item_key_to_cpu(src, &min_key, i); - - if (min_key.objectid != ino || min_key.type != key_type) - goto done; - - if (need_resched()) { - btrfs_release_path(path); - cond_resched(); - goto search; - } - - ret = overwrite_item(trans, log, dst_path, src, i, - &min_key); - if (ret) { + ret = process_dir_items_leaf(trans, inode, path, dst_path, + key_type, ctx); + if (ret != 0) { + if (ret < 0) err = ret; - goto done; - } - - /* - * We must make sure that when we log a directory entry, - * the corresponding inode, after log replay, has a - * matching link count. For example: - * - * touch foo - * mkdir mydir - * sync - * ln foo mydir/bar - * xfs_io -c "fsync" mydir - * - * - * - * Would result in a fsync log that when replayed, our - * file inode would have a link count of 1, but we get - * two directory entries pointing to the same inode. - * After removing one of the names, it would not be - * possible to remove the other name, which resulted - * always in stale file handle errors, and would not - * be possible to rmdir the parent directory, since - * its i_size could never decrement to the value - * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors. - */ - di = btrfs_item_ptr(src, i, struct btrfs_dir_item); - btrfs_dir_item_key_to_cpu(src, di, &tmp); - if ((btrfs_dir_transid(src, di) == trans->transid || - btrfs_dir_type(src, di) == BTRFS_FT_DIR) && - tmp.type != BTRFS_ROOT_ITEM_KEY) - ctx->log_new_dentries = true; + goto done; } - path->slots[0] = nritems; + path->slots[0] = btrfs_header_nritems(path->nodes[0]); /* * look ahead to the next item and see if it is also @@ -3797,21 +3807,26 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, err = ret; goto done; } - btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); - if (tmp.objectid != ino || tmp.type != key_type) { + btrfs_item_key_to_cpu(path->nodes[0], &min_key, path->slots[0]); + if (min_key.objectid != ino || min_key.type != key_type) { last_offset = (u64)-1; goto done; } if (btrfs_header_generation(path->nodes[0]) != trans->transid) { ret = overwrite_item(trans, log, dst_path, path->nodes[0], path->slots[0], - &tmp); + &min_key); if (ret) err = ret; else - last_offset = tmp.offset; + last_offset = min_key.offset; goto done; } + if (need_resched()) { + btrfs_release_path(path); + cond_resched(); + goto search; + } } done: btrfs_release_path(path); From bbd8514027c1b77a73482ee6e594298355e266ea Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Sep 2021 11:32:13 +0100 Subject: [PATCH 0318/1202] btrfs: insert items in batches when logging a directory when possible When logging a directory, we scan its directory items from the subvolume tree and then copy one by one into the log tree. 
This is not efficient since we are generally able to insert several items in a batch, using a single btree operation for adding several items at once. The reason we copy items one by one is that we must check if each item was previously logged in the current transaction, and if it was we either overwrite it or skip it in case its content did not change in the subvolume tree (this can happen only for dir item keys, but not for dir index keys), and doing such a check makes it a bit cumbersome to attempt batch insertions. However, the chances for doing batch insertions are actually very frequent, and they always arise when: 1) Logging the directory for the first time in the current transaction, as none of the items exist in the log tree yet; 2) Logging new dir index keys, because the offset for new dir index keys comes from a monotonically increasing counter. This means if we keep adding dentries to a directory, through creation of new files and sub-directories or by adding new links or renaming from some other directory into the one we are logging, all the new dir index keys have a new offset that is greater than the offset of any previously logged index keys, so we can insert them in batches into the log tree. For dir item keys, since their offset depends on the result of a hash function of the dentry's name, unless the directory is being logged for the first time in the current transaction, the chances of being able to insert the items in the log using batches are pretty much random and not predictable, as it depends on the names of the dentries, but it still happens often enough. So change directory logging to keep track of consecutive directory items that don't exist yet in the log and batch insert them. This patch is part of a patchset comprised of the following 5 patches: btrfs: remove root argument from btrfs_log_inode() and its callees btrfs: remove redundant log root assignment from log_dir_items() btrfs: factor out the copying loop of dir items from log_dir_items() btrfs: insert items in batches when logging a directory when possible btrfs: keep track of the last logged keys when logging a directory This is patch 4/5. The change log of the last patch (5/5) has performance results. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 217 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 180 insertions(+), 37 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 21445334efdf8..3ad5b743dba9b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -368,25 +368,11 @@ static int process_one_buffer(struct btrfs_root *log, return ret; } -/* - * Item overwrite used by replay and tree logging. eb, slot and key all refer - * to the src data we are copying out. - * - * root is the tree we are copying into, and path is a scratch - * path for use in this function (it should be released on entry and - * will be released on exit). - * - * If the key is already in the destination tree the existing item is - * overwritten. If the existing item isn't big enough, it is extended. - * If it is too large, it is truncated. - * - * If the key isn't in the destination yet, a new item is inserted.
- */ -static noinline int overwrite_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *eb, int slot, - struct btrfs_key *key) +static int do_overwrite_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) { int ret; u32 item_size; @@ -403,10 +389,22 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, item_size = btrfs_item_size_nr(eb, slot); src_ptr = btrfs_item_ptr_offset(eb, slot); - /* look for the key in the destination tree */ - ret = btrfs_search_slot(NULL, root, key, path, 0, 0); - if (ret < 0) - return ret; + /* Our caller must have done a search for the key for us. */ + ASSERT(path->nodes[0] != NULL); + + /* + * And the slot must point to the exact key or the slot where the key + * should be at (the first item with a key greater than 'key') + */ + if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { + struct btrfs_key found_key; + + btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); + ret = btrfs_comp_cpu_keys(&found_key, key); + ASSERT(ret >= 0); + } else { + ret = 1; + } if (ret == 0) { char *src_copy; @@ -584,6 +582,36 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } +/* + * Item overwrite used by replay and tree logging. eb, slot and key all refer + * to the src data we are copying out. + * + * root is the tree we are copying into, and path is a scratch + * path for use in this function (it should be released on entry and + * will be released on exit). + * + * If the key is already in the destination tree the existing item is + * overwritten. If the existing item isn't big enough, it is extended. + * If it is too large, it is truncated. + * + * If the key isn't in the destination yet, a new item is inserted. + */ +static int overwrite_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *eb, int slot, + struct btrfs_key *key) +{ + int ret; + + /* Look for the key in the destination tree. 
*/ + ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + + return do_overwrite_item(trans, root, path, eb, slot, key); +} + /* * simple helper to read an inode off the disk from a given root * This can only be called for subvolume roots and not for the log @@ -3632,6 +3660,68 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, return 0; } +static int flush_dir_items_batch(struct btrfs_trans_handle *trans, + struct btrfs_root *log, + struct extent_buffer *src, + struct btrfs_path *dst_path, + int start_slot, + int count) +{ + char *ins_data = NULL; + struct btrfs_key *ins_keys; + u32 *ins_sizes; + struct extent_buffer *dst; + struct btrfs_key key; + u32 item_size; + int ret; + int i; + + ASSERT(count > 0); + + if (count == 1) { + btrfs_item_key_to_cpu(src, &key, start_slot); + item_size = btrfs_item_size_nr(src, start_slot); + ins_keys = &key; + ins_sizes = &item_size; + } else { + ins_data = kmalloc(count * sizeof(u32) + + count * sizeof(struct btrfs_key), GFP_NOFS); + if (!ins_data) + return -ENOMEM; + + ins_sizes = (u32 *)ins_data; + ins_keys = (struct btrfs_key *)(ins_data + count * sizeof(u32)); + + for (i = 0; i < count; i++) { + const int slot = start_slot + i; + + btrfs_item_key_to_cpu(src, &ins_keys[i], slot); + ins_sizes[i] = btrfs_item_size_nr(src, slot); + } + } + + ret = btrfs_insert_empty_items(trans, log, dst_path, ins_keys, ins_sizes, + count); + if (ret) + goto out; + + dst = dst_path->nodes[0]; + for (i = 0; i < count; i++) { + unsigned long src_offset; + unsigned long dst_offset; + + dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0]); + src_offset = btrfs_item_ptr_offset(src, start_slot + i); + copy_extent_buffer(dst, src, dst_offset, src_offset, ins_sizes[i]); + dst_path->slots[0]++; + } + btrfs_release_path(dst_path); +out: + kfree(ins_data); + + return ret; +} + static int process_dir_items_leaf(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *path, @@ -3643,21 +3733,22 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, struct extent_buffer *src = path->nodes[0]; const int nritems = btrfs_header_nritems(src); const u64 ino = btrfs_ino(inode); + const bool inode_logged_before = inode_logged(trans, inode); + bool last_found = false; + int batch_start = 0; + int batch_size = 0; int i; for (i = path->slots[0]; i < nritems; i++) { struct btrfs_key key; - struct btrfs_dir_item *di; int ret; btrfs_item_key_to_cpu(src, &key, i); - if (key.objectid != ino || key.type != key_type) - return 1; - - ret = overwrite_item(trans, log, dst_path, src, i, &key); - if (ret < 0) - return ret; + if (key.objectid != ino || key.type != key_type) { + last_found = true; + break; + } /* * We must make sure that when we log a directory entry, the @@ -3681,15 +3772,67 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, * never be decremented to the value BTRFS_EMPTY_DIR_SIZE, * resulting in -ENOTEMPTY errors. 
*/ - di = btrfs_item_ptr(src, i, struct btrfs_dir_item); - btrfs_dir_item_key_to_cpu(src, di, &key); - if ((btrfs_dir_transid(src, di) == trans->transid || - btrfs_dir_type(src, di) == BTRFS_FT_DIR) && - key.type != BTRFS_ROOT_ITEM_KEY) - ctx->log_new_dentries = true; + if (!ctx->log_new_dentries) { + struct btrfs_dir_item *di; + struct btrfs_key di_key; + + di = btrfs_item_ptr(src, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(src, di, &di_key); + if ((btrfs_dir_transid(src, di) == trans->transid || + btrfs_dir_type(src, di) == BTRFS_FT_DIR) && + di_key.type != BTRFS_ROOT_ITEM_KEY) + ctx->log_new_dentries = true; + } + + if (!inode_logged_before) + goto add_to_batch; + /* + * Check if the key was already logged before. If not we can add + * it to a batch for bulk insertion. + */ + ret = btrfs_search_slot(NULL, log, &key, dst_path, 0, 0); + if (ret < 0) { + return ret; + } else if (ret > 0) { + btrfs_release_path(dst_path); + goto add_to_batch; + } + + /* + * Item exists in the log. Overwrite the item in the log if it + * has different content or do nothing if it has exactly the same + * content. And then flush the current batch if any - do it after + * overwriting the current item, or we would deadlock otherwise, + * since we are holding a path for the existing item. + */ + ret = do_overwrite_item(trans, log, dst_path, src, i, &key); + if (ret < 0) + return ret; + + if (batch_size > 0) { + ret = flush_dir_items_batch(trans, log, src, dst_path, + batch_start, batch_size); + if (ret < 0) + return ret; + batch_size = 0; + } + continue; +add_to_batch: + if (batch_size == 0) + batch_start = i; + batch_size++; } - return 0; + if (batch_size > 0) { + int ret; + + ret = flush_dir_items_batch(trans, log, src, dst_path, + batch_start, batch_size); + if (ret < 0) + return ret; + } + + return last_found ? 1 : 0; } From ca997c137e23acd0f9c32961bdeb1221d8897e8a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Sep 2021 11:32:14 +0100 Subject: [PATCH 0319/1202] btrfs: keep track of the last logged keys when logging a directory After the first time we log a directory in the current transaction, for each directory item in a changed leaf of the subvolume tree, we have to check if we previously logged the item, in order to overwrite it in case its data changed or skip it in case its data hasn't changed. Checking if we have logged each item before not only wastes time, but it also adds lock contention on the log tree. So in order to minimize the number of times we do such checks, keep track of the offset of the last key we logged for a directory and, the next time we log the directory, skip the checks for any new keys that have an offset greater than the offset we have previously saved. This is especially effective for index keys, because the offset for these keys comes from a monotonically increasing counter. This patch is part of a patchset comprised of the following 5 patches: btrfs: remove root argument from btrfs_log_inode() and its callees btrfs: remove redundant log root assignment from log_dir_items() btrfs: factor out the copying loop of dir items from log_dir_items() btrfs: insert items in batches when logging a directory when possible btrfs: keep track of the last logged keys when logging a directory This is patch 5/5.
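The core idea fits in a few lines: remember the highest key offset persisted so far and consult it before doing an expensive lookup. A minimal userspace model of that idea, simplified from the per-leaf logic above (monotonic offsets stand in for BTRFS_DIR_INDEX_KEY offsets, and the log-tree search is simulated):

  #include <stdbool.h>
  #include <stdio.h>

  static unsigned long long last_logged_offset; /* 0: nothing logged yet */

  /* Stand-in for a btrfs_search_slot() call on the log tree. */
  static bool key_in_log(unsigned long long offset)
  {
          printf("  expensive log-tree search for offset %llu\n", offset);
          return offset <= last_logged_offset; /* simulated answer */
  }

  static void log_dir_key(unsigned long long offset)
  {
          if (offset > last_logged_offset) {
                  /* New key: cannot be in the log, add straight to a batch. */
                  printf("batch-insert offset %llu (no search)\n", offset);
                  last_logged_offset = offset;
          } else if (key_in_log(offset)) {
                  printf("overwrite offset %llu in place\n", offset);
          } else {
                  printf("batch-insert offset %llu\n", offset);
          }
  }

  int main(void)
  {
          /* First logging of the directory: offsets are monotonic. */
          log_dir_key(1);
          log_dir_key(2);
          /* Directory logged again later: only old offsets need a search. */
          log_dir_key(2);  /* triggers the expensive search */
          log_dir_key(3);  /* skips it */
          return 0;
  }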
The following test was used on a non-debug kernel to measure the impact
it has on a directory fsync:

  $ cat test-dir-fsync.sh
  #!/bin/bash

  DEV=/dev/nvme0n1
  MNT=/mnt/nvme0n1

  NUM_NEW_FILES=100000
  NUM_FILE_DELETES=1000

  mkfs.btrfs -f $DEV
  mount -o ssd $DEV $MNT

  mkdir $MNT/testdir

  for ((i = 1; i <= $NUM_NEW_FILES; i++)); do
      echo -n > $MNT/testdir/file_$i
  done

  # fsync the directory, this will log the new dir items and the inodes
  # they point to, because these are new inodes.
  start=$(date +%s%N)
  xfs_io -c "fsync" $MNT/testdir
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "dir fsync took $dur ms after adding $NUM_NEW_FILES files"

  # sync to force transaction commit and wipe out the log.
  sync

  del_inc=$(( $NUM_NEW_FILES / $NUM_FILE_DELETES ))
  for ((i = 1; i <= $NUM_NEW_FILES; i += $del_inc)); do
      rm -f $MNT/testdir/file_$i
  done

  # fsync the directory, this will only log dir items, there are no
  # dentries pointing to new inodes.
  start=$(date +%s%N)
  xfs_io -c "fsync" $MNT/testdir
  end=$(date +%s%N)

  dur=$(( (end - start) / 1000000 ))
  echo "dir fsync took $dur ms after deleting $NUM_FILE_DELETES files"

  umount $MNT

Test results with NUM_NEW_FILES set to 100 000 and 1 000 000:

**** before patchset, 100 000 files, 1000 deletes ****

dir fsync took 848 ms after adding 100000 files
dir fsync took 175 ms after deleting 1000 files

**** after patchset, 100 000 files, 1000 deletes ****

dir fsync took 758 ms after adding 100000 files  (-10.6%)
dir fsync took 63 ms after deleting 1000 files   (-64.0%)

**** before patchset, 1 000 000 files, 1000 deletes ****

dir fsync took 9945 ms after adding 1000000 files
dir fsync took 473 ms after deleting 1000 files

**** after patchset, 1 000 000 files, 1000 deletes ****

dir fsync took 8677 ms after adding 1000000 files  (-12.7%)
dir fsync took 146 ms after deleting 1000 files    (-69.1%)

Signed-off-by: Filipe Manana
Signed-off-by: David Sterba
---
 fs/btrfs/btrfs_inode.h | 39 ++++++++++++++++++++++++++++-----------
 fs/btrfs/inode.c       |  6 ++++--
 fs/btrfs/tree-log.c    | 41 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/tree-log.h    |  2 ++
 4 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 76ee1452c57ba..602b426c286da 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -138,17 +138,34 @@ struct btrfs_inode {
 	/* a local copy of root's last_log_commit */
 	int last_log_commit;
 
-	/* total number of bytes pending delalloc, used by stat to calc the
-	 * real block usage of the file
-	 */
-	u64 delalloc_bytes;
-
-	/*
-	 * Total number of bytes pending delalloc that fall within a file
-	 * range that is either a hole or beyond EOF (and no prealloc extent
-	 * exists in the range). This is always <= delalloc_bytes.
-	 */
-	u64 new_delalloc_bytes;
+	union {
+		/*
+		 * Total number of bytes pending delalloc, used by stat to
+		 * calculate the real block usage of the file. This is used
+		 * only for files.
+		 */
+		u64 delalloc_bytes;
+		/*
+		 * The offset of the last dir item key that was logged.
+		 * This is used only for directories.
+		 */
+		u64 last_dir_item_offset;
+	};
+
+	union {
+		/*
+		 * Total number of bytes pending delalloc that fall within a file
+		 * range that is either a hole or beyond EOF (and no prealloc extent
+		 * exists in the range). This is always <= delalloc_bytes and this
+		 * is used only for files.
+		 */
+		u64 new_delalloc_bytes;
+		/*
+		 * The offset of the last dir index key that was logged.
+		 * This is used only for directories.
+ */ + u64 last_dir_index_offset; + }; /* * total number of bytes pending defrag, used by stat to check whether diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9b05af75d9109..027f2ebc8dc31 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9161,8 +9161,10 @@ void btrfs_destroy_inode(struct inode *vfs_inode) WARN_ON(inode->block_rsv.reserved); WARN_ON(inode->block_rsv.size); WARN_ON(inode->outstanding_extents); - WARN_ON(inode->delalloc_bytes); - WARN_ON(inode->new_delalloc_bytes); + if (!S_ISDIR(vfs_inode->i_mode)) { + WARN_ON(inode->delalloc_bytes); + WARN_ON(inode->new_delalloc_bytes); + } WARN_ON(inode->csum_bytes); WARN_ON(inode->defrag_bytes); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3ad5b743dba9b..46932ed65f184 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3734,11 +3734,17 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, const int nritems = btrfs_header_nritems(src); const u64 ino = btrfs_ino(inode); const bool inode_logged_before = inode_logged(trans, inode); + u64 last_logged_key_offset; bool last_found = false; int batch_start = 0; int batch_size = 0; int i; + if (key_type == BTRFS_DIR_ITEM_KEY) + last_logged_key_offset = inode->last_dir_item_offset; + else + last_logged_key_offset = inode->last_dir_index_offset; + for (i = path->slots[0]; i < nritems; i++) { struct btrfs_key key; int ret; @@ -3750,6 +3756,7 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, break; } + ctx->last_dir_item_offset = key.offset; /* * We must make sure that when we log a directory entry, the * corresponding inode, after log replay, has a matching link @@ -3786,6 +3793,15 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, if (!inode_logged_before) goto add_to_batch; + + /* + * If we were logged before and have logged dir items, we can skip + * checking if any item with a key offset larger than the last one + * we logged is in the log tree, saving time and avoiding adding + * contention on the log tree. + */ + if (key.offset > last_logged_key_offset) + goto add_to_batch; /* * Check if the key was already logged before. If not we can add * it to a batch for bulk insertion. @@ -4012,9 +4028,31 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans, int ret; int key_type = BTRFS_DIR_ITEM_KEY; + /* + * If this is the first time we are being logged in the current + * transaction, or we were logged before but the inode was evicted and + * reloaded later, in which case its logged_trans is 0, reset the values + * of the last logged key offsets. Note that we don't use the helper + * function inode_logged() here - that is because the function returns + * true after an inode eviction, assuming the worst case as it can not + * know for sure if the inode was logged before. So we can not skip key + * searches in the case the inode was evicted, because it may not have + * been logged in this transaction and may have been logged in a past + * transaction, so we need to reset the last dir item and index offsets + * to (u64)-1. 
+	 */
+	if (inode->logged_trans != trans->transid) {
+		inode->last_dir_item_offset = (u64)-1;
+		inode->last_dir_index_offset = (u64)-1;
+	}
 again:
 	min_key = 0;
 	max_key = 0;
+	if (key_type == BTRFS_DIR_ITEM_KEY)
+		ctx->last_dir_item_offset = inode->last_dir_item_offset;
+	else
+		ctx->last_dir_item_offset = inode->last_dir_index_offset;
+
 	while (1) {
 		ret = log_dir_items(trans, inode, path, dst_path, key_type,
 				    ctx, min_key, &max_key);
@@ -4026,8 +4064,11 @@ static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
 	}
 
 	if (key_type == BTRFS_DIR_ITEM_KEY) {
+		inode->last_dir_item_offset = ctx->last_dir_item_offset;
 		key_type = BTRFS_DIR_INDEX_KEY;
 		goto again;
+	} else {
+		inode->last_dir_index_offset = ctx->last_dir_item_offset;
 	}
 
 	return 0;
 }
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 731bd9c029f55..3ce6bdb760096 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
 	int log_transid;
 	bool log_new_dentries;
 	bool logging_new_name;
+	/* Tracks the last logged dir item/index key offset. */
+	u64 last_dir_item_offset;
 	struct inode *inode;
 	struct list_head list;
 	/* Only used for fast fsyncs. */

From ea092b2c956548aeaea4393f65bce980f13ac8ea Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Wed, 15 Sep 2021 15:17:16 +0800
Subject: [PATCH 0320/1202] btrfs: rename btrfs_bio to btrfs_io_context

The structure btrfs_bio is used by two different sites:

- bio->bi_private for mirror based profiles
  For those profiles (SINGLE/DUP/RAID1*/RAID10), this structure records
  how many mirrors are still pending, and saves the original endio
  function of the bio.

- RAID56 code
  In that case, RAID56 only utilizes the stripes info, and no longer
  uses it to trace the pending mirrors.

So btrfs_bio is not always bound to a bio, and contains more info for IO
context, thus renaming it makes the naming less confusing.
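As a rough mock of the two roles (hypothetical, simplified types - not
the actual kernel definition):

  #include <stdatomic.h>

  struct mock_bio;	/* stand-in for struct bio */

  struct mock_io_stripe {
  	unsigned long long physical;	/* location on one device */
  };

  struct mock_io_context {
  	/*
  	 * Mirror based profiles reach this via bio->bi_private: the count
  	 * of stripes still in flight plus the saved original endio to
  	 * call once the last mirror completes.
  	 */
  	atomic_int stripes_pending;
  	void (*end_io)(struct mock_bio *bio);

  	/*
  	 * The RAID56 code only consumes the stripe mapping below and
  	 * never binds the context to a bio.
  	 */
  	int num_stripes;
  	struct mock_io_stripe stripes[];	/* flexible array member */
  };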
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/extent-tree.c | 19 ++- fs/btrfs/extent_io.c | 18 +-- fs/btrfs/extent_map.c | 4 +- fs/btrfs/raid56.c | 127 +++++++++--------- fs/btrfs/raid56.h | 8 +- fs/btrfs/reada.c | 26 ++-- fs/btrfs/scrub.c | 115 ++++++++-------- fs/btrfs/volumes.c | 267 ++++++++++++++++++------------------- fs/btrfs/volumes.h | 38 ++++-- fs/btrfs/zoned.c | 16 +-- 11 files changed, 325 insertions(+), 315 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 86816088927f1..81b11124b67a8 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1455,7 +1455,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, struct btrfs_fs_info *fs_info = state->fs_info; int ret; u64 length; - struct btrfs_bio *multi = NULL; + struct btrfs_io_context *multi = NULL; struct btrfs_device *device; length = len; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ea651b62d3360..e4b3eaee716ac 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, return ret; } -static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes) +static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes) { struct btrfs_device *dev = stripe->dev; struct btrfs_fs_info *fs_info = dev->fs_info; @@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, u64 discarded_bytes = 0; u64 end = bytenr + num_bytes; u64 cur = bytenr; - struct btrfs_bio *bbio = NULL; - + struct btrfs_io_context *bioc = NULL; /* - * Avoid races with device replace and make sure our bbio has devices + * Avoid races with device replace and make sure our bioc has devices * associated to its stripes that don't go away while we are discarding. */ btrfs_bio_counter_inc_blocked(fs_info); while (cur < end) { - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; int i; num_bytes = end - cur; /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur, - &num_bytes, &bbio, 0); + &num_bytes, &bioc, 0); /* * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or * -EOPNOTSUPP. For any such error, @num_bytes is not updated, @@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, if (ret < 0) goto out; - stripe = bbio->stripes; - for (i = 0; i < bbio->num_stripes; i++, stripe++) { + stripe = bioc->stripes; + for (i = 0; i < bioc->num_stripes; i++, stripe++) { u64 bytes; struct btrfs_device *device = stripe->dev; @@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, * And since there are two loops, explicitly * go to out to avoid confusion. 
*/ - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); goto out; } @@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, */ ret = 0; } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); cur += num_bytes; } out: diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cb366f6bcaede..86190ce913239 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2290,7 +2290,7 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, struct btrfs_device *dev; u64 map_length = 0; u64 sector; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int ret; ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); @@ -2304,7 +2304,7 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, map_length = length; /* - * Avoid races with device replace and make sure our bbio has devices + * Avoid races with device replace and make sure our bioc has devices * associated to its stripes that don't go away while we are doing the * read repair operation. */ @@ -2317,28 +2317,28 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, * stripe's dev and sector. */ ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, - &map_length, &bbio, 0); + &map_length, &bioc, 0); if (ret) { btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } - ASSERT(bbio->mirror_num == 1); + ASSERT(bioc->mirror_num == 1); } else { ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, - &map_length, &bbio, mirror_num); + &map_length, &bioc, mirror_num); if (ret) { btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; } - BUG_ON(mirror_num != bbio->mirror_num); + BUG_ON(mirror_num != bioc->mirror_num); } - sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9; + sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; bio->bi_iter.bi_sector = sector; - dev = bbio->stripes[bbio->mirror_num - 1].dev; - btrfs_put_bbio(bbio); + dev = bioc->stripes[bioc->mirror_num - 1].dev; + btrfs_put_bioc(bioc); if (!dev || !dev->bdev || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { btrfs_bio_counter_dec(fs_info); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 4a8e02f7b6c7a..5a36add213053 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits) int i; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; set_extent_bits_nowait(&device->alloc_state, stripe->physical, @@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) int i; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_bio_stripe *stripe = &map->stripes[i]; + struct btrfs_io_stripe *stripe = &map->stripes[i]; struct btrfs_device *device = stripe->dev; __clear_extent_bit(&device->alloc_state, stripe->physical, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index d8d268ca8aa76..893d93e3c516d 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -61,7 +61,7 @@ enum btrfs_rbio_ops { struct btrfs_raid_bio { struct btrfs_fs_info *fs_info; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; /* while we're doing rmw on a stripe * we put it into a hash table so we can @@ -271,7 +271,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) */ static int rbio_bucket(struct btrfs_raid_bio *rbio) { - u64 num = rbio->bbio->raid_map[0]; + 
u64 num = rbio->bioc->raid_map[0]; /* * we shift down quite a bit. We're using byte @@ -559,8 +559,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, test_bit(RBIO_CACHE_BIT, &cur->flags)) return 0; - if (last->bbio->raid_map[0] != - cur->bbio->raid_map[0]) + if (last->bioc->raid_map[0] != cur->bioc->raid_map[0]) return 0; /* we can't merge with different operations */ @@ -673,7 +672,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) spin_lock_irqsave(&h->lock, flags); list_for_each_entry(cur, &h->hash_list, hash_list) { - if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0]) + if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0]) continue; spin_lock(&cur->bio_list_lock); @@ -838,7 +837,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) } } - btrfs_put_bbio(rbio->bbio); + btrfs_put_bioc(rbio->bioc); kfree(rbio); } @@ -906,7 +905,7 @@ static void raid_write_end_io(struct bio *bio) /* OK, we have read all the stripes we need to. */ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? - 0 : rbio->bbio->max_errors; + 0 : rbio->bioc->max_errors; if (atomic_read(&rbio->error) > max_errors) err = BLK_STS_IOERR; @@ -961,12 +960,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) * this does not allocate any pages for rbio->pages. */ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, - struct btrfs_bio *bbio, + struct btrfs_io_context *bioc, u64 stripe_len) { struct btrfs_raid_bio *rbio; int nr_data = 0; - int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; + int real_stripes = bioc->num_stripes - bioc->num_tgtdevs; int num_pages = rbio_nr_pages(stripe_len, real_stripes); int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); void *p; @@ -987,7 +986,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, spin_lock_init(&rbio->bio_list_lock); INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->hash_list); - rbio->bbio = bbio; + rbio->bioc = bioc; rbio->fs_info = fs_info; rbio->stripe_len = stripe_len; rbio->nr_pages = num_pages; @@ -1015,9 +1014,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages)); #undef CONSUME_ALLOC - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5) nr_data = real_stripes - 1; - else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) + else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) nr_data = real_stripes - 2; else BUG(); @@ -1077,10 +1076,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, struct bio *last = bio_list->tail; int ret; struct bio *bio; - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; u64 disk_start; - stripe = &rbio->bbio->stripes[stripe_nr]; + stripe = &rbio->bioc->stripes[stripe_nr]; disk_start = stripe->physical + (page_index << PAGE_SHIFT); /* if the device is missing, just fail this stripe */ @@ -1155,7 +1154,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) int i = 0; start = bio->bi_iter.bi_sector << 9; - stripe_offset = start - rbio->bbio->raid_map[0]; + stripe_offset = start - rbio->bioc->raid_map[0]; page_index = stripe_offset >> PAGE_SHIFT; if (bio_flagged(bio, BIO_CLONED)) @@ -1179,7 +1178,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) */ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) { - struct btrfs_bio *bbio = rbio->bbio; + struct btrfs_io_context *bioc = rbio->bioc; void **pointers = rbio->finish_pointers; int 
nr_data = rbio->nr_data; int stripe; @@ -1284,11 +1283,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } } - if (likely(!bbio->num_tgtdevs)) + if (likely(!bioc->num_tgtdevs)) goto write_data; for (stripe = 0; stripe < rbio->real_stripes; stripe++) { - if (!bbio->tgtdev_map[stripe]) + if (!bioc->tgtdev_map[stripe]) continue; for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { @@ -1302,7 +1301,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) } ret = rbio_add_io_page(rbio, &bio_list, page, - rbio->bbio->tgtdev_map[stripe], + rbio->bioc->tgtdev_map[stripe], pagenr, rbio->stripe_len); if (ret) goto cleanup; @@ -1339,12 +1338,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, { u64 physical = bio->bi_iter.bi_sector; int i; - struct btrfs_bio_stripe *stripe; + struct btrfs_io_stripe *stripe; physical <<= 9; - for (i = 0; i < rbio->bbio->num_stripes; i++) { - stripe = &rbio->bbio->stripes[i]; + for (i = 0; i < rbio->bioc->num_stripes; i++) { + stripe = &rbio->bioc->stripes[i]; if (in_range(physical, stripe->physical, rbio->stripe_len) && stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) { return i; @@ -1365,7 +1364,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, int i; for (i = 0; i < rbio->nr_data; i++) { - u64 stripe_start = rbio->bbio->raid_map[i]; + u64 stripe_start = rbio->bioc->raid_map[i]; if (in_range(logical, stripe_start, rbio->stripe_len)) return i; @@ -1456,7 +1455,7 @@ static void raid_rmw_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) goto cleanup; /* @@ -1538,8 +1537,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. */ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -1720,16 +1719,16 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) * our main entry point for writes from the rest of the FS. 
*/ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len) + struct btrfs_io_context *bioc, u64 stripe_len) { struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; int ret; - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return PTR_ERR(rbio); } bio_list_add(&rbio->bio_list, bio); @@ -1842,7 +1841,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) } /* all raid6 handling here */ - if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) { + if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) { /* * single failure, rebuild from parity raid5 * style @@ -1874,8 +1873,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) * here due to a crc mismatch and we can't give them the * data they want */ - if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { - if (rbio->bbio->raid_map[faila] == + if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) { + if (rbio->bioc->raid_map[faila] == RAID5_P_STRIPE) { err = BLK_STS_IOERR; goto cleanup; @@ -1887,7 +1886,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) goto pstripe; } - if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) { + if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) { raid6_datap_recov(rbio->real_stripes, PAGE_SIZE, faila, pointers); } else { @@ -2006,7 +2005,7 @@ static void raid_recover_end_io(struct bio *bio) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) rbio_orig_end_io(rbio, BLK_STS_IOERR); else __raid_recover_end_io(rbio); @@ -2074,7 +2073,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * were up to date, or we might have no bios to read because * the devices were gone. */ - if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { + if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) { __raid_recover_end_io(rbio); return 0; } else { @@ -2083,8 +2082,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. */ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -2117,21 +2116,21 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * of the drive. 
*/ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, int mirror_num, int generic_io) { struct btrfs_raid_bio *rbio; int ret; if (generic_io) { - ASSERT(bbio->mirror_num == mirror_num); + ASSERT(bioc->mirror_num == mirror_num); btrfs_io_bio(bio)->mirror_num = mirror_num; } - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) { if (generic_io) - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return PTR_ERR(rbio); } @@ -2142,11 +2141,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { btrfs_warn(fs_info, - "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)", +"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)", __func__, bio->bi_iter.bi_sector << 9, - (u64)bio->bi_iter.bi_size, bbio->map_type); + (u64)bio->bi_iter.bi_size, bioc->map_type); if (generic_io) - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); kfree(rbio); return -EIO; } @@ -2155,7 +2154,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, btrfs_bio_counter_inc_noblocked(fs_info); rbio->generic_bio_cnt = 1; } else { - btrfs_get_bbio(bbio); + btrfs_get_bioc(bioc); } /* @@ -2214,7 +2213,7 @@ static void read_rebuild_work(struct btrfs_work *work) /* * The following code is used to scrub/replace the parity stripe * - * Caller must have already increased bio_counter for getting @bbio. + * Caller must have already increased bio_counter for getting @bioc. * * Note: We need make sure all the pages that add into the scrub/replace * raid bio are correct and not be changed during the scrub/replace. That @@ -2223,14 +2222,14 @@ static void read_rebuild_work(struct btrfs_work *work) struct btrfs_raid_bio * raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors) { struct btrfs_raid_bio *rbio; int i; - rbio = alloc_rbio(fs_info, bbio, stripe_len); + rbio = alloc_rbio(fs_info, bioc, stripe_len); if (IS_ERR(rbio)) return NULL; bio_list_add(&rbio->bio_list, bio); @@ -2242,12 +2241,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, rbio->operation = BTRFS_RBIO_PARITY_SCRUB; /* - * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted + * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted * to the end position, so this search can start from the first parity * stripe. */ for (i = rbio->nr_data; i < rbio->real_stripes; i++) { - if (bbio->stripes[i].dev == scrub_dev) { + if (bioc->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; } @@ -2260,7 +2259,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); /* - * We have already increased bio_counter when getting bbio, record it + * We have already increased bio_counter when getting bioc, record it * so we can free it at rbio_orig_end_io(). 
*/ rbio->generic_bio_cnt = 1; @@ -2275,10 +2274,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, int stripe_offset; int index; - ASSERT(logical >= rbio->bbio->raid_map[0]); - ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + + ASSERT(logical >= rbio->bioc->raid_map[0]); + ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] + rbio->stripe_len * rbio->nr_data); - stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); + stripe_offset = (int)(logical - rbio->bioc->raid_map[0]); index = stripe_offset >> PAGE_SHIFT; rbio->bio_pages[index] = page; } @@ -2312,7 +2311,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) { - struct btrfs_bio *bbio = rbio->bbio; + struct btrfs_io_context *bioc = rbio->bioc; void **pointers = rbio->finish_pointers; unsigned long *pbitmap = rbio->finish_pbitmap; int nr_data = rbio->nr_data; @@ -2335,7 +2334,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, else BUG(); - if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { + if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) { is_replace = 1; bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); } @@ -2435,7 +2434,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); ret = rbio_add_io_page(rbio, &bio_list, page, - bbio->tgtdev_map[rbio->scrubp], + bioc->tgtdev_map[rbio->scrubp], pagenr, rbio->stripe_len); if (ret) goto cleanup; @@ -2483,7 +2482,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) */ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) { - if (atomic_read(&rbio->error) > rbio->bbio->max_errors) + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) goto cleanup; if (rbio->faila >= 0 || rbio->failb >= 0) { @@ -2504,7 +2503,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) * the data, so the capability of the repair is declined. * (In the case of RAID5, we can not repair anything) */ - if (dfail > rbio->bbio->max_errors - 1) + if (dfail > rbio->bioc->max_errors - 1) goto cleanup; /* @@ -2625,8 +2624,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) } /* - * the bbio may be freed once we submit the last bio. Make sure - * not to touch it after that + * The bioc may be freed once we submit the last bio. Make sure not to + * touch it after that. 
*/ atomic_set(&rbio->stripes_pending, bios_to_read); while ((bio = bio_list_pop(&bio_list))) { @@ -2671,11 +2670,11 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio * raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 length) + struct btrfs_io_context *bioc, u64 length) { struct btrfs_raid_bio *rbio; - rbio = alloc_rbio(fs_info, bbio, length); + rbio = alloc_rbio(fs_info, bioc, length); if (IS_ERR(rbio)) return NULL; @@ -2695,7 +2694,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, } /* - * When we get bbio, we have already increased bio_counter, record it + * When we get bioc, we have already increased bio_counter, record it * so we can free it at rbio_orig_end_io() */ rbio->generic_bio_cnt = 1; diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 2503485db859b..838d3a5e07ef4 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -31,24 +31,24 @@ struct btrfs_raid_bio; struct btrfs_device; int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, int mirror_num, int generic_io); int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len); + struct btrfs_io_context *bioc, u64 stripe_len); void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, u64 logical); struct btrfs_raid_bio * raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 stripe_len, + struct btrfs_io_context *bioc, u64 stripe_len, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors); void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); struct btrfs_raid_bio * raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_bio *bbio, u64 length); + struct btrfs_io_context *bioc, u64 length); void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 06713a8fe26b4..eb96fdc3be25f 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -227,7 +227,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err) } static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, - struct btrfs_bio *bbio) + struct btrfs_io_context *bioc) { struct btrfs_fs_info *fs_info = dev->fs_info; int ret; @@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical, kref_init(&zone->refcnt); zone->elems = 0; zone->device = dev; /* our device always sits at index 0 */ - for (i = 0; i < bbio->num_stripes; ++i) { + for (i = 0; i < bioc->num_stripes; ++i) { /* bounds have already been checked */ - zone->devs[i] = bbio->stripes[i].dev; + zone->devs[i] = bioc->stripes[i].dev; } - zone->ndevs = bbio->num_stripes; + zone->ndevs = bioc->num_stripes; spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&dev->reada_zones, @@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, int ret; struct reada_extent *re = NULL; struct reada_extent *re_exist = NULL; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct btrfs_device *dev; struct btrfs_device *prev_dev; u64 length; @@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, */ length = 
fs_info->nodesize; ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio, 0); - if (ret || !bbio || length < fs_info->nodesize) + &length, &bioc, 0); + if (ret || !bioc || length < fs_info->nodesize) goto error; - if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { + if (bioc->num_stripes > BTRFS_MAX_MIRRORS) { btrfs_err(fs_info, "readahead: more than %d copies not supported", BTRFS_MAX_MIRRORS); goto error; } - real_stripes = bbio->num_stripes - bbio->num_tgtdevs; + real_stripes = bioc->num_stripes - bioc->num_tgtdevs; for (nzones = 0; nzones < real_stripes; ++nzones) { struct reada_zone *zone; - dev = bbio->stripes[nzones].dev; + dev = bioc->stripes[nzones].dev; /* cannot read ahead on missing device. */ if (!dev->bdev) continue; - zone = reada_find_zone(dev, logical, bbio); + zone = reada_find_zone(dev, logical, bioc); if (!zone) continue; @@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, if (!have_zone) goto error; - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return re; error: @@ -488,7 +488,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); kfree(re); return re_exist; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 088641ba7a8e6..717d8f98cf0b8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -57,7 +57,7 @@ struct scrub_ctx; struct scrub_recover { refcount_t refs; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 map_length; }; @@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx); static inline int scrub_is_page_on_raid56(struct scrub_page *spage) { return spage->recover && - (spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); + (spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); } static void scrub_pending_bio_inc(struct scrub_ctx *sctx) @@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info, { if (refcount_dec_and_test(&recover->refs)) { btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(recover->bbio); + btrfs_put_bioc(recover->bioc); kfree(recover); } } @@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_other = sblocks_for_recheck + mirror_index; } else { struct scrub_recover *r = sblock_bad->pagev[0]->recover; - int max_allowed = r->bbio->num_stripes - - r->bbio->num_tgtdevs; + int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs; if (mirror_index >= max_allowed) break; @@ -1218,14 +1217,14 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) return 0; } -static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio) +static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc) { - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5) return 2; - else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) + else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) return 3; else - return (int)bbio->num_stripes; + return (int)bioc->num_stripes; } static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, @@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, u64 flags = original_sblock->pagev[0]->flags; u64 have_csum = original_sblock->pagev[0]->have_csum; struct scrub_recover *recover; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 sublen; u64 
mapped_length; u64 stripe_offset; @@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, while (length > 0) { sublen = min_t(u64, length, fs_info->sectorsize); mapped_length = sublen; - bbio = NULL; + bioc = NULL; /* * With a length of sectorsize, each returned stripe represents @@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, */ btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &mapped_length, &bbio); - if (ret || !bbio || mapped_length < sublen) { - btrfs_put_bbio(bbio); + logical, &mapped_length, &bioc); + if (ret || !bioc || mapped_length < sublen) { + btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); return -EIO; } recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); if (!recover) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); return -ENOMEM; } refcount_set(&recover->refs, 1); - recover->bbio = bbio; + recover->bioc = bioc; recover->map_length = mapped_length; BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK); - nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); + nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS); for (mirror_index = 0; mirror_index < nmirrors; mirror_index++) { @@ -1348,17 +1347,17 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sctx->fs_info->csum_size); scrub_stripe_index_and_offset(logical, - bbio->map_type, - bbio->raid_map, + bioc->map_type, + bioc->raid_map, mapped_length, - bbio->num_stripes - - bbio->num_tgtdevs, + bioc->num_stripes - + bioc->num_tgtdevs, mirror_index, &stripe_index, &stripe_offset); - spage->physical = bbio->stripes[stripe_index].physical + + spage->physical = bioc->stripes[stripe_index].physical + stripe_offset; - spage->dev = bbio->stripes[stripe_index].dev; + spage->dev = bioc->stripes[stripe_index].dev; BUG_ON(page_index >= original_sblock->page_count); spage->physical_for_dev_replace = @@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, bio->bi_end_io = scrub_bio_wait_endio; mirror_num = spage->sblock->pagev[0]->mirror_num; - ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio, + ret = raid56_parity_recover(fs_info, bio, spage->recover->bioc, spage->recover->map_length, mirror_num, 0); if (ret) @@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) struct btrfs_fs_info *fs_info = sctx->fs_info; u64 length = sblock->page_count * PAGE_SIZE; u64 logical = sblock->pagev[0]->logical; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct bio *bio; struct btrfs_raid_bio *rbio; int ret; @@ -2211,19 +2210,19 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &length, &bbio); - if (ret || !bbio || !bbio->raid_map) - goto bbio_out; + &length, &bioc); + if (ret || !bioc || !bioc->raid_map) + goto bioc_out; if (WARN_ON(!sctx->is_dev_replace || - !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) { + !(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) { /* * We shouldn't be scrubbing a missing device. Even for dev * replace, we should only get here for RAID 5/6. We either * managed to mount something with no mirrors remaining or * there's a bug in scrub_remap_extent()/btrfs_map_block(). 
*/ - goto bbio_out; + goto bioc_out; } bio = btrfs_io_bio_alloc(0); @@ -2231,7 +2230,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; - rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length); + rbio = raid56_alloc_missing_rbio(fs_info, bio, bioc, length); if (!rbio) goto rbio_out; @@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) rbio_out: bio_put(bio); -bbio_out: +bioc_out: btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; spin_unlock(&sctx->stat_lock); @@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) struct btrfs_fs_info *fs_info = sctx->fs_info; struct bio *bio; struct btrfs_raid_bio *rbio; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 length; int ret; @@ -2838,16 +2837,16 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, - &length, &bbio); - if (ret || !bbio || !bbio->raid_map) - goto bbio_out; + &length, &bioc); + if (ret || !bioc || !bioc->raid_map) + goto bioc_out; bio = btrfs_io_bio_alloc(0); bio->bi_iter.bi_sector = sparity->logic_start >> 9; bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; - rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio, + rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bioc, length, sparity->scrub_dev, sparity->dbitmap, sparity->nsectors); @@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) rbio_out: bio_put(bio); -bbio_out: +bioc_out: btrfs_bio_counter_dec(fs_info); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); spin_lock(&sctx->stat_lock); @@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, struct btrfs_root *root = fs_info->extent_root; struct btrfs_root *csum_root = fs_info->csum_root; struct btrfs_extent_item *extent; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 flags; int ret; int slot; @@ -3044,22 +3043,22 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, extent_len); mapped_length = extent_len; - bbio = NULL; + bioc = NULL; ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, - extent_logical, &mapped_length, &bbio, + extent_logical, &mapped_length, &bioc, 0); if (!ret) { - if (!bbio || mapped_length < extent_len) + if (!bioc || mapped_length < extent_len) ret = -EIO; } if (ret) { - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); goto out; } - extent_physical = bbio->stripes[0].physical; - extent_mirror_num = bbio->mirror_num; - extent_dev = bbio->stripes[0].dev; - btrfs_put_bbio(bbio); + extent_physical = bioc->stripes[0].physical; + extent_mirror_num = bioc->mirror_num; + extent_dev = bioc->stripes[0].dev; + btrfs_put_bioc(bioc); ret = btrfs_lookup_csums_range(csum_root, extent_logical, @@ -4309,20 +4308,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, int *extent_mirror_num) { u64 mapped_length; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int ret; mapped_length = extent_len; ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical, - &mapped_length, &bbio, 0); - if (ret || !bbio || mapped_length < extent_len || - 
!bbio->stripes[0].dev->bdev) { - btrfs_put_bbio(bbio); + &mapped_length, &bioc, 0); + if (ret || !bioc || mapped_length < extent_len || + !bioc->stripes[0].dev->bdev) { + btrfs_put_bioc(bioc); return; } - *extent_physical = bbio->stripes[0].physical; - *extent_mirror_num = bbio->mirror_num; - *extent_dev = bbio->stripes[0].dev; - btrfs_put_bbio(bbio); + *extent_physical = bioc->stripes[0].physical; + *extent_mirror_num = bioc->mirror_num; + *extent_dev = bioc->stripes[0].dev; + btrfs_put_bioc(bioc); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2101a5bd4eba8..627505500467c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -250,7 +250,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, int mirror_num, int need_raid_map); /* @@ -5789,7 +5789,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, } /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ -static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) +static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes) { int i; int again = 1; @@ -5798,52 +5798,53 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) again = 0; for (i = 0; i < num_stripes - 1; i++) { /* Swap if parity is on a smaller index */ - if (bbio->raid_map[i] > bbio->raid_map[i + 1]) { - swap(bbio->stripes[i], bbio->stripes[i + 1]); - swap(bbio->raid_map[i], bbio->raid_map[i + 1]); + if (bioc->raid_map[i] > bioc->raid_map[i + 1]) { + swap(bioc->stripes[i], bioc->stripes[i + 1]); + swap(bioc->raid_map[i], bioc->raid_map[i + 1]); again = 1; } } } } -static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) +static struct btrfs_io_context *alloc_btrfs_io_context(int total_stripes, + int real_stripes) { - struct btrfs_bio *bbio = kzalloc( - /* the size of the btrfs_bio */ - sizeof(struct btrfs_bio) + - /* plus the variable array for the stripes */ - sizeof(struct btrfs_bio_stripe) * (total_stripes) + - /* plus the variable array for the tgt dev */ + struct btrfs_io_context *bioc = kzalloc( + /* The size of btrfs_io_context */ + sizeof(struct btrfs_io_context) + + /* Plus the variable array for the stripes */ + sizeof(struct btrfs_io_stripe) * (total_stripes) + + /* Plus the variable array for the tgt dev */ sizeof(int) * (real_stripes) + /* - * plus the raid_map, which includes both the tgt dev - * and the stripes + * Plus the raid_map, which includes both the tgt dev + * and the stripes. 
*/ sizeof(u64) * (total_stripes), GFP_NOFS|__GFP_NOFAIL); - atomic_set(&bbio->error, 0); - refcount_set(&bbio->refs, 1); + atomic_set(&bioc->error, 0); + refcount_set(&bioc->refs, 1); - bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes); - bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes); + bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); + bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); - return bbio; + return bioc; } -void btrfs_get_bbio(struct btrfs_bio *bbio) +void btrfs_get_bioc(struct btrfs_io_context *bioc) { - WARN_ON(!refcount_read(&bbio->refs)); - refcount_inc(&bbio->refs); + WARN_ON(!refcount_read(&bioc->refs)); + refcount_inc(&bioc->refs); } -void btrfs_put_bbio(struct btrfs_bio *bbio) +void btrfs_put_bioc(struct btrfs_io_context *bioc) { - if (!bbio) + if (!bioc) return; - if (refcount_dec_and_test(&bbio->refs)) - kfree(bbio); + if (refcount_dec_and_test(&bioc->refs)) + kfree(bioc); } /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ @@ -5853,11 +5854,11 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) */ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, - struct btrfs_bio **bbio_ret) + struct btrfs_io_context **bioc_ret) { struct extent_map *em; struct map_lookup *map; - struct btrfs_bio *bbio; + struct btrfs_io_context *bioc; u64 length = *length_ret; u64 offset; u64 stripe_nr; @@ -5876,8 +5877,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, int ret = 0; int i; - /* discard always return a bbio */ - ASSERT(bbio_ret); + /* Discard always returns a bioc. */ + ASSERT(bioc_ret); em = btrfs_get_chunk_map(fs_info, logical, length); if (IS_ERR(em)) @@ -5940,26 +5941,25 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, &stripe_index); } - bbio = alloc_btrfs_bio(num_stripes, 0); - if (!bbio) { + bioc = alloc_btrfs_io_context(num_stripes, 0); + if (!bioc) { ret = -ENOMEM; goto out; } for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = + bioc->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; + bioc->stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { - bbio->stripes[i].length = stripes_per_dev * + bioc->stripes[i].length = stripes_per_dev * map->stripe_len; if (i / sub_stripes < remaining_stripes) - bbio->stripes[i].length += - map->stripe_len; + bioc->stripes[i].length += map->stripe_len; /* * Special for the first stripe and @@ -5970,19 +5970,17 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, * off end_off */ if (i < sub_stripes) - bbio->stripes[i].length -= - stripe_offset; + bioc->stripes[i].length -= stripe_offset; if (stripe_index >= last_stripe && stripe_index <= (last_stripe + sub_stripes - 1)) - bbio->stripes[i].length -= - stripe_end_offset; + bioc->stripes[i].length -= stripe_end_offset; if (i == sub_stripes - 1) stripe_offset = 0; } else { - bbio->stripes[i].length = length; + bioc->stripes[i].length = length; } stripe_index++; @@ -5992,9 +5990,9 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, } } - *bbio_ret = bbio; - bbio->map_type = map->type; - bbio->num_stripes = num_stripes; + *bioc_ret = bioc; + bioc->map_type = map->type; + bioc->num_stripes = num_stripes; out: free_extent_map(em); return ret; @@ -6018,7 +6016,7 @@ static int get_extra_mirror_from_replace(struct 
btrfs_fs_info *fs_info, u64 srcdev_devid, int *mirror_num, u64 *physical) { - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; int num_stripes; int index_srcdev = 0; int found = 0; @@ -6027,20 +6025,20 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, int ret = 0; ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, - logical, &length, &bbio, 0, 0); + logical, &length, &bioc, 0, 0); if (ret) { - ASSERT(bbio == NULL); + ASSERT(bioc == NULL); return ret; } - num_stripes = bbio->num_stripes; + num_stripes = bioc->num_stripes; if (*mirror_num > num_stripes) { /* * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror, * that means that the requested area is not left of the left * cursor */ - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); return -EIO; } @@ -6050,7 +6048,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * pointer to the one of the target drive. */ for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid != srcdev_devid) + if (bioc->stripes[i].dev->devid != srcdev_devid) continue; /* @@ -6058,15 +6056,15 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info, * mirror with the lowest physical address */ if (found && - physical_of_found <= bbio->stripes[i].physical) + physical_of_found <= bioc->stripes[i].physical) continue; index_srcdev = i; found = 1; - physical_of_found = bbio->stripes[i].physical; + physical_of_found = bioc->stripes[i].physical; } - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); ASSERT(found); if (!found) @@ -6097,12 +6095,12 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical) } static void handle_ops_on_dev_replace(enum btrfs_map_op op, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, struct btrfs_dev_replace *dev_replace, u64 logical, int *num_stripes_ret, int *max_errors_ret) { - struct btrfs_bio *bbio = *bbio_ret; + struct btrfs_io_context *bioc = *bioc_ret; u64 srcdev_devid = dev_replace->srcdev->devid; int tgtdev_indexes = 0; int num_stripes = *num_stripes_ret; @@ -6132,17 +6130,17 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, */ index_where_to_add = num_stripes; for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { + if (bioc->stripes[i].dev->devid == srcdev_devid) { /* write to new disk, too */ - struct btrfs_bio_stripe *new = - bbio->stripes + index_where_to_add; - struct btrfs_bio_stripe *old = - bbio->stripes + i; + struct btrfs_io_stripe *new = + bioc->stripes + index_where_to_add; + struct btrfs_io_stripe *old = + bioc->stripes + i; new->physical = old->physical; new->length = old->length; new->dev = dev_replace->tgtdev; - bbio->tgtdev_map[i] = index_where_to_add; + bioc->tgtdev_map[i] = index_where_to_add; index_where_to_add++; max_errors++; tgtdev_indexes++; @@ -6162,30 +6160,29 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, * full copy of the source drive. 
*/ for (i = 0; i < num_stripes; i++) { - if (bbio->stripes[i].dev->devid == srcdev_devid) { + if (bioc->stripes[i].dev->devid == srcdev_devid) { /* * In case of DUP, in order to keep it simple, * only add the mirror with the lowest physical * address */ if (found && - physical_of_found <= - bbio->stripes[i].physical) + physical_of_found <= bioc->stripes[i].physical) continue; index_srcdev = i; found = 1; - physical_of_found = bbio->stripes[i].physical; + physical_of_found = bioc->stripes[i].physical; } } if (found) { - struct btrfs_bio_stripe *tgtdev_stripe = - bbio->stripes + num_stripes; + struct btrfs_io_stripe *tgtdev_stripe = + bioc->stripes + num_stripes; tgtdev_stripe->physical = physical_of_found; tgtdev_stripe->length = - bbio->stripes[index_srcdev].length; + bioc->stripes[index_srcdev].length; tgtdev_stripe->dev = dev_replace->tgtdev; - bbio->tgtdev_map[index_srcdev] = num_stripes; + bioc->tgtdev_map[index_srcdev] = num_stripes; tgtdev_indexes++; num_stripes++; @@ -6194,8 +6191,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, *num_stripes_ret = num_stripes; *max_errors_ret = max_errors; - bbio->num_tgtdevs = tgtdev_indexes; - *bbio_ret = bbio; + bioc->num_tgtdevs = tgtdev_indexes; + *bioc_ret = bioc; } static bool need_full_stripe(enum btrfs_map_op op) @@ -6298,7 +6295,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em, static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, + struct btrfs_io_context **bioc_ret, int mirror_num, int need_raid_map) { struct extent_map *em; @@ -6313,7 +6310,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int num_stripes; int max_errors = 0; int tgtdev_indexes = 0; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0; int num_alloc_stripes; @@ -6322,7 +6319,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 raid56_full_stripe_start = (u64)-1; struct btrfs_io_geometry geom; - ASSERT(bbio_ret); + ASSERT(bioc_ret); ASSERT(op != BTRFS_MAP_DISCARD); em = btrfs_get_chunk_map(fs_info, logical, *length); @@ -6466,20 +6463,20 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, tgtdev_indexes = num_stripes; } - bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes); - if (!bbio) { + bioc = alloc_btrfs_io_context(num_alloc_stripes, tgtdev_indexes); + if (!bioc) { ret = -ENOMEM; goto out; } for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = map->stripes[stripe_index].physical + + bioc->stripes[i].physical = map->stripes[stripe_index].physical + stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; + bioc->stripes[i].dev = map->stripes[stripe_index].dev; stripe_index++; } - /* build raid_map */ + /* Build raid_map */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { u64 tmp; @@ -6491,15 +6488,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* Fill in the logical address of each stripe */ tmp = stripe_nr * data_stripes; for (i = 0; i < data_stripes; i++) - bbio->raid_map[(i+rot) % num_stripes] = + bioc->raid_map[(i + rot) % num_stripes] = em->start + (tmp + i) * map->stripe_len; - bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; + bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE; if (map->type & 
BTRFS_BLOCK_GROUP_RAID6) - bbio->raid_map[(i+rot+1) % num_stripes] = + bioc->raid_map[(i + rot + 1) % num_stripes] = RAID6_Q_STRIPE; - sort_parity_stripes(bbio, num_stripes); + sort_parity_stripes(bioc, num_stripes); } if (need_full_stripe(op)) @@ -6507,15 +6504,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && need_full_stripe(op)) { - handle_ops_on_dev_replace(op, &bbio, dev_replace, logical, + handle_ops_on_dev_replace(op, &bioc, dev_replace, logical, &num_stripes, &max_errors); } - *bbio_ret = bbio; - bbio->map_type = map->type; - bbio->num_stripes = num_stripes; - bbio->max_errors = max_errors; - bbio->mirror_num = mirror_num; + *bioc_ret = bioc; + bioc->map_type = map->type; + bioc->num_stripes = num_stripes; + bioc->max_errors = max_errors; + bioc->mirror_num = mirror_num; /* * this is the case that REQ_READ && dev_replace_is_ongoing && @@ -6524,9 +6521,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, */ if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { WARN_ON(num_stripes > 1); - bbio->stripes[0].dev = dev_replace->tgtdev; - bbio->stripes[0].physical = physical_to_patch_in_first_stripe; - bbio->mirror_num = map->num_stripes + 1; + bioc->stripes[0].dev = dev_replace->tgtdev; + bioc->stripes[0].physical = physical_to_patch_in_first_stripe; + bioc->mirror_num = map->num_stripes + 1; } out: if (dev_replace_is_ongoing) { @@ -6540,40 +6537,40 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num) + struct btrfs_io_context **bioc_ret, int mirror_num) { if (op == BTRFS_MAP_DISCARD) return __btrfs_map_block_for_discard(fs_info, logical, - length, bbio_ret); + length, bioc_ret); - return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, + return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, mirror_num, 0); } /* For Scrub/replace */ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret) + struct btrfs_io_context **bioc_ret) { - return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1); + return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1); } -static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) +static inline void btrfs_end_bioc(struct btrfs_io_context *bioc, struct bio *bio) { - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; + bio->bi_private = bioc->private; + bio->bi_end_io = bioc->end_io; bio_endio(bio); - btrfs_put_bbio(bbio); + btrfs_put_bioc(bioc); } static void btrfs_end_bio(struct bio *bio) { - struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_io_context *bioc = bio->bi_private; int is_orig_bio = 0; if (bio->bi_status) { - atomic_inc(&bbio->error); + atomic_inc(&bioc->error); if (bio->bi_status == BLK_STS_IOERR || bio->bi_status == BLK_STS_TARGET) { struct btrfs_device *dev = btrfs_io_bio(bio)->device; @@ -6591,22 +6588,22 @@ static void btrfs_end_bio(struct bio *bio) } } - if (bio == bbio->orig_bio) + if (bio == bioc->orig_bio) is_orig_bio = 1; - btrfs_bio_counter_dec(bbio->fs_info); + btrfs_bio_counter_dec(bioc->fs_info); - if (atomic_dec_and_test(&bbio->stripes_pending)) { + if (atomic_dec_and_test(&bioc->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); - bio = bbio->orig_bio; + bio = bioc->orig_bio; } - btrfs_io_bio(bio)->mirror_num = 
bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio */ - if (atomic_read(&bbio->error) > bbio->max_errors) { + if (atomic_read(&bioc->error) > bioc->max_errors) { bio->bi_status = BLK_STS_IOERR; } else { /* @@ -6616,18 +6613,18 @@ static void btrfs_end_bio(struct bio *bio) bio->bi_status = BLK_STS_OK; } - btrfs_end_bbio(bbio, bio); + btrfs_end_bioc(bioc, bio); } else if (!is_orig_bio) { bio_put(bio); } } -static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, +static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio, u64 physical, struct btrfs_device *dev) { - struct btrfs_fs_info *fs_info = bbio->fs_info; + struct btrfs_fs_info *fs_info = bioc->fs_info; - bio->bi_private = bbio; + bio->bi_private = bioc; btrfs_io_bio(bio)->device = dev; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; @@ -6657,20 +6654,20 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, btrfsic_submit_bio(bio); } -static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) +static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logical) { - atomic_inc(&bbio->error); - if (atomic_dec_and_test(&bbio->stripes_pending)) { + atomic_inc(&bioc->error); + if (atomic_dec_and_test(&bioc->stripes_pending)) { /* Should be the original bio. */ - WARN_ON(bio != bbio->orig_bio); + WARN_ON(bio != bioc->orig_bio); - btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; + btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - if (atomic_read(&bbio->error) > bbio->max_errors) + if (atomic_read(&bioc->error) > bioc->max_errors) bio->bi_status = BLK_STS_IOERR; else bio->bi_status = BLK_STS_OK; - btrfs_end_bbio(bbio, bio); + btrfs_end_bioc(bioc, bio); } } @@ -6685,35 +6682,35 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int ret; int dev_nr; int total_devs; - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; length = bio->bi_iter.bi_size; map_length = length; btrfs_bio_counter_inc_blocked(fs_info); ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, - &map_length, &bbio, mirror_num, 1); + &map_length, &bioc, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); return errno_to_blk_status(ret); } - total_devs = bbio->num_stripes; - bbio->orig_bio = first_bio; - bbio->private = first_bio->bi_private; - bbio->end_io = first_bio->bi_end_io; - bbio->fs_info = fs_info; - atomic_set(&bbio->stripes_pending, bbio->num_stripes); + total_devs = bioc->num_stripes; + bioc->orig_bio = first_bio; + bioc->private = first_bio->bi_private; + bioc->end_io = first_bio->bi_end_io; + bioc->fs_info = fs_info; + atomic_set(&bioc->stripes_pending, bioc->num_stripes); - if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && + if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (btrfs_op(bio) == BTRFS_MAP_WRITE) { - ret = raid56_parity_write(fs_info, bio, bbio, + ret = raid56_parity_write(fs_info, bio, bioc, map_length); } else { - ret = raid56_parity_recover(fs_info, bio, bbio, + ret = raid56_parity_recover(fs_info, bio, bioc, map_length, mirror_num, 1); } @@ -6729,12 +6726,12 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } for (dev_nr = 0; dev_nr < 
total_devs; dev_nr++) { - dev = bbio->stripes[dev_nr].dev; + dev = bioc->stripes[dev_nr].dev; if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || (btrfs_op(first_bio) == BTRFS_MAP_WRITE && !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { - bbio_error(bbio, first_bio, logical); + bioc_error(bioc, first_bio, logical); continue; } @@ -6743,7 +6740,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, else bio = first_bio; - submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev); + submit_stripe_bio(bioc, bio, bioc->stripes[dev_nr].physical, dev); } btrfs_bio_counter_dec(fs_info); return BLK_STS_OK; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7e8f205978d9e..b69755fc0e0d4 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -306,11 +306,11 @@ struct btrfs_fs_devices { /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). - * Really, what we need is a btrfs_bio structure that has this info + * Really, what we need is a btrfs_io_context structure that has this info * and is properly sized with its stripe array, but we're not there * quite yet. We have our own btrfs bioset, and all of the bios * we allocate are actually btrfs_io_bios. We'll cram as much of - * struct btrfs_bio as we can into this over time. + * struct btrfs_io_context as we can into this over time. */ struct btrfs_io_bio { unsigned int mirror_num; @@ -339,13 +339,29 @@ static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio) } } -struct btrfs_bio_stripe { +struct btrfs_io_stripe { struct btrfs_device *dev; u64 physical; u64 length; /* only used for discard mappings */ }; -struct btrfs_bio { +/* + * Context for IO submission for device stripe. + * + * - Track the unfinished mirrors for mirror based profiles + * Mirror based profiles are SINGLE/DUP/RAID1/RAID10. + * + * - Contain the logical -> physical mapping info + * Used by submit_stripe_bio() for mapping logical bio + * into physical device address. + * + * - Contain device replace info + * Used by handle_ops_on_dev_replace() to copy logical bios + * into the new device. 
+ * + * - Contain RAID56 full stripe logical bytenrs + */ +struct btrfs_io_context { refcount_t refs; atomic_t stripes_pending; struct btrfs_fs_info *fs_info; @@ -365,7 +381,7 @@ struct btrfs_bio { * so raid_map[0] is the start of our full stripe */ u64 *raid_map; - struct btrfs_bio_stripe stripes[]; + struct btrfs_io_stripe stripes[]; }; struct btrfs_device_info { @@ -400,11 +416,11 @@ struct map_lookup { int num_stripes; int sub_stripes; int verified_stripes; /* For mount time dev extent verification */ - struct btrfs_bio_stripe stripes[]; + struct btrfs_io_stripe stripes[]; }; #define map_lookup_size(n) (sizeof(struct map_lookup) + \ - (sizeof(struct btrfs_bio_stripe) * (n))) + (sizeof(struct btrfs_io_stripe) * (n))) struct btrfs_balance_args; struct btrfs_balance_progress; @@ -441,14 +457,14 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio) } } -void btrfs_get_bbio(struct btrfs_bio *bbio); -void btrfs_put_bbio(struct btrfs_bio *bbio); +void btrfs_get_bioc(struct btrfs_io_context *bioc); +void btrfs_put_bioc(struct btrfs_io_context *bioc); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret, int mirror_num); + struct btrfs_io_context **bioc_ret, int mirror_num); int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, - struct btrfs_bio **bbio_ret); + struct btrfs_io_context **bioc_ret); int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map, enum btrfs_map_op op, u64 logical, struct btrfs_io_geometry *io_geom); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1433ee220c94c..14ea6797bf492 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1637,27 +1637,27 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, struct blk_zone *zone) { - struct btrfs_bio *bbio = NULL; + struct btrfs_io_context *bioc = NULL; u64 mapped_length = PAGE_SIZE; unsigned int nofs_flag; int nmirrors; int i, ret; ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, - &mapped_length, &bbio); - if (ret || !bbio || mapped_length < PAGE_SIZE) { - btrfs_put_bbio(bbio); + &mapped_length, &bioc); + if (ret || !bioc || mapped_length < PAGE_SIZE) { + btrfs_put_bioc(bioc); return -EIO; } - if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) return -EINVAL; nofs_flag = memalloc_nofs_save(); - nmirrors = (int)bbio->num_stripes; + nmirrors = (int)bioc->num_stripes; for (i = 0; i < nmirrors; i++) { - u64 physical = bbio->stripes[i].physical; - struct btrfs_device *dev = bbio->stripes[i].dev; + u64 physical = bioc->stripes[i].physical; + struct btrfs_device *dev = bioc->stripes[i].dev; /* Missing device */ if (!dev->bdev) From fdb09b8567c4d408426055aaf43b202d3d21a2bd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 15 Sep 2021 15:17:17 +0800 Subject: [PATCH 0321/1202] btrfs: remove btrfs_bio_alloc() helper The helper btrfs_bio_alloc() is almost the same as btrfs_io_bio_alloc(), except it's allocating using BIO_MAX_VECS as @nr_iovecs, and initializes bio->bi_iter.bi_sector. However, the name itself does not use "btrfs_io_bio" to indicate that the bio it allocates is really a "struct btrfs_io_bio", so it is easily confused with "struct btrfs_bio". Considering that assigning bio->bi_iter.bi_sector is such simple work and there are already tons of call sites doing it manually, there is no need to do it in a helper. 
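To illustrate the direction of the change, the caller-side pattern before and after looks roughly like this (a condensed sketch, not one of the hunks below):

  /* Before: the helper hid the sector assignment */
  bio = btrfs_bio_alloc(first_byte);

  /* After: allocate with an explicit iovec count, then set the sector */
  bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
  bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT;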
Remove btrfs_bio_alloc() helper, and enhance btrfs_io_bio_alloc() function to provide a fail-safe value for its @nr_iovecs. And then replace all btrfs_bio_alloc() callers with btrfs_io_bio_alloc(). Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 12 ++++++++---- fs/btrfs/extent_io.c | 27 +++++++++------------------ fs/btrfs/extent_io.h | 1 - fs/btrfs/scrub.c | 4 ++-- 4 files changed, 19 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7869ad12bc6e1..4625a3737a6ce 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -418,7 +418,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; - bio = btrfs_bio_alloc(first_byte); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; @@ -490,7 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio_endio(bio); } - bio = btrfs_bio_alloc(first_byte); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; @@ -748,7 +750,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; - comp_bio = btrfs_bio_alloc(cur_disk_byte); + comp_bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; @@ -806,7 +809,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } - comp_bio = btrfs_bio_alloc(cur_disk_byte); + comp_bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 86190ce913239..55a04790df1c0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3121,16 +3121,16 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) } /* - * The following helpers allocate a bio. As it's backed by a bioset, it'll - * never fail. We're returning a bio right now but you can call btrfs_io_bio - * for the appropriate container_of magic + * Allocate a btrfs_io_bio, with @nr_iovecs as maximum number of iovecs. + * + * The bio allocation is backed by bioset and does not fail. 
*/ -struct bio *btrfs_bio_alloc(u64 first_byte) +struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset); - bio->bi_iter.bi_sector = first_byte >> 9; + ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS); + bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; } @@ -3148,16 +3148,6 @@ struct bio *btrfs_bio_clone(struct bio *bio) return new; } -struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) -{ - struct bio *bio; - - /* Bio allocation backed by a bioset does not fail */ - bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); - btrfs_io_bio_init(btrfs_io_bio(bio)); - return bio; -} - struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size) { struct bio *bio; @@ -3307,14 +3297,15 @@ static int alloc_new_bio(struct btrfs_inode *inode, struct bio *bio; int ret; + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); /* * For compressed page range, its disk_bytenr is always @disk_bytenr * passed in, no matter if we have added any range into previous bio. */ if (bio_flags & EXTENT_BIO_COMPRESSED) - bio = btrfs_bio_alloc(disk_bytenr); + bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; else - bio = btrfs_bio_alloc(disk_bytenr + offset); + bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT; bio_ctrl->bio = bio; bio_ctrl->bio_flags = bio_flags; bio->bi_end_io = end_io_func; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ba471f2063a72..81fa68eaa699e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -278,7 +278,6 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, struct page *locked_page, u32 bits_to_clear, unsigned long page_ops); -struct bio *btrfs_bio_alloc(u64 first_byte); struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio); struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 717d8f98cf0b8..1a9d485f80568 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2225,7 +2225,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) goto bioc_out; } - bio = btrfs_io_bio_alloc(0); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = logical >> 9; bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; @@ -2841,7 +2841,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) if (ret || !bioc || !bioc->raid_map) goto bioc_out; - bio = btrfs_io_bio_alloc(0); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = sparity->logic_start >> 9; bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; From f4c03ce36644107ea396c2665767c24cebe28dc2 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 15 Sep 2021 15:17:18 +0800 Subject: [PATCH 0322/1202] btrfs: rename struct btrfs_io_bio to btrfs_bio Previously we had "struct btrfs_bio", which records IO context for mirrored IO and RAID56, and "struct btrfs_io_bio", which records extra btrfs-specific info for a logical bytenr bio. With "btrfs_bio" renamed to "btrfs_io_context", we are safe to rename "btrfs_io_bio" to "btrfs_bio", which is a more suitable name now. The meaning of struct btrfs_bio changes with this commit. There was a suggested name like btrfs_logical_bio but it's a bit long and we'd prefer to use a shorter name. 
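For reference, the renamed structure keeps the embedded bio as its last member and is recovered from a plain struct bio with container_of(); abridged from the volumes.h hunk below:

  struct btrfs_bio {
          unsigned int mirror_num;
          ...
          /* This member must come last, bio_alloc_bioset relies on it. */
          struct bio bio;
  };

  static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
  {
          return container_of(bio, struct btrfs_bio, bio);
  }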
This could be a concern for backports to older kernels where the different meaning could possibly cause confusion or bugs. Comparing the new and old structures, there's no overlap among the struct members so a build would break in case of incorrect backport. We haven't had many backports to bio code anyway so this is more of a theoretical cause of bugs and a matter of precaution but we'll need to keep the semantic change in mind. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/check-integrity.c | 2 +- fs/btrfs/compression.c | 16 +++++----- fs/btrfs/ctree.h | 6 ++-- fs/btrfs/disk-io.c | 2 +- fs/btrfs/disk-io.h | 2 +- fs/btrfs/extent_io.c | 64 +++++++++++++++++++------------------- fs/btrfs/extent_io.h | 2 +- fs/btrfs/file-item.c | 13 ++++---- fs/btrfs/inode.c | 47 ++++++++++++++-------------- fs/btrfs/raid56.c | 8 ++--- fs/btrfs/scrub.c | 14 ++++----- fs/btrfs/volumes.c | 8 ++--- fs/btrfs/volumes.h | 29 +++++++++-------- 13 files changed, 107 insertions(+), 106 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 81b11124b67a8..ed646f2b46f4e 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1561,7 +1561,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, struct bio *bio; unsigned int j; - bio = btrfs_io_bio_alloc(num_pages - i); + bio = btrfs_bio_alloc(num_pages - i); bio_set_dev(bio, block_ctx->dev->bdev); bio->bi_iter.bi_sector = dev_bytenr >> 9; bio->bi_opf = REQ_OP_READ; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 4625a3737a6ce..2a86a2a1494b3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -179,9 +179,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, if (memcmp(&csum, cb_sum, csum_size) != 0) { btrfs_print_data_csum_error(inode, disk_start, csum, cb_sum, cb->mirror_num); - if (btrfs_io_bio(bio)->device) + if (btrfs_bio(bio)->device) btrfs_dev_stat_inc_and_print( - btrfs_io_bio(bio)->device, + btrfs_bio(bio)->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); return -EIO; } @@ -208,7 +208,7 @@ static void end_compressed_bio_read(struct bio *bio) struct inode *inode; struct page *page; unsigned int index; - unsigned int mirror = btrfs_io_bio(bio)->mirror_num; + unsigned int mirror = btrfs_bio(bio)->mirror_num; int ret = 0; if (bio->bi_status) @@ -224,7 +224,7 @@ static void end_compressed_bio_read(struct bio *bio) * Record the correct mirror_num in cb->orig_bio so that * read-repair can work properly. 
*/ - btrfs_io_bio(cb->orig_bio)->mirror_num = mirror; + btrfs_bio(cb->orig_bio)->mirror_num = mirror; cb->mirror_num = mirror; /* @@ -418,7 +418,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; @@ -491,7 +491,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio_endio(bio); } - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; bio->bi_opf = bio_op | write_flags; bio->bi_private = cb; @@ -750,7 +750,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; - comp_bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + comp_bio = btrfs_bio_alloc(BIO_MAX_VECS); comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; @@ -809,7 +809,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } - comp_bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + comp_bio = btrfs_bio_alloc(BIO_MAX_VECS); comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 41f1718a83df9..b394ba4737e41 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -48,6 +48,7 @@ extern struct kmem_cache *btrfs_free_space_cachep; extern struct kmem_cache *btrfs_free_space_bitmap_cachep; struct btrfs_ordered_sum; struct btrfs_ref; +struct btrfs_bio; #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ @@ -3140,8 +3141,9 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path); /* inode.c */ blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); -unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, - struct page *page, u64 start, u64 end); +unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, + u64 start, u64 end); struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, u64 start, u64 len); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index be382276d24f3..b28638c794136 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -683,7 +683,7 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end, return ret; } -int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, +int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, struct page *page, u64 start, u64 end, int mirror) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0e7e9526b6a83..1d3b749c405a8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -81,7 +81,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info); void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, +int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, struct page *page, u64 start, u64 end, int mirror); blk_status_t 
btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 55a04790df1c0..c56973f7daae5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -241,7 +241,7 @@ int __init extent_io_init(void) return -ENOMEM; if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE, - offsetof(struct btrfs_io_bio, bio), + offsetof(struct btrfs_bio, bio), BIOSET_NEED_BVECS)) goto free_buffer_cache; @@ -2299,7 +2299,7 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, if (btrfs_is_zoned(fs_info)) return btrfs_repair_one_zone(fs_info, logical); - bio = btrfs_io_bio_alloc(1); + bio = btrfs_bio_alloc(1); bio->bi_iter.bi_size = 0; map_length = length; @@ -2618,10 +2618,10 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; - struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio); + struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio); const int icsum = bio_offset >> fs_info->sectorsize_bits; struct bio *repair_bio; - struct btrfs_io_bio *repair_io_bio; + struct btrfs_bio *repair_bbio; blk_status_t status; btrfs_debug(fs_info, @@ -2639,24 +2639,24 @@ int btrfs_repair_one_sector(struct inode *inode, return -EIO; } - repair_bio = btrfs_io_bio_alloc(1); - repair_io_bio = btrfs_io_bio(repair_bio); + repair_bio = btrfs_bio_alloc(1); + repair_bbio = btrfs_bio(repair_bio); repair_bio->bi_opf = REQ_OP_READ; repair_bio->bi_end_io = failed_bio->bi_end_io; repair_bio->bi_iter.bi_sector = failrec->logical >> 9; repair_bio->bi_private = failed_bio->bi_private; - if (failed_io_bio->csum) { + if (failed_bbio->csum) { const u32 csum_size = fs_info->csum_size; - repair_io_bio->csum = repair_io_bio->csum_inline; - memcpy(repair_io_bio->csum, - failed_io_bio->csum + csum_size * icsum, csum_size); + repair_bbio->csum = repair_bbio->csum_inline; + memcpy(repair_bbio->csum, + failed_bbio->csum + csum_size * icsum, csum_size); } bio_add_page(repair_bio, page, failrec->len, pgoff); - repair_io_bio->logical = failrec->start; - repair_io_bio->iter = repair_bio->bi_iter; + repair_bbio->logical = failrec->start; + repair_bbio->iter = repair_bio->bi_iter; btrfs_debug(btrfs_sb(inode->i_sb), "repair read error: submitting new read to mirror %d", @@ -2976,7 +2976,7 @@ static struct extent_buffer *find_extent_buffer_readpage( static void end_bio_extent_readpage(struct bio *bio) { struct bio_vec *bvec; - struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + struct btrfs_bio *bbio = btrfs_bio(bio); struct extent_io_tree *tree, *failure_tree; struct processed_extent processed = { 0 }; /* @@ -3003,7 +3003,7 @@ static void end_bio_extent_readpage(struct bio *bio) btrfs_debug(fs_info, "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", bio->bi_iter.bi_sector, bio->bi_status, - io_bio->mirror_num); + bbio->mirror_num); tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -3028,14 +3028,14 @@ static void end_bio_extent_readpage(struct bio *bio) end = start + bvec->bv_len - 1; len = bvec->bv_len; - mirror = io_bio->mirror_num; + mirror = bbio->mirror_num; if (likely(uptodate)) { if (is_data_inode(inode)) { - error_bitmap = btrfs_verify_data_csum(io_bio, + error_bitmap = btrfs_verify_data_csum(bbio, bio_offset, page, start, end); ret = error_bitmap; } else { - ret = btrfs_validate_metadata_buffer(io_bio, + ret = 
btrfs_validate_metadata_buffer(bbio, page, start, end, mirror); } if (ret) @@ -3106,7 +3106,7 @@ static void end_bio_extent_readpage(struct bio *bio) } /* Release the last extent */ endio_readpage_release_extent(&processed, NULL, 0, 0, false); - btrfs_io_bio_free_csum(io_bio); + btrfs_bio_free_csum(bbio); bio_put(bio); } @@ -3115,9 +3115,9 @@ static void end_bio_extent_readpage(struct bio *bio) * new bio by bio_alloc_bioset as it does not initialize the bytes outside of * 'bio' because use of __GFP_ZERO is not supported. */ -static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) +static inline void btrfs_bio_init(struct btrfs_bio *bbio) { - memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio)); + memset(bbio, 0, offsetof(struct btrfs_bio, bio)); } /* @@ -3125,33 +3125,33 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio) * * The bio allocation is backed by bioset and does not fail. */ -struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs) +struct bio *btrfs_bio_alloc(unsigned int nr_iovecs) { struct bio *bio; ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS); bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset); - btrfs_io_bio_init(btrfs_io_bio(bio)); + btrfs_bio_init(btrfs_bio(bio)); return bio; } struct bio *btrfs_bio_clone(struct bio *bio) { - struct btrfs_io_bio *btrfs_bio; + struct btrfs_bio *bbio; struct bio *new; /* Bio allocation backed by a bioset does not fail */ new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset); - btrfs_bio = btrfs_io_bio(new); - btrfs_io_bio_init(btrfs_bio); - btrfs_bio->iter = bio->bi_iter; + bbio = btrfs_bio(new); + btrfs_bio_init(bbio); + bbio->iter = bio->bi_iter; return new; } struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size) { struct bio *bio; - struct btrfs_io_bio *btrfs_bio; + struct btrfs_bio *bbio; ASSERT(offset <= UINT_MAX && size <= UINT_MAX); @@ -3159,11 +3159,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size) bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset); ASSERT(bio); - btrfs_bio = btrfs_io_bio(bio); - btrfs_io_bio_init(btrfs_bio); + bbio = btrfs_bio(bio); + btrfs_bio_init(bbio); bio_trim(bio, offset >> 9, size >> 9); - btrfs_bio->iter = bio->bi_iter; + bbio->iter = bio->bi_iter; return bio; } @@ -3297,7 +3297,7 @@ static int alloc_new_bio(struct btrfs_inode *inode, struct bio *bio; int ret; - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); /* * For compressed page range, its disk_bytenr is always @disk_bytenr * passed in, no matter if we have added any range into previous bio. 
@@ -3332,7 +3332,7 @@ static int alloc_new_bio(struct btrfs_inode *inode, goto error; } - btrfs_io_bio(bio)->device = device; + btrfs_bio(bio)->device = device; } return 0; error: diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 81fa68eaa699e..a3ecfd4f37ac2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -278,7 +278,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, struct page *locked_page, u32 bits_to_clear, unsigned long page_ops); -struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs); +struct bio *btrfs_bio_alloc(unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio); struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 0b9401a5afd33..ddb9c7ee796f0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -358,7 +358,7 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode, * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If * NULL, the checksum buffer is allocated and returned in - * btrfs_io_bio(bio)->csum instead. + * btrfs_bio(bio)->csum instead. * * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. */ @@ -397,19 +397,18 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst return BLK_STS_RESOURCE; if (!dst) { - struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio); + struct btrfs_bio *bbio = btrfs_bio(bio); if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { - btrfs_bio->csum = kmalloc_array(nblocks, csum_size, - GFP_NOFS); - if (!btrfs_bio->csum) { + bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS); + if (!bbio->csum) { btrfs_free_path(path); return BLK_STS_RESOURCE; } } else { - btrfs_bio->csum = btrfs_bio->csum_inline; + bbio->csum = bbio->csum_inline; } - csum = btrfs_bio->csum; + csum = bbio->csum; } else { csum = dst; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 027f2ebc8dc31..a643be30c18a8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3217,7 +3217,7 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, * * The length of such check is always one sector size. 
*/ -static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, +static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio, u32 bio_offset, struct page *page, u32 pgoff, u64 start) { @@ -3233,7 +3233,7 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, ASSERT(pgoff + len <= PAGE_SIZE); offset_sectors = bio_offset >> fs_info->sectorsize_bits; - csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size; + csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size; kaddr = kmap_atomic(page); shash->tfm = fs_info->csum_shash; @@ -3247,9 +3247,9 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, return 0; zeroit: btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, - io_bio->mirror_num); - if (io_bio->device) - btrfs_dev_stat_inc_and_print(io_bio->device, + bbio->mirror_num); + if (bbio->device) + btrfs_dev_stat_inc_and_print(bbio->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); @@ -3269,8 +3269,9 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, * Return a bitmap where bit set means a csum mismatch, and bit not set means * csum match. */ -unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, - struct page *page, u64 start, u64 end) +unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, + u32 bio_offset, struct page *page, + u64 start, u64 end) { struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; @@ -3288,14 +3289,14 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, * For subpage case, above PageChecked is not safe as it's not subpage * compatible. * But for now only cow fixup and compressed read utilize PageChecked - * flag, while in this context we can easily use io_bio->csum to + * flag, while in this context we can easily use bbio->csum to * determine if we really need to do csum verification. * - * So for now, just exit if io_bio->csum is NULL, as it means it's + * So for now, just exit if bbio->csum is NULL, as it means it's * compressed read, and its compressed data csum has already been * verified. 
*/ - if (io_bio->csum == NULL) + if (bbio->csum == NULL) return 0; if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) @@ -3322,7 +3323,7 @@ unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, EXTENT_NODATASUM); continue; } - ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off, + ret = check_data_csum(inode, bbio, bio_offset, page, pg_off, page_offset(page) + pg_off); if (ret < 0) { const int nr_bit = (pg_off - offset_in_page(start)) >> @@ -8082,7 +8083,7 @@ static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio, } static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, - struct btrfs_io_bio *io_bio, + struct btrfs_bio *bbio, const bool uptodate) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; @@ -8092,11 +8093,11 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); struct bio_vec bvec; struct bvec_iter iter; - u64 start = io_bio->logical; + u64 start = bbio->logical; u32 bio_offset = 0; blk_status_t err = BLK_STS_OK; - __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) { + __bio_for_each_segment(bvec, &bbio->bio, iter, bbio->iter) { unsigned int i, nr_sectors, pgoff; nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len); @@ -8104,7 +8105,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, for (i = 0; i < nr_sectors; i++) { ASSERT(pgoff < PAGE_SIZE); if (uptodate && - (!csum || !check_data_csum(inode, io_bio, + (!csum || !check_data_csum(inode, bbio, bio_offset, bvec.bv_page, pgoff, start))) { clean_io_failure(fs_info, failure_tree, io_tree, @@ -8114,12 +8115,12 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, } else { int ret; - ASSERT((start - io_bio->logical) < UINT_MAX); + ASSERT((start - bbio->logical) < UINT_MAX); ret = btrfs_repair_one_sector(inode, - &io_bio->bio, - start - io_bio->logical, + &bbio->bio, + start - bbio->logical, bvec.bv_page, pgoff, - start, io_bio->mirror_num, + start, bbio->mirror_num, submit_dio_repair_bio); if (ret) err = errno_to_blk_status(ret); @@ -8161,8 +8162,8 @@ static void btrfs_end_dio_bio(struct bio *bio) bio->bi_iter.bi_size, err); if (bio_op(bio) == REQ_OP_READ) { - err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio), - !err); + err = btrfs_check_read_dio_bio(dip->inode, + btrfs_bio(bio), !err); } if (err) @@ -8213,7 +8214,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, csum_offset = file_offset - dip->logical_offset; csum_offset >>= fs_info->sectorsize_bits; csum_offset *= fs_info->csum_size; - btrfs_io_bio(bio)->csum = dip->csums + csum_offset; + btrfs_bio(bio)->csum = dip->csums + csum_offset; } map: ret = btrfs_map_bio(fs_info, bio, 0); @@ -8329,7 +8330,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; - btrfs_io_bio(bio)->logical = file_offset; + btrfs_bio(bio)->logical = file_offset; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 893d93e3c516d..02aa3fbb8108f 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1104,8 +1104,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, } /* put a new bio on the list */ - bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); - btrfs_io_bio(bio)->device = stripe->dev; + bio = 
btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); + btrfs_bio(bio)->device = stripe->dev; bio->bi_iter.bi_size = 0; bio_set_dev(bio, stripe->dev->bdev); bio->bi_iter.bi_sector = disk_start >> 9; @@ -1158,7 +1158,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) page_index = stripe_offset >> PAGE_SHIFT; if (bio_flagged(bio, BIO_CLONED)) - bio->bi_iter = btrfs_io_bio(bio)->iter; + bio->bi_iter = btrfs_bio(bio)->iter; bio_for_each_segment(bvec, bio, iter) { rbio->bio_pages[page_index + i] = bvec.bv_page; @@ -2124,7 +2124,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, if (generic_io) { ASSERT(bioc->mirror_num == mirror_num); - btrfs_io_bio(bio)->mirror_num = mirror_num; + btrfs_bio(bio)->mirror_num = mirror_num; } rbio = alloc_rbio(fs_info, bioc, stripe_len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1a9d485f80568..c8658546607f8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1422,7 +1422,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info, if (!first_page->dev->bdev) goto out; - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); bio_set_dev(bio, first_page->dev->bdev); for (page_num = 0; page_num < sblock->page_count; page_num++) { @@ -1479,7 +1479,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } WARN_ON(!spage->page); - bio = btrfs_io_bio_alloc(1); + bio = btrfs_bio_alloc(1); bio_set_dev(bio, spage->dev->bdev); bio_add_page(bio, spage->page, fs_info->sectorsize, 0); @@ -1561,7 +1561,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; } - bio = btrfs_io_bio_alloc(1); + bio = btrfs_bio_alloc(1); bio_set_dev(bio, spage_bad->dev->bdev); bio->bi_iter.bi_sector = spage_bad->physical >> 9; bio->bi_opf = REQ_OP_WRITE; @@ -1675,7 +1675,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, sbio->dev = sctx->wr_tgtdev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio); + bio = btrfs_bio_alloc(sctx->pages_per_wr_bio); sbio->bio = bio; } @@ -2101,7 +2101,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, sbio->dev = spage->dev; bio = sbio->bio; if (!bio) { - bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio); + bio = btrfs_bio_alloc(sctx->pages_per_rd_bio); sbio->bio = bio; } @@ -2225,7 +2225,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) goto bioc_out; } - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = logical >> 9; bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; @@ -2841,7 +2841,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) if (ret || !bioc || !bioc->raid_map) goto bioc_out; - bio = btrfs_io_bio_alloc(BIO_MAX_VECS); + bio = btrfs_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = sparity->logic_start >> 9; bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 627505500467c..ccc4718ae8a39 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6573,7 +6573,7 @@ static void btrfs_end_bio(struct bio *bio) atomic_inc(&bioc->error); if (bio->bi_status == BLK_STS_IOERR || bio->bi_status == BLK_STS_TARGET) { - struct btrfs_device *dev = btrfs_io_bio(bio)->device; + struct btrfs_device *dev = btrfs_bio(bio)->device; ASSERT(dev->bdev); if (btrfs_op(bio) == BTRFS_MAP_WRITE) @@ -6599,7 +6599,7 @@ static void btrfs_end_bio(struct bio *bio) bio = bioc->orig_bio; } - 
btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; + btrfs_bio(bio)->mirror_num = bioc->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio */ @@ -6625,7 +6625,7 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, struct bio *bio, struct btrfs_fs_info *fs_info = bioc->fs_info; bio->bi_private = bioc; - btrfs_io_bio(bio)->device = dev; + btrfs_bio(bio)->device = dev; bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; /* @@ -6661,7 +6661,7 @@ static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logic /* Should be the original bio. */ WARN_ON(bio != bioc->orig_bio); - btrfs_io_bio(bio)->mirror_num = bioc->mirror_num; + btrfs_bio(bio)->mirror_num = bioc->mirror_num; bio->bi_iter.bi_sector = logical >> 9; if (atomic_read(&bioc->error) > bioc->max_errors) bio->bi_status = BLK_STS_IOERR; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index b69755fc0e0d4..83075d6855dba 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -304,38 +304,37 @@ struct btrfs_fs_devices { / sizeof(struct btrfs_stripe) + 1) /* - * we need the mirror number and stripe index to be passed around - * the call chain while we are processing end_io (especially errors). - * Really, what we need is a btrfs_io_context structure that has this info - * and is properly sized with its stripe array, but we're not there - * quite yet. We have our own btrfs bioset, and all of the bios - * we allocate are actually btrfs_io_bios. We'll cram as much of - * struct btrfs_io_context as we can into this over time. + * Additional info to pass along bio. + * + * Mostly for btrfs specific features like csum and mirror_num. */ -struct btrfs_io_bio { +struct btrfs_bio { unsigned int mirror_num; + + /* @device is for stripe IO submission. */ struct btrfs_device *device; u64 logical; u8 *csum; u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; struct bvec_iter iter; + /* * This member must come last, bio_alloc_bioset will allocate enough - * bytes for entire btrfs_io_bio but relies on bio being last. + * bytes for entire btrfs_bio but relies on bio being last. */ struct bio bio; }; -static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio) +static inline struct btrfs_bio *btrfs_bio(struct bio *bio) { - return container_of(bio, struct btrfs_io_bio, bio); + return container_of(bio, struct btrfs_bio, bio); } -static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio) +static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio) { - if (io_bio->csum != io_bio->csum_inline) { - kfree(io_bio->csum); - io_bio->csum = NULL; + if (bbio->csum != bbio->csum_inline) { + kfree(bbio->csum); + bbio->csum = NULL; } } From d5942b5b6f66da14419e62fce1b4bb83ccfff72d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 27 Jul 2021 17:01:14 -0400 Subject: [PATCH 0323/1202] btrfs: do not take the uuid_mutex in btrfs_rm_device We got the following lockdep splat while running fstests (specifically btrfs/003 and btrfs/020 in a row) with the new rc. This was uncovered by 87579e9b7d8d ("loop: use worker per cgroup instead of kworker") which converted loop to using workqueues, which comes with lockdep annotations that don't exist with kworkers. 
The lockdep splat is as follows: WARNING: possible circular locking dependency detected 5.14.0-rc2-custom+ #34 Not tainted ------------------------------------------------------ losetup/156417 is trying to acquire lock: ffff9c7645b02d38 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x84/0x600 but task is already holding lock: ffff9c7647395468 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x650 [loop] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #5 (&lo->lo_mutex){+.+.}-{3:3}: __mutex_lock+0xba/0x7c0 lo_open+0x28/0x60 [loop] blkdev_get_whole+0x28/0xf0 blkdev_get_by_dev.part.0+0x168/0x3c0 blkdev_open+0xd2/0xe0 do_dentry_open+0x163/0x3a0 path_openat+0x74d/0xa40 do_filp_open+0x9c/0x140 do_sys_openat2+0xb1/0x170 __x64_sys_openat+0x54/0x90 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #4 (&disk->open_mutex){+.+.}-{3:3}: __mutex_lock+0xba/0x7c0 blkdev_get_by_dev.part.0+0xd1/0x3c0 blkdev_get_by_path+0xc0/0xd0 btrfs_scan_one_device+0x52/0x1f0 [btrfs] btrfs_control_ioctl+0xac/0x170 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #3 (uuid_mutex){+.+.}-{3:3}: __mutex_lock+0xba/0x7c0 btrfs_rm_device+0x48/0x6a0 [btrfs] btrfs_ioctl+0x2d1c/0x3110 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #2 (sb_writers#11){.+.+}-{0:0}: lo_write_bvec+0x112/0x290 [loop] loop_process_work+0x25f/0xcb0 [loop] process_one_work+0x28f/0x5d0 worker_thread+0x55/0x3c0 kthread+0x140/0x170 ret_from_fork+0x22/0x30 -> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}: process_one_work+0x266/0x5d0 worker_thread+0x55/0x3c0 kthread+0x140/0x170 ret_from_fork+0x22/0x30 -> #0 ((wq_completion)loop0){+.+.}-{0:0}: __lock_acquire+0x1130/0x1dc0 lock_acquire+0xf5/0x320 flush_workqueue+0xae/0x600 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x650 [loop] lo_ioctl+0x29d/0x780 [loop] block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Chain exists of: (wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&lo->lo_mutex); lock(&disk->open_mutex); lock(&lo->lo_mutex); lock((wq_completion)loop0); *** DEADLOCK *** 1 lock held by losetup/156417: #0: ffff9c7647395468 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x650 [loop] stack backtrace: CPU: 8 PID: 156417 Comm: losetup Not tainted 5.14.0-rc2-custom+ #34 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x57/0x72 check_noncircular+0x10a/0x120 __lock_acquire+0x1130/0x1dc0 lock_acquire+0xf5/0x320 ? flush_workqueue+0x84/0x600 flush_workqueue+0xae/0x600 ? flush_workqueue+0x84/0x600 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x650 [loop] lo_ioctl+0x29d/0x780 [loop] ? __lock_acquire+0x3a0/0x1dc0 ? update_dl_rq_load_avg+0x152/0x360 ? lock_is_held_type+0xa5/0x120 ? find_held_lock.constprop.0+0x2b/0x80 block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f645884de6b Usually the uuid_mutex exists to protect the fs_devices that map together all of the devices that match a specific uuid. In rm_device we're messing with the uuid of a device, so it makes sense to protect that here. 
However, in doing that it pulls in a whole host of lockdep dependencies, as we call mnt_may_write() on the sb before we grab the uuid_mutex, thus we end up with the dependency chain under the uuid_mutex being added under the normal sb write dependency chain, which causes problems with loop devices. We don't need the uuid_mutex here, however. If we call btrfs_scan_one_device() before we scratch the super block we will find the fs_devices and not find the device itself and return EBUSY because the fs_devices is open. If we call it after the scratch happens it will not appear to be a valid btrfs file system. We do not need to worry about other fs_devices modifying operations here because we're protected by the exclusive operations locking. So drop the uuid_mutex here in order to fix the lockdep splat. A more detailed explanation from the discussion: We are worried about rm and scan racing with each other; before this change we'll zero the device out under the UUID mutex so when scan does run it'll make sure that it can go through the whole device scan thing without rm messing with us. We aren't worried if the scratch happens first, because the result is we don't think this is a btrfs device and we bail out. The only case we are concerned with is we scratch _after_ scan is able to read the superblock and gets a seemingly valid super block, so let's consider this case. Scan will call device_list_add() with the device we're removing. We'll call find_fsid_with_metadata_uuid() and get our fs_devices for this UUID. At this point we lock the fs_devices->device_list_mutex. This is what protects us in this case, but we have two cases here. 1. We aren't yet at the device removal part of the rm. We found our device, and device name matches our path, we go down and we set total_devices to our super number of devices, which doesn't affect anything because we haven't done the remove yet. 2. We are past the device removal part, which is protected by the device_list_mutex. Scan doesn't find the device, it goes down and does the if (fs_devices->opened) return -EBUSY; check and we bail out. Nothing about this situation is ideal, but the lockdep splat is real, and the fix is safe, though admittedly a bit scary looking. Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ copy more from the discussion ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ccc4718ae8a39..d7cc24ed9620a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2081,8 +2081,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 num_devices; int ret = 0; - mutex_lock(&uuid_mutex); - + /* + * The device list in fs_devices is accessed without locks (neither + * uuid_mutex nor device_list_mutex) as it won't change on a mounted + * filesystem and another device rm cannot run. 
+ */ num_devices = btrfs_num_devices(fs_info); ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); @@ -2126,11 +2129,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&fs_info->chunk_mutex); } - mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); if (!ret) btrfs_reada_remove_dev(device); - mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -2217,7 +2218,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, } out: - mutex_unlock(&uuid_mutex); return ret; error_undo: From ad576a1ff71b70d2a09a3dd94998c7c18d4864e2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Sep 2021 10:36:45 +0100 Subject: [PATCH 0324/1202] btrfs: assert that extent buffers are write locked instead of only locked We currently use lockdep_assert_held() at btrfs_assert_tree_locked(), and that checks that we hold a lock either in read mode or write mode. However in all contexts we use btrfs_assert_tree_locked(), we actually want to check if we are holding a write lock on the extent buffer's rw semaphore - it would be a bug if in any of those contexts we were holding a read lock instead. So change btrfs_assert_tree_locked() to use lockdep_assert_held_write() instead and, to make it more explicit, rename btrfs_assert_tree_locked() to btrfs_assert_tree_write_locked(), so that it's clear we want to check we are holding a write lock. For now there are no contexts where we want to assert that we must have a read lock, but in case that is needed in the future, we can add a new helper function that just calls out lockdep_assert_held_read(). Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 8 ++++---- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/locking.h | 7 ++++--- fs/btrfs/xattr.c | 2 +- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84627cbd5b5b5..d0c27a60e6a73 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -395,7 +395,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (*cow_ret == buf) unlock_orig = 1; - btrfs_assert_tree_locked(buf); + btrfs_assert_tree_write_locked(buf); WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && trans->transid != fs_info->running_transaction->transid); @@ -2487,7 +2487,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, int ret; BUG_ON(!path->nodes[level]); - btrfs_assert_tree_locked(path->nodes[level]); + btrfs_assert_tree_write_locked(path->nodes[level]); lower = path->nodes[level]; nritems = btrfs_header_nritems(lower); BUG_ON(slot > nritems); @@ -2827,7 +2827,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - btrfs_assert_tree_locked(path->nodes[1]); + btrfs_assert_tree_write_locked(path->nodes[1]); right = btrfs_read_node_slot(upper, slot + 1); /* @@ -3065,7 +3065,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems == 0) return 1; - btrfs_assert_tree_locked(path->nodes[1]); + btrfs_assert_tree_write_locked(path->nodes[1]); left = btrfs_read_node_slot(path->nodes[1], slot - 1); /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b28638c794136..37637539c5ab1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1036,7 +1036,7 @@ static int btree_set_page_dirty(struct page *page) BUG_ON(!eb); BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); 
BUG_ON(!atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); + btrfs_assert_tree_write_locked(eb); return __set_page_dirty_nobuffers(page); } ASSERT(PagePrivate(page) && page->private); @@ -1061,7 +1061,7 @@ static int btree_set_page_dirty(struct page *page) ASSERT(eb); ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); ASSERT(atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); + btrfs_assert_tree_write_locked(eb); free_extent_buffer(eb); cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits); @@ -1125,7 +1125,7 @@ void btrfs_clean_tree_block(struct extent_buffer *buf) struct btrfs_fs_info *fs_info = buf->fs_info; if (btrfs_header_generation(buf) == fs_info->running_transaction->transid) { - btrfs_assert_tree_locked(buf); + btrfs_assert_tree_write_locked(buf); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, @@ -4481,7 +4481,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif - btrfs_assert_tree_locked(buf); + btrfs_assert_tree_write_locked(buf); if (transid != fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", buf->start, transid, fs_info->generation); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e4b3eaee716ac..ec5de19f0acd7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5836,13 +5836,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, return -ENOMEM; } - btrfs_assert_tree_locked(parent); + btrfs_assert_tree_write_locked(parent); parent_level = btrfs_header_level(parent); atomic_inc(&parent->refs); path->nodes[parent_level] = parent; path->slots[parent_level] = btrfs_header_nritems(parent); - btrfs_assert_tree_locked(node); + btrfs_assert_tree_write_locked(node); level = btrfs_header_level(node); path->nodes[level] = node; path->slots[level] = 0; diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index a2e1f1f5c6e34..bbc45534ae9a6 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -96,11 +96,12 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root); #ifdef CONFIG_BTRFS_DEBUG -static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { - lockdep_assert_held(&eb->lock); +static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) +{ + lockdep_assert_held_write(&eb->lock); } #else -static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { } +static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { } #endif void btrfs_unlock_up_safe(struct btrfs_path *path, int level); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 8a4514283a4b8..2837b4c8424d7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -138,7 +138,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, * matches our target xattr, so lets check. 
*/ ret = 0; - btrfs_assert_tree_locked(path->nodes[0]); + btrfs_assert_tree_write_locked(path->nodes[0]); di = btrfs_match_dir_item_name(fs_info, path, name, name_len); if (!di && !(flags & XATTR_REPLACE)) { ret = -ENOSPC; From e03188dd598d985af8e3f5909c110f30864bc8b3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 23 Sep 2021 14:00:08 +0800 Subject: [PATCH 0325/1202] btrfs: make sure btrfs_io_context::fs_info is always initialized Currently btrfs_io_context::fs_info is only initialized in btrfs_map_bio(), but there are call sites like btrfs_map_sblock() which call __btrfs_map_block() directly, leaving bioc::fs_info uninitialized (NULL). Currently this is fine, but a later cleanup will rely on bioc::fs_info to grab fs_info, and this can be a hidden problem for such usage. This patch removes such a hidden uninitialized member by always assigning bioc::fs_info in alloc_btrfs_io_context(). Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d7cc24ed9620a..316c889a70406 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5807,7 +5807,8 @@ static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes) } } -static struct btrfs_io_context *alloc_btrfs_io_context(int total_stripes, +static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, + int total_stripes, int real_stripes) { struct btrfs_io_context *bioc = kzalloc( @@ -5827,6 +5828,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(int total_stripes, atomic_set(&bioc->error, 0); refcount_set(&bioc->refs, 1); + bioc->fs_info = fs_info; bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); @@ -5941,7 +5943,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, &stripe_index); } - bioc = alloc_btrfs_io_context(num_stripes, 0); + bioc = alloc_btrfs_io_context(fs_info, num_stripes, 0); if (!bioc) { ret = -ENOMEM; goto out; @@ -6463,7 +6465,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, tgtdev_indexes = num_stripes; } - bioc = alloc_btrfs_io_context(num_alloc_stripes, tgtdev_indexes); + bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes); if (!bioc) { ret = -ENOMEM; goto out; @@ -6699,7 +6701,6 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, bioc->orig_bio = first_bio; bioc->private = first_bio->bi_private; bioc->end_io = first_bio->bi_end_io; - bioc->fs_info = fs_info; atomic_set(&bioc->stripes_pending, bioc->num_stripes); if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && From 27e8f645a115a9e9d56007186828178cb07bed71 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 23 Sep 2021 14:00:09 +0800 Subject: [PATCH 0326/1202] btrfs: remove btrfs_raid_bio::fs_info member We can grab fs_info reliably from btrfs_raid_bio::bioc, as the bioc is always passed into alloc_rbio(), and only gets released when the raid bio is released. Remove the btrfs_raid_bio::fs_info member, and clean up all the @fs_info parameters for alloc_rbio() callers. 
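After this change, anything that needs the fs_info reaches it through the io context that every raid bio already carries, e.g. (one representative call site, taken from the start_async_work() hunk below):

  /* rbio->bioc is assigned in alloc_rbio() and outlives the rbio's IO */
  btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);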
Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 48 +++++++++++++++++++++++----------------------- fs/btrfs/raid56.h | 22 ++++++++++----------- fs/btrfs/scrub.c | 8 ++++---- fs/btrfs/volumes.c | 7 +++---- 4 files changed, 41 insertions(+), 44 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 02aa3fbb8108f..0e239a4c3b264 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -60,7 +60,6 @@ enum btrfs_rbio_ops { }; struct btrfs_raid_bio { - struct btrfs_fs_info *fs_info; struct btrfs_io_context *bioc; /* while we're doing rmw on a stripe @@ -192,7 +191,7 @@ static void scrub_parity_work(struct btrfs_work *work); static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) { btrfs_init_work(&rbio->work, work_func, NULL, NULL); - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); + btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work); } /* @@ -345,7 +344,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; - table = rbio->fs_info->stripe_hash_table; + table = rbio->bioc->fs_info->stripe_hash_table; h = table->table + bucket; /* hold the lock for the bucket because we may be @@ -400,7 +399,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; - table = rbio->fs_info->stripe_hash_table; + table = rbio->bioc->fs_info->stripe_hash_table; spin_lock_irqsave(&table->cache_lock, flags); __remove_rbio_from_cache(rbio); @@ -460,7 +459,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio) if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags)) return; - table = rbio->fs_info->stripe_hash_table; + table = rbio->bioc->fs_info->stripe_hash_table; spin_lock_irqsave(&table->cache_lock, flags); spin_lock(&rbio->bio_list_lock); @@ -668,7 +667,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; - h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio); + h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio); spin_lock_irqsave(&h->lock, flags); list_for_each_entry(cur, &h->hash_list, hash_list) { @@ -750,7 +749,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) int keep_cache = 0; bucket = rbio_bucket(rbio); - h = rbio->fs_info->stripe_hash_table->table + bucket; + h = rbio->bioc->fs_info->stripe_hash_table->table + bucket; if (list_empty(&rbio->plug_list)) cache_rbio(rbio); @@ -864,7 +863,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) struct bio *extra; if (rbio->generic_bio_cnt) - btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); + btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt); /* * At this moment, rbio->bio_list is empty, however since rbio does not @@ -987,7 +986,6 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, INIT_LIST_HEAD(&rbio->stripe_cache); INIT_LIST_HEAD(&rbio->hash_list); rbio->bioc = bioc; - rbio->fs_info = fs_info; rbio->stripe_len = stripe_len; rbio->nr_pages = num_pages; rbio->real_stripes = real_stripes; @@ -1546,7 +1544,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) bio->bi_end_io = raid_rmw_end_io; bio->bi_opf = REQ_OP_READ; - btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); submit_bio(bio); } @@ -1718,9 
+1716,10 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) /* * our main entry point for writes from the rest of the FS. */ -int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len) +int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, + u64 stripe_len) { + struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; struct btrfs_plug_cb *plug = NULL; struct blk_plug_cb *cb; @@ -2091,7 +2090,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) bio->bi_end_io = raid_recover_end_io; bio->bi_opf = REQ_OP_READ; - btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); submit_bio(bio); } @@ -2115,10 +2114,10 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * so we assume the bio they send down corresponds to a failed part * of the drive. */ -int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len, - int mirror_num, int generic_io) +int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, + u64 stripe_len, int mirror_num, int generic_io) { + struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; int ret; @@ -2220,12 +2219,12 @@ static void read_rebuild_work(struct btrfs_work *work) * is those pages just hold metadata or file data with checksum. */ -struct btrfs_raid_bio * -raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len, - struct btrfs_device *scrub_dev, - unsigned long *dbitmap, int stripe_nsectors) +struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, + struct btrfs_io_context *bioc, + u64 stripe_len, struct btrfs_device *scrub_dev, + unsigned long *dbitmap, int stripe_nsectors) { + struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; int i; @@ -2633,7 +2632,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) bio->bi_end_io = raid56_parity_scrub_end_io; bio->bi_opf = REQ_OP_READ; - btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); submit_bio(bio); } @@ -2669,9 +2668,10 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) /* The following code is used for dev replace of a missing RAID 5/6 device. 
*/ struct btrfs_raid_bio * -raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 length) +raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc, + u64 length) { + struct btrfs_fs_info *fs_info = bioc->fs_info; struct btrfs_raid_bio *rbio; rbio = alloc_rbio(fs_info, bioc, length); diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 838d3a5e07ef4..72c00fc284b55 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -30,25 +30,23 @@ static inline int nr_data_stripes(const struct map_lookup *map) struct btrfs_raid_bio; struct btrfs_device; -int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len, - int mirror_num, int generic_io); -int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len); +int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, + u64 stripe_len, int mirror_num, int generic_io); +int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc, + u64 stripe_len); void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, u64 logical); -struct btrfs_raid_bio * -raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 stripe_len, - struct btrfs_device *scrub_dev, - unsigned long *dbitmap, int stripe_nsectors); +struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, + struct btrfs_io_context *bioc, u64 stripe_len, + struct btrfs_device *scrub_dev, + unsigned long *dbitmap, int stripe_nsectors); void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); struct btrfs_raid_bio * -raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, - struct btrfs_io_context *bioc, u64 length); +raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc, + u64 length); void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index c8658546607f8..bd3cd7427391d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1400,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, bio->bi_end_io = scrub_bio_wait_endio; mirror_num = spage->sblock->pagev[0]->mirror_num; - ret = raid56_parity_recover(fs_info, bio, spage->recover->bioc, + ret = raid56_parity_recover(bio, spage->recover->bioc, spage->recover->map_length, mirror_num, 0); if (ret) @@ -2230,7 +2230,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) bio->bi_private = sblock; bio->bi_end_io = scrub_missing_raid56_end_io; - rbio = raid56_alloc_missing_rbio(fs_info, bio, bioc, length); + rbio = raid56_alloc_missing_rbio(bio, bioc, length); if (!rbio) goto rbio_out; @@ -2846,8 +2846,8 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) bio->bi_private = sparity; bio->bi_end_io = scrub_parity_bio_endio; - rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bioc, - length, sparity->scrub_dev, + rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length, + sparity->scrub_dev, sparity->dbitmap, sparity->nsectors); if (!rbio) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 316c889a70406..6031e2f4c6bc3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6708,11 +6708,10 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, /* In this case, map_length has been set to 
the length of a single stripe; not the whole write */ if (btrfs_op(bio) == BTRFS_MAP_WRITE) { - ret = raid56_parity_write(fs_info, bio, bioc, - map_length); + ret = raid56_parity_write(bio, bioc, map_length); } else { - ret = raid56_parity_recover(fs_info, bio, bioc, - map_length, mirror_num, 1); + ret = raid56_parity_recover(bio, bioc, map_length, + mirror_num, 1); } btrfs_bio_counter_dec(fs_info); From e63ecaac593216e3fd3b16ec77cba24464598175 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 24 Sep 2021 12:28:13 +0100 Subject: [PATCH 0327/1202] btrfs: loop only once over data sizes array when inserting an item batch When inserting a batch of items into a btree, we end up looping over the data sizes array 3 times: 1) Once in the caller of btrfs_insert_empty_items(), when it populates the array with the data sizes for each item; 2) Once at btrfs_insert_empty_items() to sum the elements of the data sizes array and compute the total data size; 3) And then once again at setup_items_for_insert(), where we do exactly the same as what we do at btrfs_insert_empty_items(), to compute the total data size. That is not bad for small arrays, but when the arrays have hundreds of elements, the time spent on looping is not negligible. For example when doing batch inserts of delayed items for dir index items or when logging a directory, it's common to have 200 to 260 dir index items in a single batch when using a leaf size of 16K and using file names between 8 and 12 characters. For a 64K leaf size, multiply that by 4. Taking into account that during directory logging or when flushing delayed dir index items we can have many of those large batches, the time spent on the looping adds up quickly. It's also more important to avoid it at setup_items_for_insert(), since we are holding a write lock on a leaf and, in some cases, on upper nodes of the btree, which causes us to block other tasks that want to access the leaf and nodes for longer than necessary. So change the code so that setup_items_for_insert() and btrfs_insert_empty_items() no longer compute the total data size, and instead rely on the caller to supply it. This makes us loop over the array only once, where we can both populate the data size array and compute the total data size, taking advantage of spatial and temporal locality. To make this more manageable, use a structure to contain all the relevant details for a batch of items (keys array, data sizes array, total data size, number of items), and use it as an argument for btrfs_insert_empty_items() and setup_items_for_insert(). This patch is part of a small patchset that is comprised of the following patches: btrfs: loop only once over data sizes array when inserting an item batch btrfs: unexport setup_items_for_insert() btrfs: use single bulk copy operations when logging directories This is patch 1/3 and performance results, and the specific tests, are included in the changelog of patch 3/3. 
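As a rough caller-side sketch (simplified; ins_keys/ins_sizes are assumed caller-allocated arrays and key_for()/size_for() are hypothetical helpers), the batch is populated and the total summed in a single pass:

  struct btrfs_item_batch batch;
  int i;

  batch.keys = ins_keys;
  batch.data_sizes = ins_sizes;
  batch.total_data_size = 0;
  batch.nr = count;
  for (i = 0; i < count; i++) {
          ins_keys[i] = key_for(i);
          ins_sizes[i] = size_for(i);
          batch.total_data_size += ins_sizes[i];   /* summed while filling */
  }
  ret = btrfs_insert_empty_items(trans, root, path, &batch);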
Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 57 ++++++++++++---------------- fs/btrfs/ctree.h | 57 +++++++++++++++++++++++++--- fs/btrfs/delayed-inode.c | 41 ++++++++++---------- fs/btrfs/file.c | 3 +- fs/btrfs/inode.c | 8 +++- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 4 +- fs/btrfs/tree-log.c | 31 ++++++++++----- 8 files changed, 131 insertions(+), 72 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d0c27a60e6a73..49e2bbda2d7ec 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3605,7 +3605,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, return ret; path->slots[0]++; - setup_items_for_insert(root, path, new_key, &item_size, 1); + setup_item_for_insert(root, path, new_key, item_size); leaf = path->nodes[0]; memcpy_extent_buffer(leaf, btrfs_item_ptr_offset(leaf, path->slots[0]), @@ -3785,13 +3785,10 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) * * @root: root we are inserting items to * @path: points to the leaf/slot where we are going to insert new items - * @cpu_key: array of keys for items to be inserted - * @data_size: size of the body of each item we are going to insert - * @nr: size of @cpu_key/@data_size arrays + * @batch: information about the batch of items to insert */ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - int nr) + const struct btrfs_item_batch *batch) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_item *item; @@ -3803,14 +3800,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, int slot; struct btrfs_map_token token; u32 total_size; - u32 total_data = 0; - - for (i = 0; i < nr; i++) - total_data += data_size[i]; - total_size = total_data + (nr * sizeof(struct btrfs_item)); + /* + * Before anything else, update keys in the parent and other ancestors + * if needed, then release the write locks on them, so that other tasks + * can use them while we modify the leaf. 
+ */ if (path->slots[0] == 0) { - btrfs_cpu_key_to_disk(&disk_key, cpu_key); + btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]); fixup_low_keys(path, &disk_key, 1); } btrfs_unlock_up_safe(path, 1); @@ -3820,6 +3817,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(leaf); + total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item)); if (btrfs_leaf_free_space(leaf) < total_size) { btrfs_print_leaf(leaf); @@ -3849,31 +3847,32 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, item = btrfs_item_nr(i); ioff = btrfs_token_item_offset(&token, item); btrfs_set_token_item_offset(&token, item, - ioff - total_data); + ioff - batch->total_data_size); } /* shift the items */ - memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr), btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + - data_end - total_data, BTRFS_LEAF_DATA_OFFSET + - data_end, old_data - data_end); + data_end - batch->total_data_size, + BTRFS_LEAF_DATA_OFFSET + data_end, + old_data - data_end); data_end = old_data; } /* setup the item for the new data */ - for (i = 0; i < nr; i++) { - btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); + for (i = 0; i < batch->nr; i++) { + btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]); btrfs_set_item_key(leaf, &disk_key, slot + i); item = btrfs_item_nr(slot + i); - data_end -= data_size[i]; + data_end -= batch->data_sizes[i]; btrfs_set_token_item_offset(&token, item, data_end); - btrfs_set_token_item_size(&token, item, data_size[i]); + btrfs_set_token_item_size(&token, item, batch->data_sizes[i]); } - btrfs_set_header_nritems(leaf, nritems + nr); + btrfs_set_header_nritems(leaf, nritems + batch->nr); btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(leaf) < 0) { @@ -3889,20 +3888,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - int nr) + const struct btrfs_item_batch *batch) { int ret = 0; int slot; - int i; - u32 total_size = 0; - u32 total_data = 0; - - for (i = 0; i < nr; i++) - total_data += data_size[i]; + u32 total_size; - total_size = total_data + (nr * sizeof(struct btrfs_item)); - ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); + total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item)); + ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1); if (ret == 0) return -EEXIST; if (ret < 0) @@ -3911,7 +3904,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, slot = path->slots[0]; BUG_ON(slot < 0); - setup_items_for_insert(root, path, cpu_key, data_size, nr); + setup_items_for_insert(root, path, batch); return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b394ba4737e41..140fe98f5e385 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2897,16 +2897,56 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, return btrfs_del_items(trans, root, path, path->slots[0], 1); } +/* + * Describes a batch of items to insert in a btree. This is used by + * btrfs_insert_empty_items() and setup_items_for_insert(). 
+ */ +struct btrfs_item_batch { + /* + * Pointer to an array containing the keys of the items to insert (in + * sorted order). + */ + const struct btrfs_key *keys; + /* Pointer to an array containing the data size for each item to insert. */ + const u32 *data_sizes; + /* + * The sum of data sizes for all items. The caller can compute this while + * setting up the data_sizes array, so it ends up being more efficient + * than having btrfs_insert_empty_items() or setup_item_for_insert() + * doing it, as it would avoid an extra loop over a potentially large + * array, and in the case of setup_item_for_insert(), we would be doing + * it while holding a write lock on a leaf and often on upper level nodes + * too, unnecessarily increasing the size of a critical section. + */ + u32 total_data_size; + /* Size of the keys and data_sizes arrays (number of items in the batch). */ + int nr; +}; + void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - int nr); + const struct btrfs_item_batch *batch); + +static inline void setup_item_for_insert(struct btrfs_root *root, + struct btrfs_path *path, + const struct btrfs_key *key, + u32 data_size) +{ + struct btrfs_item_batch batch; + + batch.keys = key; + batch.data_sizes = &data_size; + batch.total_data_size = data_size; + batch.nr = 1; + + setup_items_for_insert(root, path, &batch); +} + int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, void *data, u32 data_size); int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_key *cpu_key, u32 *data_size, - int nr); + const struct btrfs_item_batch *batch); static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -2914,7 +2954,14 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, const struct btrfs_key *key, u32 data_size) { - return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1); + struct btrfs_item_batch batch; + + batch.keys = key; + batch.data_sizes = &data_size; + batch.total_data_size = data_size; + batch.nr = 1; + + return btrfs_insert_empty_items(trans, root, path, &batch); } int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 1e08eb2b27f0c..e164766dcc384 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -679,19 +679,18 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct btrfs_delayed_item *first_item) { - LIST_HEAD(batch); + LIST_HEAD(item_list); struct btrfs_delayed_item *curr; struct btrfs_delayed_item *next; const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info); + struct btrfs_item_batch batch; int total_size; - int nitems; char *ins_data = NULL; - struct btrfs_key *ins_keys; - u32 *ins_sizes; int ret; - list_add_tail(&first_item->tree_list, &batch); - nitems = 1; + list_add_tail(&first_item->tree_list, &item_list); + batch.total_data_size = first_item->data_len; + batch.nr = 1; total_size = first_item->data_len + sizeof(struct btrfs_item); curr = first_item; @@ -706,39 +705,43 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, if (total_size + next_size > max_size) break; - list_add_tail(&next->tree_list, &batch); - nitems++; + list_add_tail(&next->tree_list, &item_list); + batch.nr++; total_size += next_size; + 
batch.total_data_size += next->data_len; curr = next; } - if (nitems == 1) { - ins_keys = &first_item->key; - ins_sizes = &first_item->data_len; + if (batch.nr == 1) { + batch.keys = &first_item->key; + batch.data_sizes = &first_item->data_len; } else { + struct btrfs_key *ins_keys; + u32 *ins_sizes; int i = 0; - ins_data = kmalloc(nitems * sizeof(u32) + - nitems * sizeof(struct btrfs_key), GFP_NOFS); + ins_data = kmalloc(batch.nr * sizeof(u32) + + batch.nr * sizeof(struct btrfs_key), GFP_NOFS); if (!ins_data) { ret = -ENOMEM; goto out; } ins_sizes = (u32 *)ins_data; - ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32)); - list_for_each_entry(curr, &batch, tree_list) { + ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32)); + batch.keys = ins_keys; + batch.data_sizes = ins_sizes; + list_for_each_entry(curr, &item_list, tree_list) { ins_keys[i] = curr->key; ins_sizes[i] = curr->data_len; i++; } } - ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes, - nitems); + ret = btrfs_insert_empty_items(trans, root, path, &batch); if (ret) goto out; - list_for_each_entry(curr, &batch, tree_list) { + list_for_each_entry(curr, &item_list, tree_list) { char *data_ptr; data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char); @@ -754,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, */ btrfs_release_path(path); - list_for_each_entry_safe(curr, next, &batch, tree_list) { + list_for_each_entry_safe(curr, next, &item_list, tree_list) { list_del(&curr->tree_list); btrfs_delayed_item_release_metadata(root, curr); btrfs_release_delayed_item(curr); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a1762363f61fa..eee512743d52d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1020,8 +1020,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) path->slots[0]++; } - setup_items_for_insert(root, path, &key, - &args->extent_item_size, 1); + setup_item_for_insert(root, path, &key, args->extent_item_size); args->extent_inserted = true; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a643be30c18a8..6b7e01033595d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6445,7 +6445,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_ref *ref; struct btrfs_key key[2]; u32 sizes[2]; - int nitems = name ? 2 : 1; + struct btrfs_item_batch batch; unsigned long ptr; unsigned int nofs_flag; int ret; @@ -6537,7 +6537,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, goto fail; } - ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); + batch.keys = &key[0]; + batch.data_sizes = &sizes[0]; + batch.total_data_size = sizes[0] + (name ? sizes[1] : 0); + batch.nr = name ? 
2 : 1; + ret = btrfs_insert_empty_items(trans, root, path, &batch); if (ret != 0) goto fail_unlock; diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index df54cdfdc2508..c9ab65e3d8e8f 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -60,7 +60,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = 0; - setup_items_for_insert(root, path, &key, &value_len, 1); + setup_item_for_insert(root, path, &key, value_len); item = btrfs_item_nr(0); write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0), value_len); diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index c9874b12d337c..af62d05435b96 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -33,7 +33,7 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = start; - setup_items_for_insert(root, &path, &key, &value_len, 1); + setup_item_for_insert(root, &path, &key, value_len); fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, 1); btrfs_set_file_extent_type(leaf, fi, type); @@ -63,7 +63,7 @@ static void insert_inode_item_key(struct btrfs_root *root) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - setup_items_for_insert(root, &path, &key, &value_len, 1); + setup_item_for_insert(root, &path, &key, value_len); } /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 46932ed65f184..f3688e753c36a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3668,8 +3668,7 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, int count) { char *ins_data = NULL; - struct btrfs_key *ins_keys; - u32 *ins_sizes; + struct btrfs_item_batch batch; struct extent_buffer *dst; struct btrfs_key key; u32 item_size; @@ -3677,13 +3676,18 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, int i; ASSERT(count > 0); + batch.nr = count; if (count == 1) { btrfs_item_key_to_cpu(src, &key, start_slot); item_size = btrfs_item_size_nr(src, start_slot); - ins_keys = &key; - ins_sizes = &item_size; + batch.keys = &key; + batch.data_sizes = &item_size; + batch.total_data_size = item_size; } else { + struct btrfs_key *ins_keys; + u32 *ins_sizes; + ins_data = kmalloc(count * sizeof(u32) + count * sizeof(struct btrfs_key), GFP_NOFS); if (!ins_data) @@ -3691,17 +3695,20 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + count * sizeof(u32)); + batch.keys = ins_keys; + batch.data_sizes = ins_sizes; + batch.total_data_size = 0; for (i = 0; i < count; i++) { const int slot = start_slot + i; btrfs_item_key_to_cpu(src, &ins_keys[i], slot); ins_sizes[i] = btrfs_item_size_nr(src, slot); + batch.total_data_size += ins_sizes[i]; } } - ret = btrfs_insert_empty_items(trans, log, dst_path, ins_keys, ins_sizes, - count); + ret = btrfs_insert_empty_items(trans, log, dst_path, &batch); if (ret) goto out; @@ -3712,7 +3719,8 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0]); src_offset = btrfs_item_ptr_offset(src, start_slot + i); - copy_extent_buffer(dst, src, dst_offset, src_offset, ins_sizes[i]); + copy_extent_buffer(dst, src, dst_offset, src_offset, + batch.data_sizes[i]); dst_path->slots[0]++; } btrfs_release_path(dst_path); @@ -4322,6 +4330,7 @@ static 
noinline int copy_items(struct btrfs_trans_handle *trans, int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; + struct btrfs_item_batch batch; char *ins_data; int i; struct list_head ordered_sums; @@ -4336,13 +4345,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); + batch.keys = ins_keys; + batch.data_sizes = ins_sizes; + batch.total_data_size = 0; + batch.nr = nr; for (i = 0; i < nr; i++) { ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot); + batch.total_data_size += ins_sizes[i]; btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot); } - ret = btrfs_insert_empty_items(trans, log, dst_path, - ins_keys, ins_sizes, nr); + ret = btrfs_insert_empty_items(trans, log, dst_path, &batch); if (ret) { kfree(ins_data); return ret; From 0a7fe481013f43f1c6eee0aa0368b247d2b1de1f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 24 Sep 2021 12:28:14 +0100 Subject: [PATCH 0328/1202] btrfs: unexport setup_items_for_insert() Since setup_items_for_insert() is not used anymore outside of ctree.c, make it static and remove its prototype from ctree.h. This also requires to move the definition of setup_item_for_insert() from ctree.h to ctree.c and move down btrfs_duplicate_item() so that it's defined after setup_items_for_insert(). Further, since setup_item_for_insert() is used outside ctree.c, rename it to btrfs_setup_item_for_insert(). This patch is part of a small patchset that is comprised of the following patches: btrfs: loop only once over data sizes array when inserting an item batch btrfs: unexport setup_items_for_insert() btrfs: use single bulk copy operations when logging directories This is patch 2/3 and performance results, and the specific tests, are included in the changelog of patch 3/3. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 95 +++++++++++++++++----------- fs/btrfs/ctree.h | 24 ++----- fs/btrfs/file.c | 2 +- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 4 +- 5 files changed, 68 insertions(+), 59 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 49e2bbda2d7ec..74c8e18f3720d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3580,40 +3580,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, return ret; } -/* - * This function duplicate a item, giving 'new_key' to the new item. - * It guarantees both items live in the same tree leaf and the new item - * is contiguous with the original item. - * - * This allows us to split file extent in place, keeping a lock on the - * leaf the entire time. - */ -int btrfs_duplicate_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - const struct btrfs_key *new_key) -{ - struct extent_buffer *leaf; - int ret; - u32 item_size; - - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ret = setup_leaf_for_split(trans, root, path, - item_size + sizeof(struct btrfs_item)); - if (ret) - return ret; - - path->slots[0]++; - setup_item_for_insert(root, path, new_key, item_size); - leaf = path->nodes[0]; - memcpy_extent_buffer(leaf, - btrfs_item_ptr_offset(leaf, path->slots[0]), - btrfs_item_ptr_offset(leaf, path->slots[0] - 1), - item_size); - return 0; -} - /* * make the item pointed to by the path smaller. 
new_size indicates * how small to make it, and from_end tells us if we just chop bytes @@ -3787,8 +3753,8 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size) * @path: points to the leaf/slot where we are going to insert new items * @batch: information about the batch of items to insert */ -void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_item_batch *batch) +static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, + const struct btrfs_item_batch *batch) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_item *item; @@ -3881,6 +3847,29 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, } } +/* + * Insert a new item into a leaf. + * + * @root: The root of the btree. + * @path: A path pointing to the target leaf and slot. + * @key: The key of the new item. + * @data_size: The size of the data associated with the new key. + */ +void btrfs_setup_item_for_insert(struct btrfs_root *root, + struct btrfs_path *path, + const struct btrfs_key *key, + u32 data_size) +{ + struct btrfs_item_batch batch; + + batch.keys = key; + batch.data_sizes = &data_size; + batch.total_data_size = data_size; + batch.nr = 1; + + setup_items_for_insert(root, path, &batch); +} + /* * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. @@ -3935,6 +3924,40 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ret; } +/* + * This function duplicates an item, giving 'new_key' to the new item. + * It guarantees both items live in the same tree leaf and the new item is + * contiguous with the original item. + * + * This allows us to split a file extent in place, keeping a lock on the leaf + * the entire time. + */ +int btrfs_duplicate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + const struct btrfs_key *new_key) +{ + struct extent_buffer *leaf; + int ret; + u32 item_size; + + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + ret = setup_leaf_for_split(trans, root, path, + item_size + sizeof(struct btrfs_item)); + if (ret) + return ret; + + path->slots[0]++; + btrfs_setup_item_for_insert(root, path, new_key, item_size); + leaf = path->nodes[0]; + memcpy_extent_buffer(leaf, + btrfs_item_ptr_offset(leaf, path->slots[0]), + btrfs_item_ptr_offset(leaf, path->slots[0] - 1), + item_size); + return 0; +} + /* * delete the pointer from a given node. * diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 140fe98f5e385..ad674f092b4e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2899,7 +2899,7 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans, /* * Describes a batch of items to insert in a btree. This is used by - * btrfs_insert_empty_items() and setup_items_for_insert(). + * btrfs_insert_empty_items(). 
*/ struct btrfs_item_batch { /* @@ -2923,24 +2923,10 @@ struct btrfs_item_batch { int nr; }; -void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, - const struct btrfs_item_batch *batch); - -static inline void setup_item_for_insert(struct btrfs_root *root, - struct btrfs_path *path, - const struct btrfs_key *key, - u32 data_size) -{ - struct btrfs_item_batch batch; - - batch.keys = key; - batch.data_sizes = &data_size; - batch.total_data_size = data_size; - batch.nr = 1; - - setup_items_for_insert(root, path, &batch); -} - +void btrfs_setup_item_for_insert(struct btrfs_root *root, + struct btrfs_path *path, + const struct btrfs_key *key, + u32 data_size); int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, void *data, u32 data_size); int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eee512743d52d..6c50561f932fe 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1020,7 +1020,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) path->slots[0]++; } - setup_item_for_insert(root, path, &key, args->extent_item_size); + btrfs_setup_item_for_insert(root, path, &key, args->extent_item_size); args->extent_inserted = true; } diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index c9ab65e3d8e8f..2a95f7224e185 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -60,7 +60,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = 0; - setup_item_for_insert(root, path, &key, value_len); + btrfs_setup_item_for_insert(root, path, &key, value_len); item = btrfs_item_nr(0); write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0), value_len); diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index af62d05435b96..cac89c3881311 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -33,7 +33,7 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = start; - setup_item_for_insert(root, &path, &key, value_len); + btrfs_setup_item_for_insert(root, &path, &key, value_len); fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); btrfs_set_file_extent_generation(leaf, fi, 1); btrfs_set_file_extent_type(leaf, fi, type); @@ -63,7 +63,7 @@ static void insert_inode_item_key(struct btrfs_root *root) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - setup_item_for_insert(root, &path, &key, value_len); + btrfs_setup_item_for_insert(root, &path, &key, value_len); } /* From ab9027c8aa2c9812200de9475d123078167f27c2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 24 Sep 2021 12:28:15 +0100 Subject: [PATCH 0329/1202] btrfs: use single bulk copy operations when logging directories When logging a directory and inserting a batch of directory items, we are copying the data of each item from a leaf in the fs/subvolume tree to a leaf in a log tree, separately. This is not really needed, since we are copying from a contiguous memory area into another one, so we can use a single copy operation to copy all items at once. 
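Item data in a leaf grows from the end toward the start, so the data of the last item in the batch sits at the lowest offset; starting the copy there covers the entire batch. In sketch form (taken from the change below):

  dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0] + count - 1);
  src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1);
  copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size);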
This patch is part of a small patchset that is comprised of the following patches: btrfs: loop only once over data sizes array when inserting an item batch btrfs: unexport setup_items_for_insert() btrfs: use single bulk copy operations when logging directories This is patch 3/3. The following test was used to compare performance of a branch without the patchset versus one branch that has the whole patchset applied: $ cat dir-fsync-test.sh #!/bin/bash DEV=/dev/nvme0n1 MNT=/mnt/nvme0n1 NUM_NEW_FILES=1000000 NUM_FILE_DELETES=1000 LEAF_SIZE=16K mkfs.btrfs -f -n $LEAF_SIZE $DEV mount -o ssd $DEV $MNT mkdir $MNT/testdir for ((i = 1; i <= $NUM_NEW_FILES; i++)); do echo -n > $MNT/testdir/file_$i done # Fsync the directory, this will log the new dir items and the inodes # they point to, because these are new inodes. start=$(date +%s%N) xfs_io -c "fsync" $MNT/testdir end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "dir fsync took $dur ms after adding $NUM_NEW_FILES files" # sync to force transaction commit and wipeout the log. sync del_inc=$(( $NUM_NEW_FILES / $NUM_FILE_DELETES )) for ((i = 1; i <= $NUM_NEW_FILES; i += $del_inc)); do rm -f $MNT/testdir/file_$i done # Fsync the directory, this will only log dir items, there are no # dentries pointing to new inodes. start=$(date +%s%N) xfs_io -c "fsync" $MNT/testdir end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "dir fsync took $dur ms after deleting $NUM_FILE_DELETES files" umount $MNT The tests were run on a non-debug kernel (Debian's default kernel config) and were the following: *** with a leaf size of 16K, before patchset *** dir fsync took 8482 ms after adding 1000000 files dir fsync took 166 ms after deleting 1000 files *** with a leaf size of 16K, after patchset *** dir fsync took 8196 ms after adding 1000000 files (-3.4%) dir fsync took 143 ms after deleting 1000 files (-14.9%) *** with a leaf size of 64K, before patchset *** dir fsync took 12851 ms after adding 1000000 files dir fsync took 466 ms after deleting 1000 files *** with a leaf size of 64K, after patchset *** dir fsync took 12287 ms after adding 1000000 files (-4.5%) dir fsync took 414 ms after deleting 1000 files (-11.8%) Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f3688e753c36a..641e2b5a3b649 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3670,6 +3670,8 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, char *ins_data = NULL; struct btrfs_item_batch batch; struct extent_buffer *dst; + unsigned long src_offset; + unsigned long dst_offset; struct btrfs_key key; u32 item_size; int ret; @@ -3713,16 +3715,19 @@ static int flush_dir_items_batch(struct btrfs_trans_handle *trans, goto out; dst = dst_path->nodes[0]; - for (i = 0; i < count; i++) { - unsigned long src_offset; - unsigned long dst_offset; - - dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0]); - src_offset = btrfs_item_ptr_offset(src, start_slot + i); - copy_extent_buffer(dst, src, dst_offset, src_offset, - batch.data_sizes[i]); - dst_path->slots[0]++; - } + /* + * Copy all the items in bulk, in a single copy operation. Item data is + * organized such that it's placed at the end of a leaf and from right + * to left. 
For example, the data for the second item ends at an offset + * that matches the offset where the data for the first item starts, the + * data for the third item ends at an offset that matches the offset + * where the data of the second items starts, and so on. + * Therefore our source and destination start offsets for copy match the + * offsets of the last items (highest slots). + */ + dst_offset = btrfs_item_ptr_offset(dst, dst_path->slots[0] + count - 1); + src_offset = btrfs_item_ptr_offset(src, start_slot + count - 1); + copy_extent_buffer(dst, src, dst_offset, src_offset, batch.total_data_size); btrfs_release_path(dst_path); out: kfree(ins_data); From fddcf0e1e2ab643b7a6e629439d405a63a5a0a46 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:43 +0800 Subject: [PATCH 0330/1202] btrfs: remove unused parameter nr_pages in add_ra_bio_pages() Variable @nr_pages only gets increased but is never used. Remove it. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 2a86a2a1494b3..b50927740d278 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -551,7 +551,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, u64 isize = i_size_read(inode); int ret; struct page *page; - unsigned long nr_pages = 0; struct extent_map *em; struct address_space *mapping = inode->i_mapping; struct extent_map_tree *em_tree; @@ -647,7 +646,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, PAGE_SIZE, 0); if (ret == PAGE_SIZE) { - nr_pages++; put_page(page); } else { unlock_extent(tree, last_offset, end); From 3200e435491ef74f488d5e7ae3ba6e1441227a91 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:44 +0800 Subject: [PATCH 0331/1202] btrfs: remove unnecessary parameter delalloc_start for writepage_delalloc() In function __extent_writepage() we always pass the page start to @delalloc_start for writepage_delalloc(). Thus we don't really need the @delalloc_start parameter, as we can extract it from @page. Remove the @delalloc_start parameter and make __extent_writepage() declare @page_start and @page_end as const.
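A condensed sketch of the simplified callee (not the full function body; both offsets now derive from @page):

  static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                  struct page *page, struct writeback_control *wbc,
                  unsigned long *nr_written)
  {
          u64 delalloc_start = page_offset(page);  /* derived, not passed in */
          u64 page_end = delalloc_start + PAGE_SIZE - 1;
          /* ... */
  }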
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c56973f7daae5..2570bbbd6a137 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3768,10 +3768,11 @@ static void update_nr_written(struct writeback_control *wbc, */ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, struct page *page, struct writeback_control *wbc, - u64 delalloc_start, unsigned long *nr_written) + unsigned long *nr_written) { - u64 page_end = delalloc_start + PAGE_SIZE - 1; + u64 page_end = page_offset(page) + PAGE_SIZE - 1; bool found; + u64 delalloc_start = page_offset(page); u64 delalloc_to_write = 0; u64 delalloc_end = 0; int ret; @@ -4049,8 +4050,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd) { struct inode *inode = page->mapping->host; - u64 start = page_offset(page); - u64 page_end = start + PAGE_SIZE - 1; + const u64 page_start = page_offset(page); + const u64 page_end = page_start + PAGE_SIZE - 1; int ret; int nr = 0; size_t pg_offset; @@ -4085,8 +4086,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } if (!epd->extent_locked) { - ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start, - &nr_written); + ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written); if (ret == 1) return 0; if (ret) @@ -4136,7 +4136,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, * capable of that. */ if (PageError(page)) - end_extent_writepage(page, ret, start, page_end); + end_extent_writepage(page, ret, page_start, page_end); unlock_page(page); ASSERT(ret <= 0); return ret; From 62824c604f463b4fe95fe4e8a4d77caec2c93c55 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:45 +0800 Subject: [PATCH 0332/1202] btrfs: use async_chunk::async_cow to replace the confusing pending pointer For structure async_chunk, we use a very strange member layout to reach the async_cow structure that owns this async_chunk. At initialization, it goes like this: async_chunk[i].pending = &ctx->num_chunks; Then at async_cow_free() we do a super weird free: /* * Since the pointer to 'pending' is at the beginning of the array of * async_chunk's, freeing it ensures the whole array has been freed. */ if (atomic_dec_and_test(async_chunk->pending)) kvfree(async_chunk->pending); This is absolutely an abuse of kvfree(). Replace async_chunk::pending with async_chunk::async_cow, so that we can grab the async_cow structure directly, without this strange dance. And with this change, there is no requirement for any specific member location.
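In sketch form (simplified from the change below), the release path becomes an explicit back-pointer walk instead of a layout-dependent kvfree():

  struct async_cow *async_cow = async_chunk->async_cow;

  if (atomic_dec_and_test(&async_cow->num_chunks))
          kvfree(async_cow);  /* frees the owner, and the chunks[] array with it */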
Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6b7e01033595d..82dc636c4b512 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -455,11 +455,10 @@ struct async_chunk { struct list_head extents; struct cgroup_subsys_state *blkcg_css; struct btrfs_work work; - atomic_t *pending; + struct async_cow *async_cow; }; struct async_cow { - /* Number of chunks in flight; must be first in the structure */ atomic_t num_chunks; struct async_chunk chunks[]; }; @@ -1324,18 +1323,17 @@ static noinline void async_cow_submit(struct btrfs_work *work) static noinline void async_cow_free(struct btrfs_work *work) { struct async_chunk *async_chunk; + struct async_cow *async_cow; async_chunk = container_of(work, struct async_chunk, work); if (async_chunk->inode) btrfs_add_delayed_iput(async_chunk->inode); if (async_chunk->blkcg_css) css_put(async_chunk->blkcg_css); - /* - * Since the pointer to 'pending' is at the beginning of the array of - * async_chunk's, freeing it ensures the whole array has been freed. - */ - if (atomic_dec_and_test(async_chunk->pending)) - kvfree(async_chunk->pending); + + async_cow = async_chunk->async_cow; + if (atomic_dec_and_test(&async_cow->num_chunks)) + kvfree(async_cow); } static int cow_file_range_async(struct btrfs_inode *inode, @@ -1396,7 +1394,7 @@ static int cow_file_range_async(struct btrfs_inode *inode, * lightweight reference for the callback lifetime */ ihold(&inode->vfs_inode); - async_chunk[i].pending = &ctx->num_chunks; + async_chunk[i].async_cow = ctx; async_chunk[i].inode = &inode->vfs_inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; From d9126de867e50ad74eb5a69f950eda5d500b72ef Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:46 +0800 Subject: [PATCH 0333/1202] btrfs: don't pass compressed pages to btrfs_writepage_endio_finish_ordered() Since async_extent holds the compressed page, it would trigger the new ASSERT() in btrfs_mark_ordered_io_finished() which checks that the range is inside the page. Now btrfs_writepage_endio_finish_ordered() can accept @page == NULL, just pass NULL to btrfs_writepage_endio_finish_ordered(). 
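A plausible shape of the range check being avoided (illustrative only; the actual ASSERT() lives in btrfs_mark_ordered_io_finished() and is not shown in this patch):

  if (page)
          ASSERT(page_offset(page) <= start &&
                 start + len <= page_offset(page) + PAGE_SIZE);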
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82dc636c4b512..9ae417aaea289 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -973,15 +973,12 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk) async_extent->nr_pages, async_chunk->write_flags, async_chunk->blkcg_css)) { - struct page *p = async_extent->pages[0]; const u64 start = async_extent->start; const u64 end = start + async_extent->ram_size - 1; - p->mapping = inode->vfs_inode.i_mapping; - btrfs_writepage_endio_finish_ordered(inode, p, start, + btrfs_writepage_endio_finish_ordered(inode, NULL, start, end, false); - p->mapping = NULL; extent_clear_unlock_delalloc(inode, start, end, NULL, 0, PAGE_END_WRITEBACK | PAGE_SET_ERROR); From 79f82f4354abb10919fce34bc0dabb9dfaef059c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:47 +0800 Subject: [PATCH 0334/1202] btrfs: subpage: make add_ra_bio_pages() compatible [BUG] If we remove the subpage limitation in add_ra_bio_pages(), then read a compressed extent whose range partially falls into the next page, like the following inode layout: 0 32K 64K 96K 128K |<--------------|-------------->| btrfs will trigger an ASSERT() in the endio function: assertion failed: atomic_read(&subpage->readers) >= nbits ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3431! Internal error: Oops - BUG: 0 [#1] SMP Workqueue: btrfs-endio btrfs_work_helper [btrfs] Call trace: assertfail.constprop.0+0x28/0x2c [btrfs] btrfs_subpage_end_reader+0x148/0x14c [btrfs] end_page_read+0x8c/0x100 [btrfs] end_bio_extent_readpage+0x320/0x6b0 [btrfs] bio_endio+0x15c/0x1dc end_workqueue_fn+0x44/0x64 [btrfs] btrfs_work_helper+0x74/0x250 [btrfs] process_one_work+0x1d4/0x47c worker_thread+0x180/0x400 kthread+0x11c/0x120 ret_from_fork+0x10/0x30 ---[ end trace c8b7b552d3bb408c ]--- [CAUSE] When we read the page range [0, 64K), we find it's a compressed extent, and we will try to add extra pages in add_ra_bio_pages() to avoid reading the same compressed extent. But when we add such a page to the read bio, it doesn't follow the behavior of btrfs_do_readpage() to properly set subpage::readers. This means, for page [64K, 128K), its subpage::readers is still 0. And when endio is executed on both pages, since page [64K, 128K) has 0 subpage::readers, it triggers the above ASSERT(). [FIX] Function add_ra_bio_pages() is far from subpage compatible: it always assumes PAGE_SIZE == sectorsize, thus when it skips to the next range it always just skips PAGE_SIZE. Make it subpage compatible by: - Skip to the next page properly when needed. If we find there is already a page in the page cache, we need to skip to the next page. In that case, we shouldn't just skip PAGE_SIZE bytes, but use @pg_index to calculate the next bytenr and continue. - Only add the page range covered by the current extent map. We need to calculate which range is covered by the current extent map and only add that part to the read bio. - Update subpage::readers before submitting the bio. - Use a proper cursor instead of the confusing @last_offset. - Calculate the missed threshold based on sector size. The threshold no longer counts missed pages, as for a 64K page size we have at most 3 pages to skip (only 2 if aligned). - Add ASSERT() to make sure our bytenr is always aligned. - Add a comment for the function, with a special note for the subpage case, as the function won't really work well for subpage cases.
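A condensed sketch of the new cursor handling (simplified from the diff below):

  while (cur < compressed_end) {
          u64 pg_index = cur >> PAGE_SHIFT;

          page = xa_load(&mapping->i_pages, pg_index);
          if (page && !xa_is_value(page)) {
                  /* Count the miss in sectors, not pages. */
                  sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
                                    fs_info->sectorsize_bits;
                  if (sectors_missed > 4)
                          break;
                  /* Jump to the next page start. */
                  cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
                  continue;
          }
          /* ... allocate the page, then add only
           * min(extent_map_end(em), page_end + 1) - cur bytes to the bio ... */
  }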
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 90 +++++++++++++++++++++++++++--------------- fs/btrfs/extent_io.c | 1 + 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b50927740d278..08ba96d999020 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -28,6 +28,7 @@ #include "compression.h" #include "extent_io.h" #include "extent_map.h" +#include "subpage.h" #include "zoned.h" static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; @@ -541,13 +542,24 @@ static u64 bio_end_offset(struct bio *bio) return page_offset(last->bv_page) + last->bv_len + last->bv_offset; } +/* + * Add extra pages in the same compressed file extent so that we don't need to + * re-read the same extent again and again. + * + * NOTE: this won't work well for subpage, as for subpage read, we lock the + * full page then submit bio for each compressed/regular extents. + * + * This means, if we have several sectors in the same page points to the same + * on-disk compressed data, we will re-read the same extent many times and + * this function can only help for the next page. + */ static noinline int add_ra_bio_pages(struct inode *inode, u64 compressed_end, struct compressed_bio *cb) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); unsigned long end_index; - unsigned long pg_index; - u64 last_offset; + u64 cur = bio_end_offset(cb->orig_bio); u64 isize = i_size_read(inode); int ret; struct page *page; @@ -555,10 +567,8 @@ static noinline int add_ra_bio_pages(struct inode *inode, struct address_space *mapping = inode->i_mapping; struct extent_map_tree *em_tree; struct extent_io_tree *tree; - u64 end; - int misses = 0; + int sectors_missed = 0; - last_offset = bio_end_offset(cb->orig_bio); em_tree = &BTRFS_I(inode)->extent_tree; tree = &BTRFS_I(inode)->io_tree; @@ -577,18 +587,29 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT; - while (last_offset < compressed_end) { - pg_index = last_offset >> PAGE_SHIFT; + while (cur < compressed_end) { + u64 page_end; + u64 pg_index = cur >> PAGE_SHIFT; + u32 add_size; if (pg_index > end_index) break; page = xa_load(&mapping->i_pages, pg_index); if (page && !xa_is_value(page)) { - misses++; - if (misses > 4) + sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >> + fs_info->sectorsize_bits; + + /* Beyond threshold, no need to continue */ + if (sectors_missed > 4) break; - goto next; + + /* + * Jump to next page start as we already have page for + * current offset. + */ + cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE; + continue; } page = __page_cache_alloc(mapping_gfp_constraint(mapping, @@ -598,14 +619,11 @@ static noinline int add_ra_bio_pages(struct inode *inode, if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { put_page(page); - goto next; + /* There is already a page, skip to page end */ + cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE; + continue; } - /* - * at this point, we have a locked page in the page cache - * for these bytes in the file. But, we have to make - * sure they map to this compressed extent on disk. 
- */ ret = set_page_extent_mapped(page); if (ret < 0) { unlock_page(page); @@ -613,18 +631,22 @@ static noinline int add_ra_bio_pages(struct inode *inode, break; } - end = last_offset + PAGE_SIZE - 1; - lock_extent(tree, last_offset, end); + page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1; + lock_extent(tree, cur, page_end); read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, last_offset, - PAGE_SIZE); + em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur); read_unlock(&em_tree->lock); - if (!em || last_offset < em->start || - (last_offset + PAGE_SIZE > extent_map_end(em)) || + /* + * At this point, we have a locked page in the page cache for + * these bytes in the file. But, we have to make sure they map + * to this compressed extent on disk. + */ + if (!em || cur < em->start || + (cur + fs_info->sectorsize > extent_map_end(em)) || (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) { free_extent_map(em); - unlock_extent(tree, last_offset, end); + unlock_extent(tree, cur, page_end); unlock_page(page); put_page(page); break; @@ -642,19 +664,23 @@ static noinline int add_ra_bio_pages(struct inode *inode, } } - ret = bio_add_page(cb->orig_bio, page, - PAGE_SIZE, 0); - - if (ret == PAGE_SIZE) { - put_page(page); - } else { - unlock_extent(tree, last_offset, end); + add_size = min(em->start + em->len, page_end + 1) - cur; + ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur)); + if (ret != add_size) { + unlock_extent(tree, cur, page_end); unlock_page(page); put_page(page); break; } -next: - last_offset += PAGE_SIZE; + /* + * If it's subpage, we also need to increase its + * subpage::readers number, as at endio we will decrease + * subpage::readers and to unlock the page. + */ + if (fs_info->sectorsize < PAGE_SIZE) + btrfs_subpage_start_reader(fs_info, page, cur, add_size); + put_page(page); + cur += add_size; } return 0; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2570bbbd6a137..e9d68b8ee9e6b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3590,6 +3590,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, bool force_bio_submit = false; u64 disk_bytenr; + ASSERT(IS_ALIGNED(cur, fs_info->sectorsize)); if (cur >= last_byte) { struct extent_state *cached = NULL; From 1ad08ac6a8c1e38de530213425f73a5d886a3bee Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:48 +0800 Subject: [PATCH 0335/1202] btrfs: introduce compressed_bio::pending_sectors to trace compressed bio For btrfs_submit_compressed_read() and btrfs_submit_compressed_write(), we have a pretty weird dance around compressed_bio::pending_bios: btrfs_submit_compressed_read/write() { cb = kmalloc() refcount_set(&cb->pending_bios, 0); bio = btrfs_alloc_bio(); /* NOTE here, we haven't yet submitted any bio */ refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { if (submit) { /* Here we submit bio, but we always have one * extra pending_bios */ refcount_inc(&cb->pending_bios); ret = btrfs_map_bio(); } } /* Submit the last bio */ ret = btrfs_map_bio(); } There are two reasons why we do this: - compressed_bio::pending_bios is a refcount Thus if it's reduced to 0, it can not be increased again. - To ensure the compressed_bio is not freed by some submitted bios If the submitted bio is finished before the next bio submitted, we can free the compressed_bio completely. 
But the above code is sometimes confusing, and we can do it better by introducing a new member, compressed_bio::pending_sectors. Now we use compressed_bio::pending_sectors to indicate whether we have any pending sectors under IO or not yet submitted. If pending_sectors == 0, we're definitely handling the last bio of the compressed_bio, and it is OK to release the compressed bio. Now the workflow looks like this: btrfs_submit_compressed_read/write() { cb = kmalloc() atomic_set(&cb->pending_bios, 0); refcount_set(&cb->pending_sectors, compressed_len >> sectorsize_bits); bio = btrfs_alloc_bio(); for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { if (submit) { atomic_inc(&cb->pending_bios); ret = btrfs_map_bio(); } } /* Submit the last bio */ atomic_inc(&cb->pending_bios); ret = btrfs_map_bio(); } For now we still need pending_bios for later error handling, but we will remove pending_bios eventually once the errors are handled properly. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 75 +++++++++++++++++++++++------------------- fs/btrfs/compression.h | 7 ++-- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 08ba96d999020..deb5d6061b3ce 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -193,6 +193,38 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, return 0; } +/* + * Reduce bio and io accounting for a compressed_bio with its corresponding bio. + * + * Return true if there is no pending bio nor io. + * Return false otherwise. + */ +static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); + unsigned int bi_size = 0; + bool last_io = false; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + /* + * At endio time, bi_iter.bi_size doesn't represent the real bio size. + * Thus here we have to iterate through all segments to grab correct + * bio size. + */ + bio_for_each_segment_all(bvec, bio, iter_all) + bi_size += bvec->bv_len; + + if (bio->bi_status) + cb->errors = 1; + + ASSERT(bi_size && bi_size <= cb->compressed_len); + last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits, + &cb->pending_sectors); + atomic_dec(&cb->pending_bios); + return last_io; +} + /* when we finish reading compressed pages from the disk, we * decompress them and then run the bio end_io routines on the * decompressed pages (in the inode address space).
@@ -212,13 +244,7 @@ static void end_compressed_bio_read(struct bio *bio) unsigned int mirror = btrfs_bio(bio)->mirror_num; int ret = 0; - if (bio->bi_status) - cb->errors = 1; - - /* if there are more bios still pending for this compressed - * extent, just exit - */ - if (!refcount_dec_and_test(&cb->pending_bios)) + if (!dec_and_test_compressed_bio(cb, bio)) goto out; /* @@ -336,13 +362,7 @@ static void end_compressed_bio_write(struct bio *bio) struct page *page; unsigned int index; - if (bio->bi_status) - cb->errors = 1; - - /* if there are more bios still pending for this compressed - * extent, just exit - */ - if (!refcount_dec_and_test(&cb->pending_bios)) + if (!dec_and_test_compressed_bio(cb, bio)) goto out; /* ok, we're the last bio for this extent, step one is to @@ -408,7 +428,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) return BLK_STS_RESOURCE; - refcount_set(&cb->pending_bios, 0); + atomic_set(&cb->pending_bios, 0); + refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; cb->inode = &inode->vfs_inode; cb->start = start; @@ -442,7 +463,6 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio->bi_opf |= REQ_CGROUP_PUNT; kthread_associate_blkcg(blkcg_css); } - refcount_set(&cb->pending_bios, 1); /* create and submit bios for the compressed pages */ bytes_left = compressed_len; @@ -470,13 +490,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, page->mapping = NULL; if (submit || len < PAGE_SIZE) { - /* - * inc the count before we submit the bio so - * we know the end IO handler won't happen before - * we inc the count. Otherwise, the cb might get - * freed before we're done setting it up - */ - refcount_inc(&cb->pending_bios); + atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -515,6 +529,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cond_resched(); } + atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -734,7 +749,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!cb) goto out; - refcount_set(&cb->pending_bios, 0); + atomic_set(&cb->pending_bios, 0); + refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; cb->inode = inode; cb->mirror_num = mirror_num; @@ -779,7 +795,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio->bi_opf = REQ_OP_READ; comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; - refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { u32 pg_len = PAGE_SIZE; @@ -808,18 +823,11 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) { unsigned int nr_sectors; + atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ - /* - * inc the count before we submit the bio so - * we know the end IO handler won't happen before - * we inc the count. 
Otherwise, the cb might get - freed before we're done setting it up - */ - refcount_inc(&cb->pending_bios); - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); BUG_ON(ret); /* -ENOMEM */ @@ -844,6 +852,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cur_disk_byte += pg_len; } + atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 399be0b435bf7..28a558433120f 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -28,8 +28,11 @@ struct btrfs_inode; #define BTRFS_ZLIB_DEFAULT_LEVEL 3 struct compressed_bio { - /* number of bios pending for this compressed extent */ - refcount_t pending_bios; + /* Number of bios pending for this compressed extent */ + atomic_t pending_bios; + + /* Number of sectors with unfinished IO (unsubmitted or unfinished) */ + refcount_t pending_sectors; /* Number of compressed pages in the array */ unsigned int nr_pages; From 82e34731c77ff1c7d316fe5298866386b08b0846 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:49 +0800 Subject: [PATCH 0336/1202] btrfs: subpage: add bitmap for PageChecked flag Although btrfs makes very limited use of the PageChecked flag, it is still a page flag that is not yet subpage compatible. Fix it by introducing btrfs_subpage::checked_offset to do the conversion. For most call sites, especially for free-space cache, COW fixup and btrfs_invalidatepage(), they all work in full page mode anyway. Other call sites work in subpage compatible mode. Some call sites need extra modification: - btrfs_drop_pages() Needs an extra parameter to get the real range for which we need to clear the checked flag. Also, since btrfs_drop_pages() will accept pages beyond the dirtied range, update btrfs_subpage_clamp_range() to handle such a case by setting @len to 0 if the page is beyond the target range. - btrfs_invalidatepage() We need to call the subpage helper before calling __btrfs_releasepage(), or it will trigger an ASSERT() as page->private will be cleared. - btrfs_verify_data_csum() In theory we don't need the io_bio->csum check anymore, but it won't hurt. Just change the comment.
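For illustration, assuming a 64K page with 4K sectors (16 sectors per page), setting the checked flag for a sub-page range boils down to the following sketch, condensed from the subpage helpers added below (bit math simplified):

	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
	/* First bit of the range inside the shared per-page bitmap. */
	unsigned int start_bit = subpage_info->checked_offset +
		((start - page_offset(page)) >> fs_info->sectorsize_bits);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	/* Only set the full-page flag once every sector is checked. */
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		SetPageChecked(page);
	spin_unlock_irqrestore(&subpage->lock, flags);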
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 10 ++++++-- fs/btrfs/file.c | 17 ++++++++++---- fs/btrfs/free-space-cache.c | 6 ++++- fs/btrfs/inode.c | 28 ++++++++++------------ fs/btrfs/reflink.c | 2 +- fs/btrfs/subpage.c | 47 +++++++++++++++++++++++++++++++++++-- fs/btrfs/subpage.h | 2 ++ 7 files changed, 85 insertions(+), 27 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index deb5d6061b3ce..891a62403534d 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -296,8 +296,14 @@ static void end_compressed_bio_read(struct bio *bio) * checked so the end_io handlers know about it */ ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) - SetPageChecked(bvec->bv_page); + bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) { + u64 bvec_start = page_offset(bvec->bv_page) + + bvec->bv_offset; + + btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb), + bvec->bv_page, bvec_start, + bvec->bv_len); + } bio_endio(cb->orig_bio); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6c50561f932fe..38e8789467614 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -437,9 +437,15 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes, /* * unlocks pages after btrfs_file_write is done with them */ -static void btrfs_drop_pages(struct page **pages, size_t num_pages) +static void btrfs_drop_pages(struct btrfs_fs_info *fs_info, + struct page **pages, size_t num_pages, + u64 pos, u64 copied) { size_t i; + u64 block_start = round_down(pos, fs_info->sectorsize); + u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start; + + ASSERT(block_len <= U32_MAX); for (i = 0; i < num_pages; i++) { /* page checked is some magic around finding pages that * have been modified without going through btrfs_set_page_dirty @@ -447,7 +453,8 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) * accessed as prepare_pages should have marked them accessed * in prepare_pages via find_or_create_page() */ - ClearPageChecked(pages[i]); + btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start, + block_len); unlock_page(pages[i]); put_page(pages[i]); } @@ -504,7 +511,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages, struct page *p = pages[i]; btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes); - ClearPageChecked(p); + btrfs_page_clamp_clear_checked(fs_info, p, start_pos, num_bytes); btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes); } @@ -1843,7 +1850,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { - btrfs_drop_pages(pages, num_pages); + btrfs_drop_pages(fs_info, pages, num_pages, pos, copied); break; } @@ -1851,7 +1858,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, if (only_release_metadata) btrfs_check_nocow_unlock(BTRFS_I(inode)); - btrfs_drop_pages(pages, num_pages); + btrfs_drop_pages(fs_info, pages, num_pages, pos, copied); cond_resched(); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0d26819b1cf6c..f3fee88c8ee09 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -22,6 +22,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "discard.h" +#include "subpage.h" #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_64K @@ -411,7 +412,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) for 
(i = 0; i < io_ctl->num_pages; i++) { if (io_ctl->pages[i]) { - ClearPageChecked(io_ctl->pages[i]); + btrfs_page_clear_checked(io_ctl->fs_info, + io_ctl->pages[i], + page_offset(io_ctl->pages[i]), + PAGE_SIZE); unlock_page(io_ctl->pages[i]); put_page(io_ctl->pages[i]); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9ae417aaea289..2dc0649035283 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2764,7 +2764,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) clear_page_dirty_for_io(page); SetPageError(page); } - ClearPageChecked(page); + btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE); unlock_page(page); put_page(page); kfree(fixup); @@ -2819,7 +2819,7 @@ int btrfs_writepage_cow_fixup(struct page *page) * page->mapping outside of the page lock. */ ihold(inode); - SetPageChecked(page); + btrfs_page_set_checked(fs_info, page, page_offset(page), PAGE_SIZE); get_page(page); btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; @@ -3269,27 +3269,22 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio, u64 start, u64 end) { struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; const u32 sectorsize = root->fs_info->sectorsize; u32 pg_off; unsigned int result = 0; - if (PageChecked(page)) { - ClearPageChecked(page); + if (btrfs_page_test_checked(fs_info, page, start, end + 1 - start)) { + btrfs_page_clear_checked(fs_info, page, start, end + 1 - start); return 0; } /* - * For subpage case, above PageChecked is not safe as it's not subpage - * compatible. - * But for now only cow fixup and compressed read utilize PageChecked - * flag, while in this context we can easily use bbio->csum to - * determine if we really need to do csum verification. - * - * So for now, just exit if bbio->csum is NULL, as it means it's - * compressed read, and its compressed data csum has already been - * verified. + * This only happens for NODATASUM or compressed read. + * Normally this should be covered by above check for compressed read + * or the next check for NODATASUM. Just do a quicker exit here. */ if (bbio->csum == NULL) return 0; @@ -5110,7 +5105,8 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, len); flush_dcache_page(page); } - ClearPageChecked(page); + btrfs_page_clear_checked(fs_info, page, block_start, + block_end + 1 - block_start); btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start); unlock_extent_cached(io_tree, block_start, block_end, &cached_state); @@ -8705,9 +8701,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * did something wrong. 
*/ ASSERT(!PageOrdered(page)); + btrfs_page_clear_checked(fs_info, page, page_offset(page), PAGE_SIZE); if (!inode_evicting) __btrfs_releasepage(page, GFP_NOFS); - ClearPageChecked(page); clear_page_extent_mapped(page); } @@ -8851,7 +8847,7 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) memzero_page(page, zero_start, PAGE_SIZE - zero_start); flush_dcache_page(page); } - ClearPageChecked(page); + btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE); btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start); btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start); diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index c71e49782e86d..e0f93b357548f 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -138,7 +138,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode, } btrfs_page_set_uptodate(fs_info, page, file_offset, block_size); - ClearPageChecked(page); + btrfs_page_clear_checked(fs_info, page, file_offset, block_size); btrfs_page_set_dirty(fs_info, page, file_offset, block_size); out_unlock: if (page) { diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 51f873a680eaf..ed9936cdd267c 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -88,6 +88,9 @@ void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sector subpage_info->ordered_offset = cur; cur += nr_bits; + subpage_info->checked_offset = cur; + cur += nr_bits; + subpage_info->total_nr_bits = cur; } @@ -255,8 +258,16 @@ static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len) u32 orig_len = *len; *start = max_t(u64, page_offset(page), orig_start); - *len = min_t(u64, page_offset(page) + PAGE_SIZE, - orig_start + orig_len) - *start; + /* + * For certain call sites like btrfs_drop_pages(), we may have pages + * beyond the target range. In that case, just set @len to 0, subpage + * helpers can handle @len == 0 without any problem. + */ + if (page_offset(page) >= orig_start + orig_len) + *len = 0; + else + *len = min_t(u64, page_offset(page) + PAGE_SIZE, + orig_start + orig_len) - *start; } void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, @@ -532,6 +543,36 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, ClearPageOrdered(page); spin_unlock_irqrestore(&subpage->lock, flags); } + +void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + checked, start, len); + unsigned long flags; + + spin_lock_irqsave(&subpage->lock, flags); + bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + if (subpage_test_bitmap_all_set(fs_info, subpage, checked)) + SetPageChecked(page); + spin_unlock_irqrestore(&subpage->lock, flags); +} + +void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, + struct page *page, u64 start, u32 len) +{ + struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; + unsigned int start_bit = subpage_calc_start_bit(fs_info, page, + checked, start, len); + unsigned long flags; + + spin_lock_irqsave(&subpage->lock, flags); + bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + ClearPageChecked(page); + spin_unlock_irqrestore(&subpage->lock, flags); +} + /* * Unlike set/clear which is dependent on each page status, for test all bits * are tested in the same way. 
@@ -557,6 +598,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error); IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered); +IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked); /* * Note that, in selftests (extent-io-tests), we can have empty fs_info passed @@ -627,6 +669,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback, PageWriteback); IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered, PageOrdered); +IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked); /* * Make sure not only the page dirty bit is cleared, but also subpage dirty bit diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index ac4dd64ed2572..46224f959c348 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -36,6 +36,7 @@ struct btrfs_subpage_info { unsigned int dirty_offset; unsigned int writeback_offset; unsigned int ordered_offset; + unsigned int checked_offset; }; /* @@ -142,6 +143,7 @@ DECLARE_BTRFS_SUBPAGE_OPS(error); DECLARE_BTRFS_SUBPAGE_OPS(dirty); DECLARE_BTRFS_SUBPAGE_OPS(writeback); DECLARE_BTRFS_SUBPAGE_OPS(ordered); +DECLARE_BTRFS_SUBPAGE_OPS(checked); bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, struct page *page, u64 start, u32 len); From 2d51630902d9a1afee40ec816f8645c62801b032 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:50 +0800 Subject: [PATCH 0337/1202] btrfs: handle errors properly inside btrfs_submit_compressed_read() There are quite a few BUG_ON()s inside btrfs_submit_compressed_read(); namely, all errors inside the for() loop rely on BUG_ON() to handle -ENOMEM. Handle these errors properly by: - Wait for submitted bios to finish first Using the wait_var_event()/wake_up_var() APIs to wait without introducing extra memory overhead inside compressed_bio. This allows us to wait for any submitted bio to finish, while still keeping the compressed_bio from being freed. - Introduce finish_compressed_bio_read() to finish the compressed_bio - Properly end the bio and finish the compressed_bio when an error happens Now in btrfs_submit_compressed_read(), even when bio submission fails, we can properly handle the error without triggering BUG_ON(). Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 133 +++++++++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 50 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 891a62403534d..f7a6c3cda85e3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -222,9 +222,59 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits, &cb->pending_sectors); atomic_dec(&cb->pending_bios); + /* + * Here we must wake up the possible error handler after all other + * operations on @cb finished, or we can race with + * finish_compressed_bio_*() which may free @cb.
+ */ + wake_up_var(cb); + return last_io; } +static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio) +{ + unsigned int index; + struct page *page; + + /* Release the compressed pages */ + for (index = 0; index < cb->nr_pages; index++) { + page = cb->compressed_pages[index]; + page->mapping = NULL; + put_page(page); + } + + /* Do io completion on the original bio */ + if (cb->errors) { + bio_io_error(cb->orig_bio); + } else { + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + ASSERT(bio); + ASSERT(!bio->bi_status); + /* + * We have verified the checksum already, set page checked so + * the end_io handlers know about it + */ + ASSERT(!bio_flagged(bio, BIO_CLONED)); + bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) { + u64 bvec_start = page_offset(bvec->bv_page) + + bvec->bv_offset; + + btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb), + bvec->bv_page, bvec_start, + bvec->bv_len); + } + + bio_endio(cb->orig_bio); + } + + /* Finally free the cb struct */ + kfree(cb->compressed_pages); + kfree(cb); +} + /* when we finish reading compressed pages from the disk, we * decompress them and then run the bio end_io routines on the * decompressed pages (in the inode address space). @@ -239,8 +289,6 @@ static void end_compressed_bio_read(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; struct inode *inode; - struct page *page; - unsigned int index; unsigned int mirror = btrfs_bio(bio)->mirror_num; int ret = 0; @@ -275,42 +323,7 @@ static void end_compressed_bio_read(struct bio *bio) csum_failed: if (ret) cb->errors = 1; - - /* release the compressed pages */ - index = 0; - for (index = 0; index < cb->nr_pages; index++) { - page = cb->compressed_pages[index]; - page->mapping = NULL; - put_page(page); - } - - /* do io completion on the original bio */ - if (cb->errors) { - bio_io_error(cb->orig_bio); - } else { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - /* - * we have verified the checksum already, set page - * checked so the end_io handlers know about it - */ - ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) { - u64 bvec_start = page_offset(bvec->bv_page) + - bvec->bv_offset; - - btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb), - bvec->bv_page, bvec_start, - bvec->bv_len); - } - - bio_endio(cb->orig_bio); - } - - /* finally free the cb struct */ - kfree(cb->compressed_pages); - kfree(cb); + finish_compressed_bio_read(cb, bio); out: bio_put(bio); } @@ -832,20 +845,20 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size, fs_info->sectorsize); sums += fs_info->csum_size * nr_sectors; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); - if (ret) { - comp_bio->bi_status = ret; - bio_endio(comp_bio); - } + if (ret) + goto finish_cb; comp_bio = btrfs_bio_alloc(BIO_MAX_VECS); comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; @@ -860,16 +873,16 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - 
BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); - if (ret) { - comp_bio->bi_status = ret; - bio_endio(comp_bio); - } + if (ret) + goto last_bio; return 0; @@ -885,6 +898,26 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, out: free_extent_map(em); return ret; +last_bio: + comp_bio->bi_status = ret; + /* This is the last bio, endio functions will free @cb */ + bio_endio(comp_bio); + return ret; + +finish_cb: + if (comp_bio) { + comp_bio->bi_status = ret; + bio_endio(comp_bio); + } + wait_var_event(cb, atomic_read(&cb->pending_bios) == 0); + /* + * Even with previous bio ended, we should still have io not yet + * submitted, thus need to finish @cb manually. + */ + ASSERT(refcount_read(&cb->pending_sectors)); + /* Now we are the only one referring @cb, can finish it safely. */ + finish_compressed_bio_read(cb, NULL); + return ret; } /* From f2e9f8074e97b5c9e71fcc1f22bc5176e798f639 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:51 +0800 Subject: [PATCH 0338/1202] btrfs: handle errors properly inside btrfs_submit_compressed_write() Just like btrfs_submit_compressed_read(), there are quite some BUG_ON()s inside btrfs_submit_compressed_write() for the bio submission path. Fix them using the same method: - For last bio, just endio the bio As in that case, one of the endio function of all these submitted bio will be able to free the compressed_bio - For half-submitted bio, wait and finish the compressed_bio manually In this case, as long as all other bio finish, we're the only one referring the compressed bio, and can manually finish it. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 98 ++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f7a6c3cda85e3..dabb3a88f2e5b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -366,50 +366,55 @@ static noinline void end_compressed_writeback(struct inode *inode, /* the inode may be gone now */ } -/* - * do the cleanup once all the compressed pages hit the disk. - * This will clear writeback on the file pages and free the compressed - * pages. - * - * This also calls the writeback end hooks for the file pages so that - * metadata and checksums can be updated in the file. - */ -static void end_compressed_bio_write(struct bio *bio) +static void finish_compressed_bio_write(struct compressed_bio *cb) { - struct compressed_bio *cb = bio->bi_private; - struct inode *inode; - struct page *page; + struct inode *inode = cb->inode; unsigned int index; - if (!dec_and_test_compressed_bio(cb, bio)) - goto out; - - /* ok, we're the last bio for this extent, step one is to - * call back into the FS and do all the end_io operations + /* + * Ok, we're the last bio for this extent, step one is to call back + * into the FS and do all the end_io operations. 
*/ - inode = cb->inode; - btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL, cb->start, cb->start + cb->len - 1, !cb->errors); end_compressed_writeback(inode, cb); - /* note, our inode could be gone now */ + /* Note, our inode could be gone now */ /* - * release the compressed pages, these came from alloc_page and + * Release the compressed pages, these came from alloc_page and * are not attached to the inode at all */ - index = 0; for (index = 0; index < cb->nr_pages; index++) { - page = cb->compressed_pages[index]; + struct page *page = cb->compressed_pages[index]; + page->mapping = NULL; put_page(page); } - /* finally free the cb struct */ + /* Finally free the cb struct */ kfree(cb->compressed_pages); kfree(cb); +} + +/* + * Do the cleanup once all the compressed pages hit the disk. This will clear + * writeback on the file pages and free the compressed pages. + * + * This also calls the writeback end hooks for the file pages so that metadata + * and checksums can be updated in the file. + */ +static void end_compressed_bio_write(struct bio *bio) +{ + struct compressed_bio *cb = bio->bi_private; + + if (!dec_and_test_compressed_bio(cb, bio)) + goto out; + + btrfs_record_physical_zoned(cb->inode, cb->start, bio); + + finish_compressed_bio_write(cb); out: bio_put(bio); } @@ -512,18 +517,18 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, 1); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto finish_cb; } ret = btrfs_map_bio(fs_info, bio, 0); - if (ret) { - bio->bi_status = ret; - bio_endio(bio); - } + if (ret) + goto finish_cb; bio = btrfs_bio_alloc(BIO_MAX_VECS); bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; @@ -550,23 +555,44 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, 1); - BUG_ON(ret); /* -ENOMEM */ + if (ret) + goto last_bio; } ret = btrfs_map_bio(fs_info, bio, 0); - if (ret) { - bio->bi_status = ret; - bio_endio(bio); - } + if (ret) + goto last_bio; if (blkcg_css) kthread_associate_blkcg(NULL); return 0; +last_bio: + bio->bi_status = ret; + /* One of the bios' endio function will free @cb. */ + bio_endio(bio); + return ret; + +finish_cb: + if (bio) { + bio->bi_status = ret; + bio_endio(bio); + } + + wait_var_event(cb, atomic_read(&cb->pending_bios) == 0); + /* + * Even with previous bio ended, we should still have io not yet + * submitted, thus need to finish manually. + */ + ASSERT(refcount_read(&cb->pending_sectors)); + /* Now we are the only one referring @cb, can finish it safely. 
*/ + finish_compressed_bio_write(cb); + return ret; } static u64 bio_end_offset(struct bio *bio) From 3cdbc2ecdb89fe76a7d059f7f244dfefd336e4a4 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:52 +0800 Subject: [PATCH 0339/1202] btrfs: introduce submit_compressed_bio() for compression The new helper, submit_compressed_bio(), will aggregate the following work: - Increase compressed_bio::pending_bios - Remap the endio function - Map and submit the bio This slightly reorders calls to btrfs_csum_one_bio or btrfs_lookup_bio_sums, but none of them does anything regarding IO submission, so this is effectively no change. We mainly care about the order of - atomic_inc - btrfs_bio_wq_end_io - btrfs_map_bio Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 45 ++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index dabb3a88f2e5b..60b4da6ce93fb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -419,6 +419,21 @@ static void end_compressed_bio_write(struct bio *bio) bio_put(bio); } +static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, + struct compressed_bio *cb, + struct bio *bio, int mirror_num) +{ + blk_status_t ret; + + ASSERT(bio->bi_iter.bi_size); + atomic_inc(&cb->pending_bios); + ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); + if (ret) + return ret; + ret = btrfs_map_bio(fs_info, bio, mirror_num); + return ret; +} + /* * worker function to build and submit bios for previously compressed pages. * The corresponding pages in the inode should be marked for writeback @@ -514,19 +529,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, page->mapping = NULL; if (submit || len < PAGE_SIZE) { - atomic_inc(&cb->pending_bios); - ret = btrfs_bio_wq_end_io(fs_info, bio, - BTRFS_WQ_ENDIO_DATA); - if (ret) - goto finish_cb; - if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, 1); if (ret) goto finish_cb; } - ret = btrfs_map_bio(fs_info, bio, 0); + ret = submit_compressed_bio(fs_info, cb, bio, 0); if (ret) goto finish_cb; @@ -553,18 +562,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cond_resched(); } - atomic_inc(&cb->pending_bios); - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); - if (ret) - goto last_bio; - if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, 1); if (ret) goto last_bio; } - ret = btrfs_map_bio(fs_info, bio, 0); + ret = submit_compressed_bio(fs_info, cb, bio, 0); if (ret) goto last_bio; @@ -868,12 +872,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) { unsigned int nr_sectors; - atomic_inc(&cb->pending_bios); - ret = btrfs_bio_wq_end_io(fs_info, comp_bio, - BTRFS_WQ_ENDIO_DATA); - if (ret) - goto finish_cb; - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); if (ret) goto finish_cb; @@ -882,7 +880,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, fs_info->sectorsize); sums += fs_info->csum_size * nr_sectors; - ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); + ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num); if (ret) goto finish_cb; @@ -897,16 +895,11 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cur_disk_byte += pg_len; } - atomic_inc(&cb->pending_bios); - ret = btrfs_bio_wq_end_io(fs_info,
comp_bio, BTRFS_WQ_ENDIO_DATA); - if (ret) - goto last_bio; - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); if (ret) goto last_bio; - ret = btrfs_map_bio(fs_info, comp_bio, mirror_num); + ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num); if (ret) goto last_bio; From efb3f4e772522d407ba6dc93c94abb4c9e9dd560 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:53 +0800 Subject: [PATCH 0340/1202] btrfs: introduce alloc_compressed_bio() for compression Just aggregate the bio allocation code into one helper, so that we can replace 4 call sites. There is one special note for zoned write. Currently btrfs_submit_compressed_write() will only allocate the first bio using ZONE_APPEND. If we have to submit current bio due to stripe boundary, the new bio allocated will not use ZONE_APPEND. In theory this should be a bug, but considering zoned mode currently only support SINGLE profile, which doesn't have any stripe boundary limit, it should never be a problem and we have assertions in place. This function will provide a good entrance for any work which needs to be done at bio allocation time. Like determining the stripe boundary. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 90 +++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 60b4da6ce93fb..13814737420cd 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -434,6 +434,36 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, return ret; } +/* + * Allocate a compressed_bio, which will be used to read/write on-disk data. + */ +static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr, + unsigned int opf, bio_end_io_t endio_func) +{ + struct bio *bio; + + bio = btrfs_bio_alloc(BIO_MAX_VECS); + + bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT; + bio->bi_opf = opf; + bio->bi_private = cb; + bio->bi_end_io = endio_func; + + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); + struct btrfs_device *device; + + device = btrfs_zoned_get_device(fs_info, disk_bytenr, + fs_info->sectorsize); + if (IS_ERR(device)) { + bio_put(bio); + return ERR_CAST(device); + } + bio_set_dev(bio, device->bdev); + } + return bio; +} + /* * worker function to build and submit bios for previously compressed pages. 
* The corresponding pages in the inode should be marked for writeback @@ -479,23 +509,11 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; - bio = btrfs_bio_alloc(BIO_MAX_VECS); - bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; - bio->bi_opf = bio_op | write_flags; - bio->bi_private = cb; - bio->bi_end_io = end_compressed_bio_write; - - if (use_append) { - struct btrfs_device *device; - - device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE); - if (IS_ERR(device)) { - kfree(cb); - bio_put(bio); - return BLK_STS_NOTSUPP; - } - - bio_set_dev(bio, device->bdev); + bio = alloc_compressed_bio(cb, first_byte, bio_op | write_flags, + end_compressed_bio_write); + if (IS_ERR(bio)) { + kfree(cb); + return errno_to_blk_status(PTR_ERR(bio)); } if (blkcg_css) { @@ -539,11 +557,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, if (ret) goto finish_cb; - bio = btrfs_bio_alloc(BIO_MAX_VECS); - bio->bi_iter.bi_sector = first_byte >> SECTOR_SHIFT; - bio->bi_opf = bio_op | write_flags; - bio->bi_private = cb; - bio->bi_end_io = end_compressed_bio_write; + bio = alloc_compressed_bio(cb, first_byte, + bio_op | write_flags, + end_compressed_bio_write); + if (IS_ERR(bio)) { + ret = errno_to_blk_status(PTR_ERR(bio)); + bio = NULL; + goto finish_cb; + } if (blkcg_css) bio->bi_opf |= REQ_CGROUP_PUNT; /* @@ -839,11 +860,13 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; - comp_bio = btrfs_bio_alloc(BIO_MAX_VECS); - comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; - comp_bio->bi_opf = REQ_OP_READ; - comp_bio->bi_private = cb; - comp_bio->bi_end_io = end_compressed_bio_read; + comp_bio = alloc_compressed_bio(cb, cur_disk_byte, REQ_OP_READ, + end_compressed_bio_read); + if (IS_ERR(comp_bio)) { + ret = errno_to_blk_status(PTR_ERR(comp_bio)); + comp_bio = NULL; + goto fail2; + } for (pg_index = 0; pg_index < nr_pages; pg_index++) { u32 pg_len = PAGE_SIZE; @@ -884,11 +907,14 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (ret) goto finish_cb; - comp_bio = btrfs_bio_alloc(BIO_MAX_VECS); - comp_bio->bi_iter.bi_sector = cur_disk_byte >> SECTOR_SHIFT; - comp_bio->bi_opf = REQ_OP_READ; - comp_bio->bi_private = cb; - comp_bio->bi_end_io = end_compressed_bio_read; + comp_bio = alloc_compressed_bio(cb, cur_disk_byte, + REQ_OP_READ, + end_compressed_bio_read); + if (IS_ERR(comp_bio)) { + ret = errno_to_blk_status(PTR_ERR(comp_bio)); + comp_bio = NULL; + goto finish_cb; + } bio_add_page(comp_bio, page, pg_len, 0); } From 8f379e9d2e389a1e9ce3ba443ec604afbfa58f63 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:54 +0800 Subject: [PATCH 0341/1202] btrfs: determine stripe boundary at bio allocation time in btrfs_submit_compressed_read Currently btrfs_submit_compressed_read() will check btrfs_bio_fits_in_stripe() each time a new page is going to be added. Even if compressed extent is small, we don't really need to do that for every page. This patch will align the behavior to extent_io.c, by determining the stripe boundary when allocating a bio. Unlike extent_io.c, in compressed.c we don't need to bother things like different bio flags, thus no need to re-use bio_ctrl. Here we just manually introduce new local variable, next_stripe_start, and teach alloc_compressed_bio() to calculate the stripe boundary. 
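In condensed pseudo-C, the read-side submission loop then takes roughly this shape (a sketch with the checksum lookup omitted, not the verbatim patch):

	while (cur_disk_byte < disk_bytenr + compressed_len) {
		u64 offset = cur_disk_byte - disk_bytenr;
		u64 size;

		/* Allocate a new bio if the previous one was submitted. */
		if (!comp_bio)
			comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
					REQ_OP_READ, end_compressed_bio_read,
					&next_stripe_start);
		/* Clamp to stripe, page and compressed extent boundaries. */
		size = min_t(u64, next_stripe_start - cur_disk_byte,
			     PAGE_SIZE - offset_in_page(offset));
		size = min_t(u64, size, compressed_len - offset);
		bio_add_page(comp_bio, cb->compressed_pages[offset >> PAGE_SHIFT],
			     size, offset_in_page(offset));
		cur_disk_byte += size;

		/* Hit a stripe boundary or the end of the extent: submit. */
		if (cur_disk_byte == next_stripe_start ||
		    cur_disk_byte == disk_bytenr + compressed_len) {
			submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
			comp_bio = NULL;
		}
	}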
Then each time we add some page range into the bio, we check if we have reached the boundary. And if we have, submit it. Also, since we have @cur_disk_byte to determine whether the current bio is the last one, we don't need an explicit last_bio: tag for error handling any more. And since we can use @cur_disk_byte to track which range has been added to the bio, we can also use @cur_disk_byte to calculate the wait condition, with no need for @pending_bios. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 168 ++++++++++++++++++++++++----------------- 1 file changed, 97 insertions(+), 71 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 13814737420cd..f3cb18afec209 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -435,12 +435,31 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, } /* - * Allocate a compressed_bio, which will be used to read/write on-disk data. + * Allocate a compressed_bio, which will be used to read/write on-disk + * (aka, compressed) data. + * + * @cb: The compressed_bio structure, which records all the needed + * information to bind the compressed data to the uncompressed + * page cache. + * @disk_bytenr: The logical bytenr where the compressed data will be read + * from or written to. + * @endio_func: The endio function to call after the IO for compressed data + * is finished. + * @next_stripe_start: Return value of logical bytenr of where next stripe starts. + * Let the caller know to only fill the bio up to the stripe + * boundary. */ + + static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr, - unsigned int opf, bio_end_io_t endio_func) + unsigned int opf, bio_end_io_t endio_func, + u64 *next_stripe_start) { + struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); + struct btrfs_io_geometry geom; + struct extent_map *em; struct bio *bio; + int ret; bio = btrfs_bio_alloc(BIO_MAX_VECS); @@ -449,18 +468,23 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte bio->bi_private = cb; bio->bi_end_io = endio_func; - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { - struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); - struct btrfs_device *device; + em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize); + if (IS_ERR(em)) { + bio_put(bio); + return ERR_CAST(em); + } - device = btrfs_zoned_get_device(fs_info, disk_bytenr, - fs_info->sectorsize); - if (IS_ERR(device)) { - bio_put(bio); - return ERR_CAST(device); - } - bio_set_dev(bio, device->bdev); + if (bio_op(bio) == REQ_OP_ZONE_APPEND) + bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev); + + ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom); + free_extent_map(em); + if (ret < 0) { + bio_put(bio); + return ERR_PTR(ret); } + *next_stripe_start = disk_bytenr + geom.len; + return bio; } @@ -488,6 +512,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, int pg_index = 0; struct page *page; u64 first_byte = disk_start; + u64 next_stripe_start; blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; const bool use_append = btrfs_use_zone_append(inode, disk_start); @@ -510,7 +535,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->nr_pages = nr_pages; bio = alloc_compressed_bio(cb, first_byte, bio_op | write_flags, - end_compressed_bio_write); + end_compressed_bio_write, &next_stripe_start); if (IS_ERR(bio)) { kfree(cb); return errno_to_blk_status(PTR_ERR(bio)); @@ -559,7
+584,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, bio = alloc_compressed_bio(cb, first_byte, bio_op | write_flags, - end_compressed_bio_write); + end_compressed_bio_write, + &next_stripe_start); if (IS_ERR(bio)) { ret = errno_to_blk_status(PTR_ERR(bio)); bio = NULL; @@ -790,9 +816,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, unsigned int compressed_len; unsigned int nr_pages; unsigned int pg_index; - struct page *page; - struct bio *comp_bio; - u64 cur_disk_byte = bio->bi_iter.bi_sector << 9; + struct bio *comp_bio = NULL; + const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; + u64 cur_disk_byte = disk_bytenr; + u64 next_stripe_start; u64 file_offset; u64 em_len; u64 em_start; @@ -860,39 +887,58 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; - comp_bio = alloc_compressed_bio(cb, cur_disk_byte, REQ_OP_READ, - end_compressed_bio_read); - if (IS_ERR(comp_bio)) { - ret = errno_to_blk_status(PTR_ERR(comp_bio)); - comp_bio = NULL; - goto fail2; - } + while (cur_disk_byte < disk_bytenr + compressed_len) { + u64 offset = cur_disk_byte - disk_bytenr; + unsigned int index = offset >> PAGE_SHIFT; + unsigned int real_size; + unsigned int added; + struct page *page = cb->compressed_pages[index]; + bool submit = false; - for (pg_index = 0; pg_index < nr_pages; pg_index++) { - u32 pg_len = PAGE_SIZE; - int submit = 0; + /* Allocate new bio if submitted or not yet allocated */ + if (!comp_bio) { + comp_bio = alloc_compressed_bio(cb, cur_disk_byte, + REQ_OP_READ, end_compressed_bio_read, + &next_stripe_start); + if (IS_ERR(comp_bio)) { + ret = errno_to_blk_status(PTR_ERR(comp_bio)); + comp_bio = NULL; + goto finish_cb; + } + } + /* + * We should never reach next_stripe_start start as we will + * submit comp_bio when reach the boundary immediately. + */ + ASSERT(cur_disk_byte != next_stripe_start); + /* + * We have various limit on the real read size: + * - stripe boundary + * - page boundary + * - compressed length boundary + */ + real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte); + real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset)); + real_size = min_t(u64, real_size, compressed_len - offset); + ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize)); + added = bio_add_page(comp_bio, page, real_size, offset_in_page(offset)); /* - * To handle subpage case, we need to make sure the bio only - * covers the range we need. - * - * If we're at the last page, truncate the length to only cover - * the remaining part. + * Maximum compressed extent is smaller than bio size limit, + * thus bio_add_page() should always success. 
*/ - if (pg_index == nr_pages - 1) - pg_len = min_t(u32, PAGE_SIZE, - compressed_len - pg_index * PAGE_SIZE); + ASSERT(added == real_size); + cur_disk_byte += added; - page = cb->compressed_pages[pg_index]; - page->mapping = inode->i_mapping; - page->index = em_start >> PAGE_SHIFT; + /* Reached stripe boundary, need to submit */ + if (cur_disk_byte == next_stripe_start) + submit = true; - if (comp_bio->bi_iter.bi_size) - submit = btrfs_bio_fits_in_stripe(page, pg_len, - comp_bio, 0); + /* Has finished the range, need to submit */ + if (cur_disk_byte == disk_bytenr + compressed_len) + submit = true; - page->mapping = NULL; - if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) { + if (submit) { unsigned int nr_sectors; ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); @@ -906,29 +952,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num); if (ret) goto finish_cb; - - comp_bio = alloc_compressed_bio(cb, cur_disk_byte, - REQ_OP_READ, - end_compressed_bio_read); - if (IS_ERR(comp_bio)) { - ret = errno_to_blk_status(PTR_ERR(comp_bio)); - comp_bio = NULL; - goto finish_cb; - } - - bio_add_page(comp_bio, page, pg_len, 0); + comp_bio = NULL; } - cur_disk_byte += pg_len; } - - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); - if (ret) - goto last_bio; - - ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num); - if (ret) - goto last_bio; - return 0; fail2: @@ -943,18 +969,18 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, out: free_extent_map(em); return ret; -last_bio: - comp_bio->bi_status = ret; - /* This is the last bio, endio functions will free @cb */ - bio_endio(comp_bio); - return ret; - finish_cb: if (comp_bio) { comp_bio->bi_status = ret; bio_endio(comp_bio); } - wait_var_event(cb, atomic_read(&cb->pending_bios) == 0); + /* All bytes of @cb is submitted, endio will free @cb */ + if (cur_disk_byte == disk_bytenr + compressed_len) + return ret; + + wait_var_event(cb, refcount_read(&cb->pending_sectors) == + (disk_bytenr + compressed_len - cur_disk_byte) >> + fs_info->sectorsize_bits); /* * Even with previous bio ended, we should still have io not yet * submitted, thus need to finish @cb manually. From 19194814cf0d6e6e320995e84cab097cde3f459c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:55 +0800 Subject: [PATCH 0342/1202] btrfs: determine stripe boundary at bio allocation time in btrfs_submit_compressed_write Currently btrfs_submit_compressed_write() will check btrfs_bio_fits_in_stripe() each time a new page is going to be added. Even if compressed extent is small, we don't really need to do that for every page. Align the behavior to extent_io.c, by determining the stripe boundary when allocating a bio. Unlike extent_io.c, in compressed.c we don't need to bother things like different bio flags, thus no need to re-use bio_ctrl. Here we just manually introduce new local variable, next_stripe_start, and use that value returned from alloc_compressed_bio() to calculate the stripe boundary. Then each time we add some page range into the bio, we check if we reached the boundary. And if reached, submit it. Also, since we have @cur_disk_bytenr to determine whether we're the last bio, we don't need a explicit last_bio: tag for error handling any more. And since we use @cur_disk_bytenr to wait, there is no need for pending_bios, also remove it to save some memory of compressed_bio. 
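Condensed from the change below, the error path after a partial submission reduces to this sketch (simplified):

	/* Wait until only the never-submitted sectors remain accounted. */
	wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
			   (disk_start + compressed_len - cur_disk_bytenr) >>
				fs_info->sectorsize_bits);
	/* All submitted bios have finished; we are the sole owner of @cb. */
	ASSERT(refcount_read(&cb->pending_sectors));
	finish_compressed_bio_write(cb);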
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 141 +++++++++++++++++------------------------ fs/btrfs/compression.h | 3 - 2 files changed, 59 insertions(+), 85 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f3cb18afec209..1debdfeee90ff 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -221,7 +221,6 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *b ASSERT(bi_size && bi_size <= cb->compressed_len); last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits, &cb->pending_sectors); - atomic_dec(&cb->pending_bios); /* * Here we must wake up the possible error handler after all other * operations on @cb finished, or we can race with @@ -426,7 +425,6 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, blk_status_t ret; ASSERT(bio->bi_iter.bi_size); - atomic_inc(&cb->pending_bios); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); if (ret) return ret; @@ -508,10 +506,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio = NULL; struct compressed_bio *cb; - unsigned long bytes_left; - int pg_index = 0; - struct page *page; - u64 first_byte = disk_start; + u64 cur_disk_bytenr = disk_start; u64 next_stripe_start; blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; @@ -522,7 +517,6 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); if (!cb) return BLK_STS_RESOURCE; - atomic_set(&cb->pending_bios, 0); refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; cb->inode = &inode->vfs_inode; @@ -534,44 +528,62 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, cb->orig_bio = NULL; cb->nr_pages = nr_pages; - bio = alloc_compressed_bio(cb, first_byte, bio_op | write_flags, - end_compressed_bio_write, &next_stripe_start); - if (IS_ERR(bio)) { - kfree(cb); - return errno_to_blk_status(PTR_ERR(bio)); - } - - if (blkcg_css) { - bio->bi_opf |= REQ_CGROUP_PUNT; - kthread_associate_blkcg(blkcg_css); - } - - /* create and submit bios for the compressed pages */ - bytes_left = compressed_len; - for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { - int submit = 0; - int len = 0; + while (cur_disk_bytenr < disk_start + compressed_len) { + u64 offset = cur_disk_bytenr - disk_start; + unsigned int index = offset >> PAGE_SHIFT; + unsigned int real_size; + unsigned int added; + struct page *page = compressed_pages[index]; + bool submit = false; - page = compressed_pages[pg_index]; - page->mapping = inode->vfs_inode.i_mapping; - if (bio->bi_iter.bi_size) - submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, - 0); + /* Allocate new bio if submitted or not yet allocated */ + if (!bio) { + bio = alloc_compressed_bio(cb, cur_disk_bytenr, + bio_op | write_flags, end_compressed_bio_write, + &next_stripe_start); + if (IS_ERR(bio)) { + ret = errno_to_blk_status(PTR_ERR(bio)); + bio = NULL; + goto finish_cb; + } + } + /* + * We should never reach next_stripe_start start as we will + * submit comp_bio when reach the boundary immediately. + */ + ASSERT(cur_disk_bytenr != next_stripe_start); /* - * Page can only be added to bio if the current bio fits in - * stripe. 
+ * We have various limits on the real read size: + * - stripe boundary + * - page boundary + * - compressed length boundary */ - if (!submit) { - if (pg_index == 0 && use_append) - len = bio_add_zone_append_page(bio, page, - PAGE_SIZE, 0); - else - len = bio_add_page(bio, page, PAGE_SIZE, 0); - } + real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr); + real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset)); + real_size = min_t(u64, real_size, compressed_len - offset); + ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize)); - page->mapping = NULL; - if (submit || len < PAGE_SIZE) { + if (use_append) + added = bio_add_zone_append_page(bio, page, real_size, + offset_in_page(offset)); + else + added = bio_add_page(bio, page, real_size, + offset_in_page(offset)); + /* Reached zoned boundary */ + if (added == 0) + submit = true; + + cur_disk_bytenr += added; + /* Reached stripe boundary */ + if (cur_disk_bytenr == next_stripe_start) + submit = true; + + /* Finished the range */ + if (cur_disk_bytenr == disk_start + compressed_len) + submit = true; + + if (submit) { if (!skip_sum) { ret = btrfs_csum_one_bio(inode, bio, start, 1); if (ret) @@ -581,61 +593,27 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, ret = submit_compressed_bio(fs_info, cb, bio, 0); if (ret) goto finish_cb; - - bio = alloc_compressed_bio(cb, first_byte, - bio_op | write_flags, - end_compressed_bio_write, - &next_stripe_start); - if (IS_ERR(bio)) { - ret = errno_to_blk_status(PTR_ERR(bio)); - bio = NULL; - goto finish_cb; - } - if (blkcg_css) - bio->bi_opf |= REQ_CGROUP_PUNT; - /* - * Use bio_add_page() to ensure the bio has at least one - * page. - */ - bio_add_page(bio, page, PAGE_SIZE, 0); + bio = NULL; } - if (bytes_left < PAGE_SIZE) { - btrfs_info(fs_info, - "bytes left %lu compress len %u nr %u", - bytes_left, cb->compressed_len, cb->nr_pages); - } - bytes_left -= PAGE_SIZE; - first_byte += PAGE_SIZE; cond_resched(); } - - if (!skip_sum) { - ret = btrfs_csum_one_bio(inode, bio, start, 1); - if (ret) - goto last_bio; - } - - ret = submit_compressed_bio(fs_info, cb, bio, 0); - if (ret) - goto last_bio; - if (blkcg_css) kthread_associate_blkcg(NULL); return 0; -last_bio: - bio->bi_status = ret; - /* One of the bios' endio function will free @cb. */ - bio_endio(bio); - return ret; finish_cb: if (bio) { bio->bi_status = ret; bio_endio(bio); } + /* Last byte of @cb is submitted, endio will free @cb */ + if (cur_disk_bytenr == disk_start + compressed_len) + return ret; - wait_var_event(cb, atomic_read(&cb->pending_bios) == 0); + wait_var_event(cb, refcount_read(&cb->pending_sectors) == + (disk_start + compressed_len - cur_disk_bytenr) >> + fs_info->sectorsize_bits); /* * Even with previous bio ended, we should still have io not yet * submitted, thus need to finish manually. 
@@ -846,7 +824,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!cb) goto out; - atomic_set(&cb->pending_bios, 0); refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits); cb->errors = 0; cb->inode = inode; diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 28a558433120f..56eef0821e3e3 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -28,9 +28,6 @@ struct btrfs_inode; #define BTRFS_ZLIB_DEFAULT_LEVEL 3 struct compressed_bio { - /* Number of bios pending for this compressed extent */ - atomic_t pending_bios; - /* Number of sectors with unfinished IO (unsubmitted or unfinished) */ refcount_t pending_sectors; From 400ad860197d3159d7443fe6dee1da39531f4fa1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:56 +0800 Subject: [PATCH 0343/1202] btrfs: remove unused function btrfs_bio_fits_in_stripe() As the last caller in compression.c has been removed, we don't need that function anymore. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 -- fs/btrfs/inode.c | 42 ------------------------------------------ 2 files changed, 44 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad674f092b4e6..2fb177f1de317 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3220,8 +3220,6 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, struct extent_state *other); void btrfs_split_delalloc_extent(struct inode *inode, struct extent_state *orig, u64 split); -int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, - unsigned long bio_flags); void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end); vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2dc0649035283..be9e078255310 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2233,48 +2233,6 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, } } -/* - * btrfs_bio_fits_in_stripe - Checks whether the size of the given bio will fit - * in a chunk's stripe. This function ensures that bios do not span a - * stripe/chunk - * - * @page - The page we are about to add to the bio - * @size - size we want to add to the bio - * @bio - bio we want to ensure is smaller than a stripe - * @bio_flags - flags of the bio - * - * return 1 if page cannot be added to the bio - * return 0 if page can be added to the bio - * return error otherwise - */ -int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio, - unsigned long bio_flags) -{ - struct inode *inode = page->mapping->host; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - u64 logical = bio->bi_iter.bi_sector << 9; - u32 bio_len = bio->bi_iter.bi_size; - struct extent_map *em; - int ret = 0; - struct btrfs_io_geometry geom; - - if (bio_flags & EXTENT_BIO_COMPRESSED) - return 0; - - em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize); - if (IS_ERR(em)) - return PTR_ERR(em); - ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical, &geom); - if (ret < 0) - goto out; - - if (geom.len < bio_len + size) - ret = 1; -out: - free_extent_map(em); - return ret; -} - /* * in order to insert checksums into the metadata in large chunks, * we wait until bio submission time. 
All the pages in the bio are
From 5d908eaee789cdc4813d1f2a90e5999b7b92ccf8 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:21:57 +0800
Subject: [PATCH 0344/1202] btrfs: refactor submit_compressed_extents()

We have a big chunk of code inside a while() loop, with tons of strange
jumps for error handling. It definitely doesn't meet today's coding
standards.

Move the code into a new function, submit_one_async_extent().

Since we're here, also make the following changes:

- Comment style change
  To follow the current scheme

- Don't fall back to non-compressed write when hitting ENOSPC
  If we hit ENOSPC for a compressed write, how could we reserve more
  space for a non-compressed write? Thus we go down the error path
  directly.
  This removes the retry: label.

- Add more comments for the super long parameter list
  Explain what each parameter is for, so we don't need to check the
  prototype.

- Move the error handling to submit_one_async_extent()
  Thus no strange code like:

  out_free:
	...
	goto again;

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/inode.c | 272 +++++++++++++++++++++++------------------------
 1 file changed, 131 insertions(+), 141 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index be9e078255310..bb47b0b04a0ae 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -839,163 +839,121 @@ static void free_async_extent_pages(struct async_extent *async_extent)
 	async_extent->pages = NULL;
 }
 
-/*
- * phase two of compressed writeback. This is the ordered portion
- * of the code, which only gets called in the order the work was
- * queued. We walk all the async extents created by compress_file_range
- * and send them down to the disk.
- */
-static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
+static int submit_one_async_extent(struct btrfs_inode *inode,
+				   struct async_chunk *async_chunk,
+				   struct async_extent *async_extent,
+				   u64 *alloc_hint)
 {
-	struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct async_extent *async_extent;
-	u64 alloc_hint = 0;
+	struct extent_io_tree *io_tree = &inode->io_tree;
+	struct btrfs_root *root = inode->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_key ins;
 	struct extent_map *em;
-	struct btrfs_root *root = inode->root;
-	struct extent_io_tree *io_tree = &inode->io_tree;
 	int ret = 0;
+	u64 start = async_extent->start;
+	u64 end = async_extent->start + async_extent->ram_size - 1;
 
-again:
-	while (!list_empty(&async_chunk->extents)) {
-		async_extent = list_entry(async_chunk->extents.next,
-					  struct async_extent, list);
-		list_del(&async_extent->list);
-
-retry:
-		lock_extent(io_tree, async_extent->start,
-			    async_extent->start + async_extent->ram_size - 1);
-		/* did the compression code fall back to uncompressed IO? */
-		if (!async_extent->pages) {
-			int page_started = 0;
-			unsigned long nr_written = 0;
-
-			/* allocate blocks */
-			ret = cow_file_range(inode, async_chunk->locked_page,
-					     async_extent->start,
-					     async_extent->start +
-					     async_extent->ram_size - 1,
-					     &page_started, &nr_written, 0);
-
-			/* JDM XXX */
-
-			/*
-			 * if page_started, cow_file_range inserted an
-			 * inline extent and took care of all the unlocking
-			 * and IO for us. Otherwise, we need to submit
-			 * all those pages down to the drive.
- */ - if (!page_started && !ret) - extent_write_locked_range(&inode->vfs_inode, - async_extent->start, - async_extent->start + - async_extent->ram_size - 1, - WB_SYNC_ALL); - else if (ret && async_chunk->locked_page) - unlock_page(async_chunk->locked_page); - kfree(async_extent); - cond_resched(); - continue; - } + lock_extent(io_tree, start, end); - ret = btrfs_reserve_extent(root, async_extent->ram_size, - async_extent->compressed_size, - async_extent->compressed_size, - 0, alloc_hint, &ins, 1, 1); - if (ret) { - free_async_extent_pages(async_extent); + /* We have fallback to uncompressed write */ + if (!async_extent->pages) { + int page_started = 0; + unsigned long nr_written = 0; - if (ret == -ENOSPC) { - unlock_extent(io_tree, async_extent->start, - async_extent->start + - async_extent->ram_size - 1); - - /* - * we need to redirty the pages if we decide to - * fallback to uncompressed IO, otherwise we - * will not submit these pages down to lower - * layers. - */ - extent_range_redirty_for_io(&inode->vfs_inode, - async_extent->start, - async_extent->start + - async_extent->ram_size - 1); - - goto retry; - } - goto out_free; - } /* - * here we're doing allocation and writeback of the - * compressed pages + * Call cow_file_range() to run the delalloc range directly, + * since we won't go to nocow or async path again. */ - em = create_io_em(inode, async_extent->start, - async_extent->ram_size, /* len */ - async_extent->start, /* orig_start */ - ins.objectid, /* block_start */ - ins.offset, /* block_len */ - ins.offset, /* orig_block_len */ - async_extent->ram_size, /* ram_bytes */ - async_extent->compress_type, - BTRFS_ORDERED_COMPRESSED); - if (IS_ERR(em)) - /* ret value is not necessary due to void function */ - goto out_free_reserve; - free_extent_map(em); - - ret = btrfs_add_ordered_extent_compress(inode, - async_extent->start, - ins.objectid, - async_extent->ram_size, - ins.offset, - async_extent->compress_type); - if (ret) { - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); - goto out_free_reserve; - } - btrfs_dec_block_group_reservations(fs_info, ins.objectid); - + ret = cow_file_range(inode, async_chunk->locked_page, + start, end, &page_started, &nr_written, 0); /* - * clear dirty, set writeback and unlock the pages. + * If @page_started, cow_file_range() inserted an inline extent + * and took care of all the unlocking and IO for us. + * Otherwise, we need to submit all those pages down to the + * drive. 
*/ - extent_clear_unlock_delalloc(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, - NULL, EXTENT_LOCKED | EXTENT_DELALLOC, - PAGE_UNLOCK | PAGE_START_WRITEBACK); - if (btrfs_submit_compressed_write(inode, async_extent->start, - async_extent->ram_size, - ins.objectid, - ins.offset, async_extent->pages, - async_extent->nr_pages, - async_chunk->write_flags, - async_chunk->blkcg_css)) { - const u64 start = async_extent->start; - const u64 end = start + async_extent->ram_size - 1; - - btrfs_writepage_endio_finish_ordered(inode, NULL, start, - end, false); - - extent_clear_unlock_delalloc(inode, start, end, NULL, 0, - PAGE_END_WRITEBACK | - PAGE_SET_ERROR); - free_async_extent_pages(async_extent); - } - alloc_hint = ins.objectid + ins.offset; + if (!page_started && !ret) + extent_write_locked_range(&inode->vfs_inode, start, + end, WB_SYNC_ALL); + else if (ret && async_chunk->locked_page) + unlock_page(async_chunk->locked_page); kfree(async_extent); - cond_resched(); + return ret; } - return; + + ret = btrfs_reserve_extent(root, async_extent->ram_size, + async_extent->compressed_size, + async_extent->compressed_size, + 0, *alloc_hint, &ins, 1, 1); + if (ret) { + free_async_extent_pages(async_extent); + /* + * Here we used to try again by going back to non-compressed + * path for ENOSPC. But we can't reserve space even for + * compressed size, how could it work for uncompressed size + * which requires larger size? So here we directly go error + * path. + */ + goto out_free; + } + + /* Here we're doing allocation and writeback of the compressed pages */ + em = create_io_em(inode, start, + async_extent->ram_size, /* len */ + start, /* orig_start */ + ins.objectid, /* block_start */ + ins.offset, /* block_len */ + ins.offset, /* orig_block_len */ + async_extent->ram_size, /* ram_bytes */ + async_extent->compress_type, + BTRFS_ORDERED_COMPRESSED); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out_free_reserve; + } + free_extent_map(em); + + ret = btrfs_add_ordered_extent_compress(inode, start, /* file_offset */ + ins.objectid, /* disk_bytenr */ + async_extent->ram_size, /* num_bytes */ + ins.offset, /* disk_num_bytes */ + async_extent->compress_type); + if (ret) { + btrfs_drop_extent_cache(inode, start, end, 0); + goto out_free_reserve; + } + btrfs_dec_block_group_reservations(fs_info, ins.objectid); + + /* Clear dirty, set writeback and unlock the pages. 
*/ + extent_clear_unlock_delalloc(inode, start, end, + NULL, EXTENT_LOCKED | EXTENT_DELALLOC, + PAGE_UNLOCK | PAGE_START_WRITEBACK); + if (btrfs_submit_compressed_write(inode, start, /* file_offset */ + async_extent->ram_size, /* num_bytes */ + ins.objectid, /* disk_bytenr */ + ins.offset, /* compressed_len */ + async_extent->pages, /* compressed_pages */ + async_extent->nr_pages, + async_chunk->write_flags, + async_chunk->blkcg_css)) { + const u64 start = async_extent->start; + const u64 end = start + async_extent->ram_size - 1; + + btrfs_writepage_endio_finish_ordered(inode, NULL, start, end, 0); + + extent_clear_unlock_delalloc(inode, start, end, NULL, 0, + PAGE_END_WRITEBACK | PAGE_SET_ERROR); + free_async_extent_pages(async_extent); + } + *alloc_hint = ins.objectid + ins.offset; + kfree(async_extent); + return ret; + out_free_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); out_free: - extent_clear_unlock_delalloc(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, + extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, @@ -1003,7 +961,39 @@ static noinline void submit_compressed_extents(struct async_chunk *async_chunk) PAGE_END_WRITEBACK | PAGE_SET_ERROR); free_async_extent_pages(async_extent); kfree(async_extent); - goto again; + return ret; +} + +/* + * Phase two of compressed writeback. This is the ordered portion of the code, + * which only gets called in the order the work was queued. We walk all the + * async extents created by compress_file_range and send them down to the disk. + */ +static noinline void submit_compressed_extents(struct async_chunk *async_chunk) +{ + struct btrfs_inode *inode = BTRFS_I(async_chunk->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct async_extent *async_extent; + u64 alloc_hint = 0; + int ret = 0; + + while (!list_empty(&async_chunk->extents)) { + u64 extent_start; + u64 ram_size; + + async_extent = list_entry(async_chunk->extents.next, + struct async_extent, list); + list_del(&async_extent->list); + extent_start = async_extent->start; + ram_size = async_extent->ram_size; + + ret = submit_one_async_extent(inode, async_chunk, async_extent, + &alloc_hint); + btrfs_debug(fs_info, +"async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d", + inode->root->root_key.objectid, + btrfs_ino(inode), extent_start, ram_size, ret); + } } static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, From 418ac4b41ce2cd0a3fd3823d02c0eac52d73c4d3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:58 +0800 Subject: [PATCH 0345/1202] btrfs: cleanup for extent_write_locked_range() There are several cleanups for extent_write_locked_range(), most of them are pure cleanups, but with some preparation for future subpage support. - Add a proper comment for which call sites are suitable Unlike regular synchronized extent write back, if async COW or zoned COW happens, we have all pages in the range still locked. Thus for those (only) two call sites, we need this function to submit page content into bios and submit them. - Remove @mode parameter All the existing two call sites pass WB_SYNC_ALL. No need for @mode parameter. - Better error handling Currently if we hit an error during the page iteration loop, we overwrite @ret, causing only the last error can be recorded. 
Here we add @found_error and @first_error variables to record if we hit
any error, and the first error we hit. So the first error won't get
lost.

- Don't reuse @start as the cursor
  We reuse the parameter @start as the cursor to iterate the range, not
  a big problem, but since we're here, introduce a proper @cur as the
  cursor.

- Remove impossible branch
  Since all pages are still locked after the ordered extent is
  inserted, there is no way that pages can get their dirty bit cleared.
  Remove the branch where the page is not dirty and replace it with an
  ASSERT().

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c | 45 ++++++++++++++++++++++++++++----------------
 fs/btrfs/extent_io.h |  3 +--
 fs/btrfs/inode.c     |  5 ++---
 3 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e9d68b8ee9e6b..a010a40582077 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5073,23 +5073,29 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
 	return ret;
 }
 
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-			      int mode)
+/*
+ * Submit the pages in the range to bio for call sites whose delalloc range has
+ * already been run (aka, ordered extent inserted) and all pages are still
+ * locked.
+ */
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 {
+	bool found_error = false;
+	int first_error = 0;
 	int ret = 0;
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
+	u64 cur = start;
 	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
 		PAGE_SHIFT;
-
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
 		.extent_locked = 1,
-		.sync_io = mode == WB_SYNC_ALL,
+		.sync_io = 1,
 	};
 	struct writeback_control wbc_writepages = {
-		.sync_mode = mode,
 		.nr_to_write = nr_pages * 2,
+		.sync_mode = WB_SYNC_ALL,
 		.range_start = start,
 		.range_end = end + 1,
 		/* We're called from an async helper function */
@@ -5098,26 +5104,33 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 	};
 
 	wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
-	while (start <= end) {
-		page = find_get_page(mapping, start >> PAGE_SHIFT);
-		if (clear_page_dirty_for_io(page))
-			ret = __extent_writepage(page, &wbc_writepages, &epd);
-		else {
-			btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
-					page, start, start + PAGE_SIZE - 1, true);
-			unlock_page(page);
+	while (cur <= end) {
+		page = find_get_page(mapping, cur >> PAGE_SHIFT);
+		/*
+		 * All pages in the range are locked since
+		 * btrfs_run_delalloc_range(), thus there is no way to clear
+		 * the page dirty flag.
+ */ + ASSERT(PageDirty(page)); + clear_page_dirty_for_io(page); + ret = __extent_writepage(page, &wbc_writepages, &epd); + ASSERT(ret <= 0); + if (ret < 0) { + found_error = true; + first_error = ret; } put_page(page); - start += PAGE_SIZE; + cur += PAGE_SIZE; } - ASSERT(ret <= 0); - if (ret == 0) + if (!found_error) ret = flush_write_bio(&epd); else end_write_bio(&epd, ret); wbc_detach_inode(&wbc_writepages); + if (found_error) + return first_error; return ret; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a3ecfd4f37ac2..0399cf8e3c32c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -184,8 +184,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, struct btrfs_bio_ctrl *bio_ctrl, unsigned int read_flags, u64 *prev_em_start); int extent_write_full_page(struct page *page, struct writeback_control *wbc); -int extent_write_locked_range(struct inode *inode, u64 start, u64 end, - int mode); +int extent_write_locked_range(struct inode *inode, u64 start, u64 end); int extent_writepages(struct address_space *mapping, struct writeback_control *wbc); int btree_write_cache_pages(struct address_space *mapping, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb47b0b04a0ae..aaeb2192b24d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -873,8 +873,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode, * drive. */ if (!page_started && !ret) - extent_write_locked_range(&inode->vfs_inode, start, - end, WB_SYNC_ALL); + extent_write_locked_range(&inode->vfs_inode, start, end); else if (ret && async_chunk->locked_page) unlock_page(async_chunk->locked_page); kfree(async_extent); @@ -1454,7 +1453,7 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, __set_page_dirty_nobuffers(locked_page); account_page_redirty(locked_page); - extent_write_locked_range(&inode->vfs_inode, start, end, WB_SYNC_ALL); + extent_write_locked_range(&inode->vfs_inode, start, end); *page_started = 1; return 0; From 3c503efb0b239f47ca253dafe91f14561021094f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:21:59 +0800 Subject: [PATCH 0346/1202] btrfs: subpage: make compress_file_range() compatible In function compress_file_range(), when the compression is finished, the function just rounds up @total_in to PAGE_SIZE. This is fine for regular sectorsize == PAGE_SIZE case, but not for subpage. Just change the ALIGN(, PAGE_SIZE) to round_up(, sectorsize) so that both regular sectorsize and subpage sectorsize will be happy. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index aaeb2192b24d3..dd91f4e60fb63 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -758,7 +758,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk) * win, compare the page count read with the blocks on disk, * compression must free at least one sector size */ - total_in = ALIGN(total_in, PAGE_SIZE); + total_in = round_up(total_in, fs_info->sectorsize); if (total_compressed + blocksize <= total_in) { compressed_extents++; From 728be295617b1b3a62fa3065b68df224b8d86816 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:22:00 +0800 Subject: [PATCH 0347/1202] btrfs: subpage: make btrfs_submit_compressed_write() compatible There is a WARN_ON() checking if @start is aligned to PAGE_SIZE, not sectorsize, which will cause false alert for subpage. Fix it to check against sectorsize. 
Furthermore:

- Use ASSERT() to do the check
  So that in the future we may skip the check for production builds

- Also check alignment for @len

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1debdfeee90ff..ab2eb936975fd 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -513,7 +513,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	const bool use_append = btrfs_use_zone_append(inode, disk_start);
 	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
 
-	WARN_ON(!PAGE_ALIGNED(start));
+	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
+	       IS_ALIGNED(len, fs_info->sectorsize));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
 	if (!cb)
 		return BLK_STS_RESOURCE;

From d9bcdb2a075ab83acbc484b8db9f70afd2eca646 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:01 +0800
Subject: [PATCH 0348/1202] btrfs: subpage: make end_compressed_bio_writeback() compatible

In end_compressed_writeback() we just clear the full page writeback.
For the subpage case, if there are two delalloc ranges in the same
page, the 2nd range will trigger a BUG_ON() as the page writeback is
already cleared by the previous range.

Fix it by using the btrfs_page_clamp_clear_writeback() helper.

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/compression.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ab2eb936975fd..1d071c8d6fff8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -334,6 +334,7 @@ static void end_compressed_bio_read(struct bio *bio)
 static noinline void end_compressed_writeback(struct inode *inode,
 					      const struct compressed_bio *cb)
 {
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	unsigned long index = cb->start >> PAGE_SHIFT;
 	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
 	struct page *pages[16];
@@ -356,7 +357,8 @@ static noinline void end_compressed_writeback(struct inode *inode,
 		for (i = 0; i < ret; i++) {
 			if (cb->errors)
 				SetPageError(pages[i]);
-			end_page_writeback(pages[i]);
+			btrfs_page_clamp_clear_writeback(fs_info, pages[i],
+							 cb->start, cb->len);
 			put_page(pages[i]);
 		}
 		nr_pages -= ret;

From addced0266f45a7ab68945288123a60d72e17df3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:02 +0800
Subject: [PATCH 0349/1202] btrfs: subpage: make extent_write_locked_range() compatible

There are two places that are not subpage compatible yet in
extent_write_locked_range():

- How @nr_pages is calculated
  For subpage we can have the following range with 64K page size:

  0   32K  64K  96K  128K
  |   |////|/////|    |

  In that case, 96K - 32K == 64K, so it looks like one page is enough,
  but the range actually spans two pages, not one.
  Fix it by doing proper round_up() and round_down() to calculate
  @nr_pages. Also add some extra ASSERT()s to ensure the range passed
  in is already aligned.

- How the page end is calculated
  Currently we just use cur + PAGE_SIZE - 1 to calculate the page end,
  which can't handle the above range layout and will trigger the
  ASSERT() in btrfs_writepage_endio_finish_ordered(), as the range is
  no longer covered by the page range.
  Fix it by taking the page end into consideration.
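As a standalone illustration of the nr_pages math above (a userspace
sketch, not part of the patch; it assumes 64K pages and defines local
round_up()/round_down() helpers mirroring the kernel macros):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT 16			/* assume 64K pages */
	#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
	#define round_down(x, a) ((x) & ~((a) - 1))
	#define round_up(x, a)   round_down((x) + (a) - 1, (a))

	int main(void)
	{
		uint64_t start = 32 * 1024;	/* range [32K, 96K), @end inclusive */
		uint64_t end = 96 * 1024 - 1;
		/* The old (end - start + PAGE_SIZE) >> PAGE_SHIFT yields 1 */
		uint64_t naive = (end - start + PAGE_SIZE) >> PAGE_SHIFT;
		/* Rounding out to page boundaries counts both touched pages */
		uint64_t fixed = (round_up(end, PAGE_SIZE) -
				  round_down(start, PAGE_SIZE)) >> PAGE_SHIFT;

		printf("naive=%llu fixed=%llu\n",
		       (unsigned long long)naive, (unsigned long long)fixed);
		return 0;
	}

This prints "naive=1 fixed=2", matching the two 64K pages the range
actually touches.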
Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a010a40582077..655e78ae376e5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5086,15 +5086,14 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 	u64 cur = start;
-	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
-		PAGE_SHIFT;
+	unsigned long nr_pages;
+	const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
 	struct extent_page_data epd = {
 		.bio_ctrl = { 0 },
 		.extent_locked = 1,
 		.sync_io = 1,
 	};
 	struct writeback_control wbc_writepages = {
-		.nr_to_write = nr_pages * 2,
 		.sync_mode = WB_SYNC_ALL,
 		.range_start = start,
 		.range_end = end + 1,
@@ -5103,14 +5102,22 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 		.no_cgroup_owner = 1,
 	};
 
+	ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
+	nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
+		   PAGE_SHIFT;
+	wbc_writepages.nr_to_write = nr_pages * 2;
+
 	wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
 	while (cur <= end) {
+		u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+
 		page = find_get_page(mapping, cur >> PAGE_SHIFT);
 		/*
 		 * All pages in the range are locked since
 		 * btrfs_run_delalloc_range(), thus there is no way to clear
 		 * the page dirty flag.
 		 */
+		ASSERT(PageLocked(page));
 		ASSERT(PageDirty(page));
 		clear_page_dirty_for_io(page);
 		ret = __extent_writepage(page, &wbc_writepages, &epd);
@@ -5120,7 +5127,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 			first_error = ret;
 		}
 		put_page(page);
-		cur += PAGE_SIZE;
+		cur = cur_end + 1;
 	}
 
 	if (!found_error)

From 2cd6c8bac1dcfc87a8c0081aced00a77fe1fad41 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:03 +0800
Subject: [PATCH 0350/1202] btrfs: factor uncompressed async extent submission code into a new helper

Introduce a new helper, submit_uncompressed_range(), for async cow
cases where we fall back to COW.

There are some new updates introduced to the helper:

- Proper locked_page detection
  It's possible that the async_extent range doesn't cover the locked
  page. In that case we shouldn't unlock the locked page.
  In the new helper, we will ensure that we only unlock the locked page
  when:

  * The locked page covers part of the async_extent range
  * The locked page is not unlocked by cow_file_range() nor
    extent_write_locked_range()

  This also means extra comments are added focusing on the page
  locking.

- Add an extra comment on a rarely used parameter
  We use @unlock_page = 0 for cow_file_range(), where only two call
  sites do the same thing, including the new helper.
  It's definitely worth some comments.
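The locked_page detection described above reduces to an inclusive-range
overlap test. A minimal sketch in kernel style (hypothetical helper
name, not the patch's exact code; @end is the inclusive range end):

	/*
	 * Return true if the PAGE_SIZE page starting at @page_start overlaps
	 * the byte range [@start, @end] (both ends inclusive).
	 */
	static bool page_overlaps_range(u64 page_start, u64 start, u64 end)
	{
		u64 page_end = page_start + PAGE_SIZE - 1;

		/* No overlap only if the range lies entirely before or after */
		return !(end < page_start || start > page_end);
	}

Only when such a test passes for async_chunk->locked_page does the page
get handed down to submit_uncompressed_range(), so a page belonging to
a different range is never unlocked from here.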
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 76 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd91f4e60fb63..57a2bc25b4d2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -839,6 +839,43 @@ static void free_async_extent_pages(struct async_extent *async_extent) async_extent->pages = NULL; } +static int submit_uncompressed_range(struct btrfs_inode *inode, + struct async_extent *async_extent, + struct page *locked_page) +{ + u64 start = async_extent->start; + u64 end = async_extent->start + async_extent->ram_size - 1; + unsigned long nr_written = 0; + int page_started = 0; + int ret; + + /* + * Call cow_file_range() to run the delalloc range directly, since we + * won't go to NOCOW or async path again. + * + * Also we call cow_file_range() with @unlock_page == 0, so that we + * can directly submit them without interruption. + */ + ret = cow_file_range(inode, locked_page, start, end, &page_started, + &nr_written, 0); + /* Inline extent inserted, page gets unlocked and everything is done */ + if (page_started) { + ret = 0; + goto out; + } + if (ret < 0) { + if (locked_page) + unlock_page(locked_page); + goto out; + } + + ret = extent_write_locked_range(&inode->vfs_inode, start, end); + /* All pages will be unlocked, including @locked_page */ +out: + kfree(async_extent); + return ret; +} + static int submit_one_async_extent(struct btrfs_inode *inode, struct async_chunk *async_chunk, struct async_extent *async_extent, @@ -848,37 +885,28 @@ static int submit_one_async_extent(struct btrfs_inode *inode, struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key ins; + struct page *locked_page = NULL; struct extent_map *em; int ret = 0; u64 start = async_extent->start; u64 end = async_extent->start + async_extent->ram_size - 1; - lock_extent(io_tree, start, end); - - /* We have fallback to uncompressed write */ - if (!async_extent->pages) { - int page_started = 0; - unsigned long nr_written = 0; + /* + * If async_chunk->locked_page is in the async_extent range, we need to + * handle it. + */ + if (async_chunk->locked_page) { + u64 locked_page_start = page_offset(async_chunk->locked_page); + u64 locked_page_end = locked_page_start + PAGE_SIZE - 1; - /* - * Call cow_file_range() to run the delalloc range directly, - * since we won't go to nocow or async path again. - */ - ret = cow_file_range(inode, async_chunk->locked_page, - start, end, &page_started, &nr_written, 0); - /* - * If @page_started, cow_file_range() inserted an inline extent - * and took care of all the unlocking and IO for us. - * Otherwise, we need to submit all those pages down to the - * drive. 
- */ - if (!page_started && !ret) - extent_write_locked_range(&inode->vfs_inode, start, end); - else if (ret && async_chunk->locked_page) - unlock_page(async_chunk->locked_page); - kfree(async_extent); - return ret; + if (!(start >= locked_page_end || end <= locked_page_start)) + locked_page = async_chunk->locked_page; } + lock_extent(io_tree, start, end); + + /* We have fall back to uncompressed write */ + if (!async_extent->pages) + return submit_uncompressed_range(inode, async_extent, locked_page); ret = btrfs_reserve_extent(root, async_extent->ram_size, async_extent->compressed_size, From e06ff8658d1b25483af59440dd43d04fd2f3b2c9 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:22:04 +0800 Subject: [PATCH 0351/1202] btrfs: subpage: make lzo_compress_pages() compatible There are several problems in lzo_compress_pages() preventing it from being subpage compatible: - No page offset is calculated when reading from inode pages For subpage case, we could have @start which is not aligned to PAGE_SIZE. Thus the destination where we read data from must take offset in page into consideration. - The padding for segment header is bound to PAGE_SIZE This means, for subpage case we can skip several corners where on x86 machines we need to add padding zeros. The rework will: - Update the comment to replace "page" with "sector" - Introduce a new helper, copy_compressed_data_to_page(), to do the copy So that we don't need to bother page switching for both input and output. Now in lzo_compress_pages() we only care about page switching for input, while in copy_compressed_data_to_page() we only care about the page switching for output. - Only one main cursor For lzo_compress_pages() we use @cur_in as main cursor. It will be the file offset we are currently at. All other helper variables will be only declared inside the loop. For copy_compressed_data_to_page() it's similar, we will have @cur_out at the main cursor, which records how many bytes are in the output. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/lzo.c | 270 ++++++++++++++++++++++++------------------------- 1 file changed, 134 insertions(+), 136 deletions(-) diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index c25dfd1a8a54a..00cffc183ec07 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -32,19 +32,19 @@ * payload. * One regular LZO compressed extent can have one or more segments. * For inlined LZO compressed extent, only one segment is allowed. - * One segment represents at most one page of uncompressed data. + * One segment represents at most one sector of uncompressed data. * * 2.1 Segment header * Fixed size. LZO_LEN (4) bytes long, LE32. * Records the total size of the segment (not including the header). - * Segment header never crosses page boundary, thus it's possible to - * have at most 3 padding zeros at the end of the page. + * Segment header never crosses sector boundary, thus it's possible to + * have at most 3 padding zeros at the end of the sector. * * 2.2 Data Payload - * Variable size. Size up limit should be lzo1x_worst_compress(PAGE_SIZE) - * which is 4419 for a 4KiB page. + * Variable size. Size up limit should be lzo1x_worst_compress(sectorsize) + * which is 4419 for a 4KiB sectorsize. * - * Example: + * Example with 4K sectorsize: * Page 1: * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10 * 0x0000 | Header | SegHdr 01 | Data payload 01 ... 
| @@ -112,163 +112,161 @@ static inline size_t read_compress_length(const char *buf) return le32_to_cpu(dlen); } +/* + * Will do: + * + * - Write a segment header into the destination + * - Copy the compressed buffer into the destination + * - Make sure we have enough space in the last sector to fit a segment header + * If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros. + * + * Will allocate new pages when needed. + */ +static int copy_compressed_data_to_page(char *compressed_data, + size_t compressed_size, + struct page **out_pages, + u32 *cur_out, + const u32 sectorsize) +{ + u32 sector_bytes_left; + u32 orig_out; + struct page *cur_page; + + /* + * We never allow a segment header crossing sector boundary, previous + * run should ensure we have enough space left inside the sector. + */ + ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize); + + cur_page = out_pages[*cur_out / PAGE_SIZE]; + /* Allocate a new page */ + if (!cur_page) { + cur_page = alloc_page(GFP_NOFS); + if (!cur_page) + return -ENOMEM; + out_pages[*cur_out / PAGE_SIZE] = cur_page; + } + + write_compress_length(page_address(cur_page) + offset_in_page(*cur_out), + compressed_size); + *cur_out += LZO_LEN; + + orig_out = *cur_out; + + /* Copy compressed data */ + while (*cur_out - orig_out < compressed_size) { + u32 copy_len = min_t(u32, sectorsize - *cur_out % sectorsize, + orig_out + compressed_size - *cur_out); + + cur_page = out_pages[*cur_out / PAGE_SIZE]; + /* Allocate a new page */ + if (!cur_page) { + cur_page = alloc_page(GFP_NOFS); + if (!cur_page) + return -ENOMEM; + out_pages[*cur_out / PAGE_SIZE] = cur_page; + } + + memcpy(page_address(cur_page) + offset_in_page(*cur_out), + compressed_data + *cur_out - orig_out, copy_len); + + *cur_out += copy_len; + } + + /* + * Check if we can fit the next segment header into the remaining space + * of the sector. 
+ */ + sector_bytes_left = round_up(*cur_out, sectorsize) - *cur_out; + if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0) + return 0; + + /* The remaining size is not enough, pad it with zeros */ + memset(page_address(cur_page) + offset_in_page(*cur_out), 0, + sector_bytes_left); + *cur_out += sector_bytes_left; + return 0; +} + int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, u64 start, struct page **pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); + const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize; + struct page *page_in = NULL; int ret = 0; - char *data_in; - char *cpage_out, *sizes_ptr; - int nr_pages = 0; - struct page *in_page = NULL; - struct page *out_page = NULL; - unsigned long bytes_left; - unsigned long len = *total_out; - unsigned long nr_dest_pages = *out_pages; - const unsigned long max_out = nr_dest_pages * PAGE_SIZE; - size_t in_len; - size_t out_len; - char *buf; - unsigned long tot_in = 0; - unsigned long tot_out = 0; - unsigned long pg_bytes_left; - unsigned long out_offset; - unsigned long bytes; + /* Points to the file offset of input data */ + u64 cur_in = start; + /* Points to the current output byte */ + u32 cur_out = 0; + u32 len = *total_out; *out_pages = 0; *total_out = 0; *total_in = 0; - in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); - /* - * store the size of all chunks of compressed data in - * the first 4 bytes + * Skip the header for now, we will later come back and write the total + * compressed size */ - out_page = alloc_page(GFP_NOFS); - if (out_page == NULL) { - ret = -ENOMEM; - goto out; - } - cpage_out = page_address(out_page); - out_offset = LZO_LEN; - tot_out = LZO_LEN; - pages[0] = out_page; - nr_pages = 1; - pg_bytes_left = PAGE_SIZE - LZO_LEN; - - /* compress at most one page of data each time */ - in_len = min(len, PAGE_SIZE); - while (tot_in < len) { - ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, - &out_len, workspace->mem); - if (ret != LZO_E_OK) { - pr_debug("BTRFS: lzo in loop returned %d\n", - ret); + cur_out += LZO_LEN; + while (cur_in < start + len) { + const u32 sectorsize_mask = sectorsize - 1; + u32 sector_off = (cur_in - start) & sectorsize_mask; + u32 in_len; + size_t out_len; + + /* Get the input page first */ + if (!page_in) { + page_in = find_get_page(mapping, cur_in >> PAGE_SHIFT); + ASSERT(page_in); + } + + /* Compress at most one sector of data each time */ + in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off); + ASSERT(in_len); + ret = lzo1x_1_compress(page_address(page_in) + + offset_in_page(cur_in), in_len, + workspace->cbuf, &out_len, + workspace->mem); + if (ret < 0) { + pr_debug("BTRFS: lzo in loop returned %d\n", ret); ret = -EIO; goto out; } - /* store the size of this chunk of compressed data */ - write_compress_length(cpage_out + out_offset, out_len); - tot_out += LZO_LEN; - out_offset += LZO_LEN; - pg_bytes_left -= LZO_LEN; - - tot_in += in_len; - tot_out += out_len; - - /* copy bytes from the working buffer into the pages */ - buf = workspace->cbuf; - while (out_len) { - bytes = min_t(unsigned long, pg_bytes_left, out_len); - - memcpy(cpage_out + out_offset, buf, bytes); - - out_len -= bytes; - pg_bytes_left -= bytes; - buf += bytes; - out_offset += bytes; - - /* - * we need another page for writing out. - * - * Note if there's less than 4 bytes left, we just - * skip to a new page. 
- */ - if ((out_len == 0 && pg_bytes_left < LZO_LEN) || - pg_bytes_left == 0) { - if (pg_bytes_left) { - memset(cpage_out + out_offset, 0, - pg_bytes_left); - tot_out += pg_bytes_left; - } - - /* we're done, don't allocate new page */ - if (out_len == 0 && tot_in >= len) - break; - - if (nr_pages == nr_dest_pages) { - out_page = NULL; - ret = -E2BIG; - goto out; - } - - out_page = alloc_page(GFP_NOFS); - if (out_page == NULL) { - ret = -ENOMEM; - goto out; - } - cpage_out = page_address(out_page); - pages[nr_pages++] = out_page; - - pg_bytes_left = PAGE_SIZE; - out_offset = 0; - } - } + ret = copy_compressed_data_to_page(workspace->cbuf, out_len, + pages, &cur_out, sectorsize); + if (ret < 0) + goto out; + + cur_in += in_len; - /* we're making it bigger, give up */ - if (tot_in > 8192 && tot_in < tot_out) { + /* + * Check if we're making it bigger after two sectors. And if + * it is so, give up. + */ + if (cur_in - start > sectorsize * 2 && cur_in - start < cur_out) { ret = -E2BIG; goto out; } - /* we're all done */ - if (tot_in >= len) - break; - - if (tot_out > max_out) - break; - - bytes_left = len - tot_in; - put_page(in_page); - - start += PAGE_SIZE; - in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = page_address(in_page); - in_len = min(bytes_left, PAGE_SIZE); - } - - if (tot_out >= tot_in) { - ret = -E2BIG; - goto out; + /* Check if we have reached page boundary */ + if (IS_ALIGNED(cur_in, PAGE_SIZE)) { + put_page(page_in); + page_in = NULL; + } } - /* store the size of all chunks of compressed data */ - sizes_ptr = page_address(pages[0]); - write_compress_length(sizes_ptr, tot_out); + /* Store the size of all chunks of compressed data */ + write_compress_length(page_address(pages[0]), cur_out); ret = 0; - *total_out = tot_out; - *total_in = tot_in; + *total_out = cur_out; + *total_in = cur_in - start; out: - *out_pages = nr_pages; - - if (in_page) - put_page(in_page); - + *out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE); return ret; } From 90dd141dffc659160270f27601124d9e6f3c06ac Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 27 Sep 2021 15:22:05 +0800 Subject: [PATCH 0352/1202] btrfs: rework page locking in __extent_writepage() Pages passed to __extent_writepage() are always locked, but they may be locked by different functions. There are two types of locked page for __extent_writepage(): - Page locked by plain lock_page() It should not have any subpage::writers count. Can be unlocked by unlock_page(). This is the most common locked page for __extent_writepage() called inside extent_write_cache_pages() or extent_write_full_page(). Rarer cases include the @locked_page from extent_write_locked_range(). - Page locked by lock_delalloc_pages() There is only one caller, all pages except @locked_page for extent_write_locked_range(). In this case, we have to call subpage helper to handle the case. So here we introduce a helper, btrfs_page_unlock_writer(), to allow __extent_writepage() to unlock different locked pages. And since for all other callers of __extent_writepage() their pages are ensured to be locked by lock_page(), also add an extra check for epd::extent_locked to unlock such pages directly. 
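Condensed, the unlock decision described above looks like this (sketch
only; the real logic is btrfs_page_unlock_writer() in the diff below):

	if (fs_info->sectorsize == PAGE_SIZE)		/* regular case */
		unlock_page(page);
	else if (atomic_read(&subpage->writers) == 0)	/* plain lock_page() */
		unlock_page(page);
	else						/* lock_delalloc_pages() */
		btrfs_page_end_writer_lock(fs_info, page, start, len);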
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 15 ++++++++++++++- fs/btrfs/subpage.c | 43 +++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/subpage.h | 2 ++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 655e78ae376e5..18ef234fe3dc9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4051,6 +4051,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd) { struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); const u64 page_start = page_offset(page); const u64 page_end = page_start + PAGE_SIZE - 1; int ret; @@ -4138,7 +4139,19 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, */ if (PageError(page)) end_extent_writepage(page, ret, page_start, page_end); - unlock_page(page); + if (epd->extent_locked) { + /* + * If epd->extent_locked, it's from extent_write_locked_range(), + * the page can either be locked by lock_page() or + * process_one_page(). + * Let btrfs_page_unlock_writer() handle both cases. + */ + ASSERT(wbc); + btrfs_page_unlock_writer(fs_info, page, wbc->range_start, + wbc->range_end + 1 - wbc->range_start); + } else { + unlock_page(page); + } ASSERT(ret <= 0); return ret; } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index ed9936cdd267c..265b6aa1c662d 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -690,3 +690,46 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info, ASSERT(PagePrivate(page) && page->private); ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty)); } + +/* + * Handle different locked pages with different page sizes: + * + * - Page locked by plain lock_page() + * It should not have any subpage::writers count. + * Can be unlocked by unlock_page(). + * This is the most common locked page for __extent_writepage() called + * inside extent_write_cache_pages() or extent_write_full_page(). + * Rarer cases include the @locked_page from extent_write_locked_range(). + * + * - Page locked by lock_delalloc_pages() + * There is only one caller, all pages except @locked_page for + * extent_write_locked_range(). + * In this case, we have to call subpage helper to handle the case. + */ +void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page, + u64 start, u32 len) +{ + struct btrfs_subpage *subpage; + + ASSERT(PageLocked(page)); + /* For regular page size case, we just unlock the page */ + if (fs_info->sectorsize == PAGE_SIZE) + return unlock_page(page); + + ASSERT(PagePrivate(page) && page->private); + subpage = (struct btrfs_subpage *)page->private; + + /* + * For subpage case, there are two types of locked page. With or + * without writers number. + * + * Since we own the page lock, no one else could touch subpage::writers + * and we are safe to do several atomic operations without spinlock. 
+	 */
+	if (atomic_read(&subpage->writers) == 0)
+		/* No writers, locked by plain lock_page() */
+		return unlock_page(page);
+
+	/* Have writers, use proper subpage helper to end it */
+	btrfs_page_end_writer_lock(fs_info, page, start, len);
+}
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 46224f959c348..7accb5c40d33b 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -150,5 +150,7 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 
 void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
 				 struct page *page);
+void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
+			      u64 start, u32 len);
 
 #endif

From bf95eb3e1244459503043823b3c67dc775b0301c Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:06 +0800
Subject: [PATCH 0353/1202] btrfs: handle page locking in btrfs_page_end_writer_lock with no writers

There are several call sites of extent_clear_unlock_delalloc() which
get @locked_page = NULL. So extent_clear_unlock_delalloc() will try to
call process_one_page() to unlock every page, even if the first page is
not locked by btrfs_page_start_writer_lock().

This will trigger an ASSERT() in btrfs_subpage_end_and_test_writer() as
previously we required every page passed to
btrfs_subpage_end_and_test_writer() to be locked by
btrfs_page_start_writer_lock(). But the compression path doesn't go
that way.

Thankfully it's not hard to distinguish pages locked by lock_page()
from those locked by btrfs_page_start_writer_lock(). So do the check in
btrfs_subpage_end_and_test_writer() so that it can now handle both
cases well.

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/subpage.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 265b6aa1c662d..29bd8c7a7706b 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -292,6 +292,16 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
 
 	btrfs_subpage_assert(fs_info, page, start, len);
 
+	/*
+	 * We have call sites passing @lock_page into
+	 * extent_clear_unlock_delalloc() for compression path.
+	 *
+	 * This @locked_page is locked by plain lock_page(), thus its
+	 * subpage::writers is 0. Handle them in a special way.
+	 */
+	if (atomic_read(&subpage->writers) == 0)
+		return true;
+
 	ASSERT(atomic_read(&subpage->writers) >= nbits);
 	return atomic_sub_and_test(nbits, &subpage->writers);
 }

From 31ddf5fd023c87cd960a340edbaaf925eb0ff101 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:07 +0800
Subject: [PATCH 0354/1202] btrfs: subpage: avoid potential deadlock with compression and delalloc

[BUG]
With experimental subpage compression enabled, a simple fsstress can
lead to self deadlock on page 720896:

  mkfs.btrfs -f -s 4k $dev > /dev/null
  mount $dev -o compress $mnt
  $fsstress -p 1 -n 100 -w -d $mnt -v -s 1625511156

[CAUSE]
If we have a file layout that looks like below:

  0    32K    64K    96K    128K
  |//|        |///////////////|
     4K

Then we run the delalloc range for the inode, it will:

- Call find_lock_delalloc_range() with @delalloc_start = 0
  Then we get a delalloc range [0, 4K).
  This range will be COWed.

- Call find_lock_delalloc_range() again with @delalloc_start = 4K
  Since find_lock_delalloc_range() never cares whether the range is
  still inside page range [0, 64K), it will return range [64K, 128K).
  This range meets the condition for subpage compression, so it will go
  through the async COW path.
  And the async COW path will return @page_started. But that
  @page_started is now for range [64K, 128K), not for range [0, 64K).

- writepage_delalloc() returned 1 for page [0, 64K)
  Thus page [0, 64K) will not be unlocked, nor will its page dirty
  status be cleared.

Next time when we try to lock page [0, 64K) we will deadlock, as there
is no one to release page [0, 64K).

This problem will never happen for regular page size as one page only
contains one sector. After the first find_lock_delalloc_range() call,
the @delalloc_end will go beyond @page_end no matter if we found a
delalloc range or not. Thus this bug only happens for subpage, as now
we need multiple runs to exhaust the delalloc range of a page.

[FIX]
Fix the problem by ensuring the delalloc range we ran at least started
inside @locked_page. So that we will never get an incorrect
@page_started.

And to prevent such a problem from happening again:

- Make find_lock_delalloc_range() return false if the found range is
  beyond the @end value passed in.
  Since @end will be utilized now, add an ASSERT() to ensure we pass
  a correct @end into find_lock_delalloc_range().
  This also means, for selftests we need to populate @end before
  calling find_lock_delalloc_range().

- New ASSERT() in find_lock_delalloc_range()
  Now we will make sure the @start/@end passed in at least covers part
  of the page.

- New ASSERT() in run_delalloc_range()
  To make sure the range at least starts inside @locked_page.

- Use @delalloc_start as the proper cursor, while @delalloc_end is
  always reset to @page_end.

Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c             | 36 ++++++++++++++++++++++++--------
 fs/btrfs/inode.c                 |  7 +++++++
 fs/btrfs/tests/extent-io-tests.c | 12 +++++------
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 18ef234fe3dc9..d022654af4202 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 
 /*
  * Find and lock a contiguous range of bytes in the file marked as delalloc, no
- * more than @max_bytes. @Start and @end are used to return the range,
+ * more than @max_bytes.
  *
- * Return: true if we find something
- *         false if nothing was in the tree
+ * @start:	The original start bytenr to search.
+ *		Will store the extent range start bytenr.
+ * @end:	The original end bytenr of the search range
+ *		Will store the extent range end bytenr.
+ *
+ * Return true if we find a delalloc range which starts inside the original
+ * range, and @start/@end will store the delalloc range start/end.
+ *
+ * Return false if we can't find any delalloc range which starts inside the
+ * original range, and @start/@end will be the non-delalloc range start/end.
*/ EXPORT_FOR_TESTS noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, @@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, u64 *end) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; + const u64 orig_start = *start; + const u64 orig_end = *end; u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; u64 delalloc_start; u64 delalloc_end; @@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, int ret; int loops = 0; + /* Caller should pass a valid @end to indicate the search range end */ + ASSERT(orig_end > orig_start); + + /* The range should at least cover part of the page */ + ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE || + orig_end <= page_offset(locked_page))); again: /* step one, find a bunch of delalloc bytes starting at start */ delalloc_start = *start; delalloc_end = 0; found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end, max_bytes, &cached_state); - if (!found || delalloc_end <= *start) { + if (!found || delalloc_end <= *start || delalloc_start > orig_end) { *start = delalloc_start; - *end = delalloc_end; + + /* @delalloc_end can be -1, never go beyond @orig_end */ + *end = min(delalloc_end, orig_end); free_extent_state(cached_state); return false; } @@ -3771,16 +3789,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, struct page *page, struct writeback_control *wbc, unsigned long *nr_written) { - u64 page_end = page_offset(page) + PAGE_SIZE - 1; - bool found; + const u64 page_end = page_offset(page) + PAGE_SIZE - 1; u64 delalloc_start = page_offset(page); u64 delalloc_to_write = 0; - u64 delalloc_end = 0; int ret; int page_started = 0; + while (delalloc_start < page_end) { + u64 delalloc_end = page_end; + bool found; - while (delalloc_end < page_end) { found = find_lock_delalloc_range(&inode->vfs_inode, page, &delalloc_start, &delalloc_end); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 57a2bc25b4d2d..0276fa9516414 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1955,6 +1955,13 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page int ret; const bool zoned = btrfs_is_zoned(inode->root->fs_info); + /* + * The range must cover part of the @locked_page, or the returned + * @page_started can confuse the caller. 
+	 */
+	ASSERT(!(end <= page_offset(locked_page) ||
+		 start >= page_offset(locked_page) + PAGE_SIZE));
+
 	if (should_nocow(inode, start, end)) {
 		/*
 		 * Normally on a zoned device we're only doing COW writes, but
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 73e96d505f4f7..c2e72e7a8ff0e 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -112,7 +112,7 @@ static int test_find_delalloc(u32 sectorsize)
 	 */
 	set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
 	start = 0;
-	end = 0;
+	end = start + PAGE_SIZE - 1;
 	found = find_lock_delalloc_range(inode, locked_page, &start,
 					 &end);
 	if (!found) {
@@ -143,7 +143,7 @@
 	}
 	set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
 	start = test_start;
-	end = 0;
+	end = start + PAGE_SIZE - 1;
 	found = find_lock_delalloc_range(inode, locked_page, &start,
 					 &end);
 	if (!found) {
@@ -177,14 +177,14 @@
 		goto out_bits;
 	}
 	start = test_start;
-	end = 0;
+	end = start + PAGE_SIZE - 1;
 	found = find_lock_delalloc_range(inode, locked_page, &start,
 					 &end);
 	if (found) {
 		test_err("found range when we shouldn't have");
 		goto out_bits;
 	}
-	if (end != (u64)-1) {
+	if (end != test_start + PAGE_SIZE - 1) {
 		test_err("did not return the proper end offset");
 		goto out_bits;
 	}
@@ -198,7 +198,7 @@
 	 */
 	set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
 	start = test_start;
-	end = 0;
+	end = start + PAGE_SIZE - 1;
 	found = find_lock_delalloc_range(inode, locked_page, &start,
 					 &end);
 	if (!found) {
@@ -233,7 +233,7 @@
 	/* We unlocked it in the previous test */
 	lock_page(locked_page);
 	start = test_start;
-	end = 0;
+	end = start + PAGE_SIZE - 1;
 	/*
 	 * Currently if we fail to find dirty pages in the delalloc range we
 	 * will adjust max_bytes down to PAGE_SIZE and then re-search. If

From 08ef5b1c10449abfc84aa2e9a09c5406a2ab7df6 Mon Sep 17 00:00:00 2001
From: Qu Wenruo
Date: Mon, 27 Sep 2021 15:22:08 +0800
Subject: [PATCH 0355/1202] btrfs: subpage: only allow compression if the range is fully page aligned

For compressed write, we use a mechanism called async COW, which unlike
regular run_delalloc_cow() or cow_file_range() will also unlock the
first page.

This mechanism allows us to continue handling the next ranges, without
waiting for the time-consuming compression.

But this has a problem for the subpage case, as we could have the
following delalloc ranges for a page:

  0          32K         64K
  |  |///////|   |///////|
     \- A       \- B

In the above case, if we pass both ranges to cow_file_range_async(),
both range A and range B will try to unlock the full page [0, 64K).

Whichever one finishes later than the other will then try to do other
page operations, like end_page_writeback(), on an unlocked page,
triggering a VM layer BUG_ON().

To make subpage compression work at least partially, here we add
another restriction for it: only allow compression if the delalloc
range is fully page aligned.

By that, an async extent is always ensured to unlock the first page
exclusively, just like it used to be for regular sectorsize.

In theory, we only need to make sure the delalloc range fully covers
its first page, but the tail page will be locked anyway, blocking later
writeback until the compression finishes.

Thus here we choose to make sure the range is fully page aligned before
doing the compression.
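The restriction boils down to a simple predicate on the delalloc range
(a sketch with a hypothetical helper name; @end is inclusive, as
elsewhere in this series):

	static bool subpage_compress_allowed(u64 start, u64 end)
	{
		/* Both edges must sit on page boundaries */
		return IS_ALIGNED(start, PAGE_SIZE) &&
		       IS_ALIGNED(end + 1, PAGE_SIZE);
	}

For the layout above, neither range A nor range B passes the test, so
both fall back to the uncompressed path; only a range covering the
whole [0, 64K) page would be compressed.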
In the future, we could optimize the situation by properly increasing
the subpage::writers number for the locked page, but that also means we
need to change how we run the delalloc range of a page. (Instead of
running each delalloc range we hit, we need to find and lock all
delalloc ranges covering the page, then run each of them.)

Reviewed-by: Nikolay Borisov
Signed-off-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/inode.c | 48 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0276fa9516414..28e0ad1478c3f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -489,9 +489,6 @@ static noinline int add_async_extent(struct async_chunk *cow,
  */
 static inline bool inode_can_compress(struct btrfs_inode *inode)
 {
-	/* Subpage doesn't support compression yet */
-	if (inode->root->fs_info->sectorsize < PAGE_SIZE)
-		return false;
 	if (inode->flags & BTRFS_INODE_NODATACOW ||
 	    inode->flags & BTRFS_INODE_NODATASUM)
 		return false;
@@ -513,6 +510,38 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
 			  btrfs_ino(inode));
 		return 0;
 	}
+	/*
+	 * Special check for subpage.
+	 *
+	 * We lock the full page then run each delalloc range in the page, thus
+	 * for the following case, we will hit some subpage specific corner case:
+	 *
+	 * 0		32K		64K
+	 * |	|///////|	|///////|
+	 *		\- A		\- B
+	 *
+	 * In the above case, both range A and range B will try to unlock the
+	 * full page [0, 64K), causing the one finished later will have page
+	 * unlocked already, triggering various page lock requirement BUG_ON()s.
+	 *
+	 * So here we add an artificial limit that subpage compression can
+	 * only happen if the range is fully page aligned.
+	 *
+	 * In theory we only need to ensure the first page is fully covered, but
+	 * the trailing partial page will be locked until the full compression
+	 * finishes, delaying the write of other ranges.
+	 *
+	 * TODO: Make btrfs_run_delalloc_range() to lock all delalloc range
+	 * first to prevent any submitted async extent to unlock the full page.
+	 * By this, we can ensure for subpage case that only the last async_cow
+	 * will unlock the full page.
+	 */
+	if (fs_info->sectorsize < PAGE_SIZE) {
+		if (!IS_ALIGNED(start, PAGE_SIZE) ||
+		    !IS_ALIGNED(end + 1, PAGE_SIZE))
+			return 0;
+	}
+
 	/* force compress */
 	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
 		return 1;
@@ -614,13 +643,24 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
 	total_compressed = actual_end - start;
 
 	/*
-	 * skip compression for a small file range(<=blocksize) that
+	 * Skip compression for a small file range(<=blocksize) that
 	 * isn't an inline extent, since it doesn't save disk space at all.
 	 */
 	if (total_compressed <= blocksize &&
	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
 		goto cleanup_and_bail_uncompressed;
 
+	/*
+	 * For subpage case, we require full page alignment for the sector
+	 * aligned range.
+	 * Thus we must also check against @actual_end, not just @end.
+ */ + if (blocksize < PAGE_SIZE) { + if (!IS_ALIGNED(start, PAGE_SIZE) || + !IS_ALIGNED(round_up(actual_end, blocksize), PAGE_SIZE)) + goto cleanup_and_bail_uncompressed; + } + total_compressed = min_t(unsigned long, total_compressed, BTRFS_MAX_UNCOMPRESSED); total_in = 0; From 2a3bfc324fa3b32cdd006219c56536ba9369c03b Mon Sep 17 00:00:00 2001 From: Kai Song Date: Sun, 3 Oct 2021 16:06:56 +0800 Subject: [PATCH 0356/1202] btrfs: zoned: use kmemdup() to replace kmalloc + memcpy Fix memdup.cocci warning: fs/btrfs/zoned.c:1198:23-30: WARNING opportunity for kmemdup Reviewed-by: Johannes Thumshirn Signed-off-by: Kai Song Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 14ea6797bf492..67d932d707984 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1195,14 +1195,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) map = em->map_lookup; - cache->physical_map = kmalloc(map_lookup_size(map->num_stripes), GFP_NOFS); + cache->physical_map = kmemdup(map, map_lookup_size(map->num_stripes), GFP_NOFS); if (!cache->physical_map) { ret = -ENOMEM; goto out; } - memcpy(cache->physical_map, map, map_lookup_size(map->num_stripes)); - alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); if (!alloc_offsets) { ret = -ENOMEM; From 902ce3648c918d81ebe6117167a528c7c0fd4204 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:35:23 -0400 Subject: [PATCH 0357/1202] btrfs: change handle_fs_error in recover_log_trees to aborts During inspection of the return path for replay I noticed that we don't actually abort the transaction if we get a failure during replay. This isn't a problem necessarily, as we properly return the error and will fail to mount. However we still leave this dangling transaction that could conceivably be committed without thinking there was an error. We were using btrfs_handle_fs_error() here, but that pre-dates the transaction abort code. Simply replace the btrfs_handle_fs_error() calls with transaction aborts, so we still know where exactly things went wrong, and add a few in some other un-handled error cases. 
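The conversion follows a single pattern (a sketch of the shape shared by all the hunks below, not a new hunk): every failure site that used to call btrfs_handle_fs_error() now marks the transaction itself:

	ret = walk_log_tree(trans, log_root_tree, &wc);
	if (ret) {
		/* Mark the transaction so it can never be committed. */
		btrfs_abort_transaction(trans, ret);
		goto error;
	}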
Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 641e2b5a3b649..9bfadce594332 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6548,8 +6548,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ret = walk_log_tree(trans, log_root_tree, &wc); if (ret) { - btrfs_handle_fs_error(fs_info, ret, - "Failed to pin buffers while recovering log root tree."); + btrfs_abort_transaction(trans, ret); goto error; } @@ -6562,8 +6561,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); if (ret < 0) { - btrfs_handle_fs_error(fs_info, ret, - "Couldn't find tree log root."); + btrfs_abort_transaction(trans, ret); goto error; } if (ret > 0) { @@ -6580,8 +6578,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) log = btrfs_read_tree_root(log_root_tree, &found_key); if (IS_ERR(log)) { ret = PTR_ERR(log); - btrfs_handle_fs_error(fs_info, ret, - "Couldn't read tree log root."); + btrfs_abort_transaction(trans, ret); goto error; } @@ -6609,8 +6606,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) if (!ret) goto next; - btrfs_handle_fs_error(fs_info, ret, - "Couldn't read target root for tree log recovery."); + btrfs_abort_transaction(trans, ret); goto error; } @@ -6618,14 +6614,15 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ret = btrfs_record_root_in_trans(trans, wc.replay_dest); if (ret) /* The loop needs to continue due to the root refs */ - btrfs_handle_fs_error(fs_info, ret, - "failed to record the log root in transaction"); + btrfs_abort_transaction(trans, ret); else ret = walk_log_tree(trans, log, &wc); if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { ret = fixup_inode_link_counts(trans, wc.replay_dest, path); + if (ret) + btrfs_abort_transaction(trans, ret); } if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { @@ -6642,6 +6639,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) * could only happen during mount. */ ret = btrfs_init_root_free_objectid(root); + if (ret) + btrfs_abort_transaction(trans, ret); } wc.replay_dest->log_root = NULL; From af7139c714e03d76666be22982e0633f22aeb92e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:35:24 -0400 Subject: [PATCH 0358/1202] btrfs: change error handling for btrfs_delete_*_in_log Currently we will abort the transaction if we get a random error (like -EIO) while trying to remove the directory entries from the root log during rename. However since these are simply log tree related errors, we can mark the trans as needing a full commit. Then if the error was truly catastrophic we'll hit it during the normal commit and abort as appropriate. 
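Both sides of the new contract, condensed from the hunks below:

	/* Caller side (__btrfs_unlink_inode): nothing left to propagate. */
	btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, dir_ino);
	btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index);

	/* Helper side: any real error simply forces a full log commit. */
	if (ret < 0 && ret != -ENOENT)
		btrfs_set_log_full_commit(trans);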
Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 +++------------- fs/btrfs/tree-log.c | 40 ++++++++++++++-------------------------- fs/btrfs/tree-log.h | 16 ++++++++-------- 3 files changed, 25 insertions(+), 47 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 28e0ad1478c3f..1e083ecd6951a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4120,19 +4120,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, goto err; } - ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, - dir_ino); - if (ret != 0 && ret != -ENOENT) { - btrfs_abort_transaction(trans, ret); - goto err; - } - - ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, - index); - if (ret == -ENOENT) - ret = 0; - else if (ret) - btrfs_abort_transaction(trans, ret); + btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode, + dir_ino); + btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir, index); /* * If we have a pending delayed iput we could end up with the final iput diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9bfadce594332..eb8b61decc717 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3518,10 +3518,10 @@ static bool inode_logged(struct btrfs_trans_handle *trans, * This optimizations allows us to avoid relogging the entire inode * or the entire directory. */ -int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - struct btrfs_inode *dir, u64 index) +void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct btrfs_inode *dir, u64 index) { struct btrfs_root *log; struct btrfs_dir_item *di; @@ -3531,11 +3531,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, u64 dir_ino = btrfs_ino(dir); if (!inode_logged(trans, dir)) - return 0; + return; ret = join_running_log_trans(root); if (ret) - return 0; + return; mutex_lock(&dir->log_mutex); @@ -3583,48 +3583,36 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, btrfs_free_path(path); out_unlock: mutex_unlock(&dir->log_mutex); - if (err == -ENOSPC) { + if (err < 0) btrfs_set_log_full_commit(trans); - err = 0; - } else if (err < 0) { - btrfs_abort_transaction(trans, err); - } - btrfs_end_log_trans(root); - - return err; } /* see comments for btrfs_del_dir_entries_in_log */ -int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - struct btrfs_inode *inode, u64 dirid) +void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct btrfs_inode *inode, u64 dirid) { struct btrfs_root *log; u64 index; int ret; if (!inode_logged(trans, inode)) - return 0; + return; ret = join_running_log_trans(root); if (ret) - return 0; + return; log = root->log_root; mutex_lock(&inode->log_mutex); ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), dirid, &index); mutex_unlock(&inode->log_mutex); - if (ret == -ENOSPC) { + if (ret < 0 && ret != -ENOENT) btrfs_set_log_full_commit(trans); - ret = 0; - } else if (ret < 0 && ret != -ENOENT) - btrfs_abort_transaction(trans, ret); btrfs_end_log_trans(root); - - return ret; } /* diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 3ce6bdb760096..f6811c3df38a6 100644 --- a/fs/btrfs/tree-log.h +++ 
b/fs/btrfs/tree-log.h @@ -70,14 +70,14 @@ int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct dentry *dentry, struct btrfs_log_ctx *ctx); -int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - struct btrfs_inode *dir, u64 index); -int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - const char *name, int name_len, - struct btrfs_inode *inode, u64 dirid); +void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct btrfs_inode *dir, u64 index); +void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + const char *name, int name_len, + struct btrfs_inode *inode, u64 dirid); void btrfs_end_log_trans(struct btrfs_root *root); void btrfs_pin_log_trans(struct btrfs_root *root); void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, From ec2d36992c3cd8d8038505fcf752ebf6f56abd7d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:35:25 -0400 Subject: [PATCH 0359/1202] btrfs: add a BTRFS_FS_ERROR helper We have a few flags that are inconsistently used to describe the fs in different states of failure. As of 5963ffcaf383 ("btrfs: always abort the transaction if we abort a trans handle") we will always set BTRFS_FS_STATE_ERROR if we abort, so we don't have to check both ABORTED and ERROR to see if things have gone wrong. Add a helper to check BTRFS_FS_STATE_ERROR and then convert all checkers of FS_STATE_ERROR to use the helper. The TRANS_ABORTED bit check in close_ctree() was added in af7227338135 ("Btrfs: clean up resources during umount after trans is aborted") but is not specific to that path: since an abort now also sets BTRFS_FS_STATE_ERROR, the single helper check covers it as well.
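For reference, the helper and a typical converted call site, taken from the hunks below:

	#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
						&(fs_info)->fs_state)))

	/* e.g. in btrfs_file_write_iter(): */
	if (BTRFS_FS_ERROR(inode->root->fs_info))
		return -EROFS;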
Reviewed-by: Anand Jain Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/disk-io.c | 8 +++----- fs/btrfs/extent_io.c | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 6 +++--- fs/btrfs/scrub.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 11 +++++------ fs/btrfs/tree-log.c | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2fb177f1de317..1401268985774 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3607,6 +3607,9 @@ do { \ (errno), fmt, ##args); \ } while (0) +#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ + &(fs_info)->fs_state))) + __printf(5, 6) __cold void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 37637539c5ab1..1ae30b29f2b5d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1954,8 +1954,7 @@ static int transaction_kthread(void *arg) wake_up_process(fs_info->cleaner_kthread); mutex_unlock(&fs_info->transaction_kthread_mutex); - if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, - &fs_info->fs_state))) + if (BTRFS_FS_ERROR(fs_info)) btrfs_cleanup_transaction(fs_info); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || @@ -4232,7 +4231,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, drop_ref = true; spin_unlock(&fs_info->fs_roots_radix_lock); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); @@ -4383,8 +4382,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_err(fs_info, "commit super ret %d", ret); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) || - test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) btrfs_error_commit_super(fs_info); kthread_stop(fs_info->transaction_kthread); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d022654af4202..b10dc75eef1c3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4908,7 +4908,7 @@ int btree_write_cache_pages(struct address_space *mapping, * extent io tree. Thus we don't want to submit such wild eb * if the fs already has error. */ - if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (!BTRFS_FS_ERROR(fs_info)) { ret = flush_write_bio(&epd); } else { ret = -EROFS; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 38e8789467614..9785ea32d39cf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2018,7 +2018,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, * have opened a file as writable, we have to stop this write operation * to ensure consistency. 
*/ - if (test_bit(BTRFS_FS_STATE_ERROR, &inode->root->fs_info->fs_state)) + if (BTRFS_FS_ERROR(inode->root->fs_info)) return -EROFS; if (!(iocb->ki_flags & IOCB_DIRECT) && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1e083ecd6951a..6d171fa0ac68b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4380,7 +4380,7 @@ static void btrfs_prune_dentries(struct btrfs_root *root) struct inode *inode; u64 objectid = 0; - if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (!BTRFS_FS_ERROR(fs_info)) WARN_ON(btrfs_root_refs(&root->root_item) != 0); spin_lock(&root->inode_lock); @@ -9998,7 +9998,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_conte }; struct btrfs_fs_info *fs_info = root->fs_info; - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) return -EROFS; return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); @@ -10017,7 +10017,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, struct list_head splice; int ret; - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) return -EROFS; INIT_LIST_HEAD(&splice); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bd3cd7427391d..b1c26a90243bd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3955,7 +3955,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, int ret; struct btrfs_fs_info *fs_info = sctx->fs_info; - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) return -EROFS; /* Seed devices of a new filesystem has their own generation. */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7f91d62c2225a..a1c54a2c787c5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2006,7 +2006,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; } else { - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { btrfs_err(fs_info, "Remounting read-write after error is not allowed"); ret = -EINVAL; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 14b9fdc8aaa9a..1c3a1189c0bdf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -283,7 +283,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->trans_lock); loop: /* The file system has been taken offline. No new transactions. 
*/ - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { spin_unlock(&fs_info->trans_lock); return -EROFS; } @@ -331,7 +331,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, */ kfree(cur_trans); goto loop; - } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + } else if (BTRFS_FS_ERROR(fs_info)) { spin_unlock(&fs_info->trans_lock); kfree(cur_trans); return -EROFS; @@ -579,7 +579,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, bool do_chunk_alloc = false; int ret; - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) return ERR_PTR(-EROFS); if (current->journal_info) { @@ -991,8 +991,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (throttle) btrfs_run_delayed_iputs(info); - if (TRANS_ABORTED(trans) || - test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) { + if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) { wake_up_process(info->transaction_kthread); if (TRANS_ABORTED(trans)) err = trans->aborted; @@ -2155,7 +2154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * abort to prevent writing a new superblock that reflects a * corrupt state (pointing to trees with unwritten nodes/leafs). */ - if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { ret = -EROFS; goto cleanup_transaction; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index eb8b61decc717..71b3ddb0333df 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3353,7 +3353,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * writing the super here would result in transid mismatches. If there * is an error here just bail. */ - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { ret = -EIO; btrfs_set_log_full_commit(trans); btrfs_abort_transaction(trans, ret); From c243cb74d916b3e38ef445903d290e5034ff6caa Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:35:26 -0400 Subject: [PATCH 0360/1202] btrfs: do not infinite loop in data reclaim if we aborted Error injection stressing uncovered a busy loop in our data reclaim loop. There are two cases here, one where we loop creating block groups until space_info->full is set, or in the main loop we will skip erroring out any tickets if space_info->full == 0. Unfortunately if we aborted the transaction then we will never allocate chunks or reclaim any space and thus never get ->full, and you'll see stack traces like this: watchdog: BUG: soft lockup - CPU#0 stuck for 26s! 
[kworker/u4:4:139] CPU: 0 PID: 139 Comm: kworker/u4:4 Tainted: G W 5.13.0-rc1+ #328 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Workqueue: events_unbound btrfs_async_reclaim_data_space RIP: 0010:btrfs_join_transaction+0x12/0x20 RSP: 0018:ffffb2b780b77de0 EFLAGS: 00000246 RAX: ffffb2b781863d58 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000801 RSI: ffff987952b57400 RDI: ffff987940aa3000 RBP: ffff987954d55000 R08: 0000000000000001 R09: ffff98795539e8f0 R10: 000000000000000f R11: 000000000000000f R12: ffffffffffffffff R13: ffff987952b574c8 R14: ffff987952b57400 R15: 0000000000000008 FS: 0000000000000000(0000) GS:ffff9879bbc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0703da4000 CR3: 0000000113398004 CR4: 0000000000370ef0 Call Trace: flush_space+0x4a8/0x660 btrfs_async_reclaim_data_space+0x55/0x130 process_one_work+0x1e9/0x380 worker_thread+0x53/0x3e0 ? process_one_work+0x380/0x380 kthread+0x118/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x1f/0x30 Fix this by checking to see if we have a btrfs fs error in either of the reclaim loops, and if so fail the tickets and bail. In addition to this, fix maybe_fail_all_tickets() to not try to grant tickets if we've aborted, simply fail everything. Reviewed-by: Nikolay Borisov Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/space-info.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index aa5be0b24987a..48d77f360a249 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -885,6 +885,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, { struct reserve_ticket *ticket; u64 tickets_id = space_info->tickets_id; + const bool aborted = BTRFS_FS_ERROR(fs_info); trace_btrfs_fail_all_tickets(fs_info, space_info); @@ -898,16 +899,19 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, ticket = list_first_entry(&space_info->tickets, struct reserve_ticket, list); - if (ticket->steal && + if (!aborted && ticket->steal && steal_from_global_rsv(fs_info, space_info, ticket)) return true; - if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) + if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) btrfs_info(fs_info, "failing ticket with %llu bytes", ticket->bytes); remove_ticket(space_info, ticket); - ticket->error = -ENOSPC; + if (aborted) + ticket->error = -EIO; + else + ticket->error = -ENOSPC; wake_up(&ticket->wait); /* @@ -916,7 +920,8 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, * here to see if we can make progress with the next ticket in * the list. */ - btrfs_try_granting_tickets(fs_info, space_info); + if (!aborted) + btrfs_try_granting_tickets(fs_info, space_info); } return (tickets_id != space_info->tickets_id); } @@ -1172,6 +1177,10 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work) spin_unlock(&space_info->lock); return; } + + /* Something happened, fail everything and bail. */ + if (BTRFS_FS_ERROR(fs_info)) + goto aborted_fs; last_tickets_id = space_info->tickets_id; spin_unlock(&space_info->lock); } @@ -1202,9 +1211,20 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work) } else { flush_state = 0; } + + /* Something happened, fail everything and bail. 
*/ + if (BTRFS_FS_ERROR(fs_info)) + goto aborted_fs; + } spin_unlock(&space_info->lock); } + return; + +aborted_fs: + maybe_fail_all_tickets(fs_info, space_info); + space_info->flush = 0; + spin_unlock(&space_info->lock); } void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info) From a19a46c284312ce2698f8435dc719aec1dddec65 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:33 +0300 Subject: [PATCH 0361/1202] btrfs: rename root fields in delayed refs structs Both data and metadata delayed ref structures have fields named root/ref_root respectively. Those are somewhat cryptic and don't really convey the real meaning. In fact those roots are really the original owners of the respective block (i.e in case of a snapshot a data delayed ref will contain the original root that owns the given block). Rename those fields accordingly and adjust comments. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 13 +++++++------ fs/btrfs/delayed-ref.h | 12 ++++++------ fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/ref-verify.c | 4 ++-- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ca848b1834747..53a80163c1d79 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -922,7 +922,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && is_fstree(generic_ref->real_root) && - is_fstree(generic_ref->tree_ref.root) && + is_fstree(generic_ref->tree_ref.owning_root) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { @@ -938,14 +938,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, ref_type = BTRFS_TREE_BLOCK_REF_KEY; init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, - generic_ref->tree_ref.root, action, ref_type); - ref->root = generic_ref->tree_ref.root; + generic_ref->tree_ref.owning_root, action, + ref_type); + ref->root = generic_ref->tree_ref.owning_root; ref->parent = parent; ref->level = level; init_delayed_ref_head(head_ref, record, bytenr, num_bytes, - generic_ref->tree_ref.root, 0, action, false, - is_system); + generic_ref->tree_ref.owning_root, 0, action, + false, is_system); head_ref->extent_op = extent_op; delayed_refs = &trans->transaction->delayed_refs; @@ -997,7 +998,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, u64 bytenr = generic_ref->bytenr; u64 num_bytes = generic_ref->len; u64 parent = generic_ref->parent; - u64 ref_root = generic_ref->data_ref.ref_root; + u64 ref_root = generic_ref->data_ref.owning_root; u64 owner = generic_ref->data_ref.ino; u64 offset = generic_ref->data_ref.offset; u8 ref_type; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index e22fba272e4fd..8826f3e2b809d 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -186,8 +186,8 @@ enum btrfs_ref_type { struct btrfs_data_ref { /* For EXTENT_DATA_REF */ - /* Root which refers to this data extent */ - u64 ref_root; + /* Original root this data extent belongs to */ + u64 owning_root; /* Inode which refers to this data extent */ u64 ino; @@ -210,11 +210,11 @@ struct btrfs_tree_ref { int level; /* - * Root which refers to this tree block. + * Root which owns this tree block. 
* * For TREE_BLOCK_REF (skinny metadata, either inline or keyed) */ - u64 root; + u64 owning_root; /* For non-skinny metadata, no special member needed */ }; @@ -277,7 +277,7 @@ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, if (!generic_ref->real_root) generic_ref->real_root = root; generic_ref->tree_ref.level = level; - generic_ref->tree_ref.root = root; + generic_ref->tree_ref.owning_root = root; generic_ref->type = BTRFS_REF_METADATA; } @@ -287,7 +287,7 @@ static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, /* If @real_root not set, use @root as fallback */ if (!generic_ref->real_root) generic_ref->real_root = ref_root; - generic_ref->data_ref.ref_root = ref_root; + generic_ref->data_ref.owning_root = ref_root; generic_ref->data_ref.ino = ino; generic_ref->data_ref.offset = offset; generic_ref->type = BTRFS_REF_DATA; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ec5de19f0acd7..de099de3829ec 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1396,7 +1396,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ASSERT(generic_ref->type != BTRFS_REF_NOT_SET && generic_ref->action); BUG_ON(generic_ref->type == BTRFS_REF_METADATA && - generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID); + generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID); if (generic_ref->type == BTRFS_REF_METADATA) ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL); @@ -3372,9 +3372,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) * tree, just update pinning info and exit early. */ if ((ref->type == BTRFS_REF_METADATA && - ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || + ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) || (ref->type == BTRFS_REF_DATA && - ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { + ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) { /* unlocks the pinned mutex */ btrfs_pin_extent(trans, ref->bytenr, ref->len, 1); ret = 0; @@ -3385,9 +3385,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) } if (!((ref->type == BTRFS_REF_METADATA && - ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || + ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) || (ref->type == BTRFS_REF_DATA && - ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID))) + ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID))) btrfs_ref_tree_mod(fs_info, ref); return ret; diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index d2062d5f71dd3..e2b9f86165017 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -678,10 +678,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, if (generic_ref->type == BTRFS_REF_METADATA) { if (!parent) - ref_root = generic_ref->tree_ref.root; + ref_root = generic_ref->tree_ref.owning_root; owner = generic_ref->tree_ref.level; } else if (!parent) { - ref_root = generic_ref->data_ref.ref_root; + ref_root = generic_ref->data_ref.owning_root; owner = generic_ref->data_ref.ino; offset = generic_ref->data_ref.offset; } From e47b867382268f745eff3b22952bcf1f3738a4d6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:34 +0300 Subject: [PATCH 0362/1202] btrfs: rely on owning_root field in btrfs_add_delayed_tree_ref to detect CHUNK_ROOT The real_root field is going to be used only by ref-verify tool so limit its use outside of it. Blocks belonging to the chunk root will always have it as an owner so the check is equivalent. 
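The change itself is a one-line equivalence (before/after sketch; the hunk follows):

	/* Before: keyed off real_root, which is becoming ref-verify only. */
	is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);

	/* After: chunk tree blocks are always owned by the chunk root. */
	is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);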
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 53a80163c1d79..15470582dca99 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -906,7 +906,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, u64 parent = generic_ref->parent; u8 ref_type; - is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID); + is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID); ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action); BUG_ON(extent_op && extent_op->is_data); From b970071bc5ce851191cfea9e96c17693b56997d6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:35 +0300 Subject: [PATCH 0363/1202] btrfs: add additional parameters to btrfs_init_tree_ref/btrfs_init_data_ref In order to make 'real_root' used only in ref-verify it's required to have the necessary context to perform the same checks that this member is used for. So add 'mod_root' which will contain the root on behalf of which a delayed ref was created and a 'skip_group' parameter which will contain callsite-specific override of skip_qgroup. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.h | 5 +++-- fs/btrfs/extent-tree.c | 17 +++++++++++------ fs/btrfs/file.c | 13 ++++++++----- fs/btrfs/inode.c | 3 ++- fs/btrfs/relocation.c | 21 ++++++++++++++------- fs/btrfs/tree-log.c | 2 +- 6 files changed, 39 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 8826f3e2b809d..8ab79def8e989 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -271,7 +271,7 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, } static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, - int level, u64 root) + int level, u64 root, u64 mod_root, bool skip_qgroup) { /* If @real_root not set, use @root as fallback */ if (!generic_ref->real_root) @@ -282,7 +282,8 @@ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, } static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, - u64 ref_root, u64 ino, u64 offset) + u64 ref_root, u64 ino, u64 offset, u64 mod_root, + bool skip_qgroup) { /* If @real_root not set, use @root as fallback */ if (!generic_ref->real_root) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index de099de3829ec..a4fbe4b1df80e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2439,7 +2439,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, - key.offset); + key.offset, root->root_key.objectid, + for_reloc); generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); @@ -2453,7 +2454,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, action, bytenr, num_bytes, parent); generic_ref.real_root = root->root_key.objectid; - btrfs_init_tree_ref(&generic_ref, level - 1, ref_root); + btrfs_init_tree_ref(&generic_ref, level - 1, ref_root, + root->root_key.objectid, for_reloc); generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); @@ -3288,7 +3290,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, 
BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid); + root->root_key.objectid, 0, false); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); @@ -4741,7 +4743,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins->objectid, ins->offset, 0); - btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); + btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, + offset, 0, false); btrfs_ref_tree_mod(root->fs_info, &generic_ref); return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); @@ -4934,7 +4937,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins.objectid, ins.offset, parent); generic_ref.real_root = root->root_key.objectid; - btrfs_init_tree_ref(&generic_ref, level, root_objectid); + btrfs_init_tree_ref(&generic_ref, level, root_objectid, + root->root_key.objectid, false); btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); if (ret) @@ -5351,7 +5355,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, fs_info->nodesize, parent); - btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid, + 0, false); ret = btrfs_free_extent(trans, &ref); if (ret) goto out_unlock; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9785ea32d39cf..9a3db1365ae84 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -876,7 +876,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_init_data_ref(&ref, root->root_key.objectid, new_key.objectid, - args->start - extent_offset); + args->start - extent_offset, + 0, false); ret = btrfs_inc_extent_ref(trans, &ref); BUG_ON(ret); /* -ENOMEM */ } @@ -962,7 +963,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_init_data_ref(&ref, root->root_key.objectid, key.objectid, - key.offset - extent_offset); + key.offset - extent_offset, 0, + false); ret = btrfs_free_extent(trans, &ref); BUG_ON(ret); /* -ENOMEM */ args->bytes_found += extent_end - key.offset; @@ -1238,7 +1240,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr, num_bytes, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, ino, - orig_offset); + orig_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1263,7 +1265,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, other_end = 0; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, 0); - btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset); + btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset, + 0, false); if (extent_mergeable(leaf, path->slots[0] + 1, ino, bytenr, orig_offset, &other_start, &other_end)) { @@ -2626,7 +2629,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans, extent_info->disk_len, 0); ref_offset = extent_info->file_offset - extent_info->data_offset; btrfs_init_data_ref(&ref, root->root_key.objectid, - btrfs_ino(inode), ref_offset); + btrfs_ino(inode), ref_offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); } diff 
--git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d171fa0ac68b..fbff30bf1af93 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4921,7 +4921,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, extent_start, extent_num_bytes, 0); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - ino, extent_offset); + ino, extent_offset, + root->root_key.objectid, false); ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e5066ee03e6f..ce3631760ef4d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1148,7 +1148,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, num_bytes, parent); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, + root->root_key.objectid, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1159,7 +1160,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, num_bytes, parent); ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), - key.objectid, key.offset); + key.objectid, key.offset, + root->root_key.objectid, false); ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1369,7 +1371,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr, blocksize, path->nodes[level]->start); ref.skip_qgroup = true; - btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, + 0, true); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1378,7 +1381,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, blocksize, 0); ref.skip_qgroup = true; - btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0, + true); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1387,7 +1391,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr, blocksize, path->nodes[level]->start); - btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, + 0, true); ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { @@ -1397,7 +1402,8 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr, blocksize, 0); - btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid); + btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, + 0, true); ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { @@ -2476,7 +2482,8 @@ static int do_relocation(struct btrfs_trans_handle *trans, upper->eb->start); ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&ref, node->level, - btrfs_header_owner(upper->eb)); + btrfs_header_owner(upper->eb), + root->root_key.objectid, false); ret = btrfs_inc_extent_ref(trans, &ref); if (!ret) ret = btrfs_drop_subtree(trans, root, eb, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 
71b3ddb0333df..cda7bb2e9a1e5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -789,7 +789,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, 0); btrfs_init_data_ref(&ref, root->root_key.objectid, - key->objectid, offset); + key->objectid, offset, 0, false); ret = btrfs_inc_extent_ref(trans, &ref); if (ret) goto out; From df5a39db0549c134a13a5e1b90f419f35bd0b5f1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:36 +0300 Subject: [PATCH 0364/1202] btrfs: pull up qgroup checks from delayed-ref core to init time Instead of checking whether qgroup processing for a dealyed ref has to happen in the core of delayed ref, simply pull the check at init time of respective delayed ref structures. This eliminates the final use of real_root in delayed-ref core paving the way to making this member optional. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 4 ---- fs/btrfs/delayed-ref.h | 11 +++++++++++ fs/btrfs/extent-tree.c | 5 ----- fs/btrfs/inode.c | 1 - fs/btrfs/relocation.c | 7 ------- 5 files changed, 11 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 15470582dca99..cca7e85e32dd6 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -921,8 +921,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, } if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - is_fstree(generic_ref->real_root) && - is_fstree(generic_ref->tree_ref.owning_root) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { @@ -1027,8 +1025,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, } if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - is_fstree(ref_root) && - is_fstree(generic_ref->real_root) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 8ab79def8e989..7a0a2b21975df 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -279,6 +279,12 @@ static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, generic_ref->tree_ref.level = level; generic_ref->tree_ref.owning_root = root; generic_ref->type = BTRFS_REF_METADATA; + if (skip_qgroup || !(is_fstree(root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; + } static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, @@ -292,6 +298,11 @@ static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, generic_ref->data_ref.ino = ino; generic_ref->data_ref.offset = offset; generic_ref->type = BTRFS_REF_DATA; + if (skip_qgroup || !(is_fstree(ref_root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; } static inline struct btrfs_delayed_extent_op * diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a4fbe4b1df80e..5a07386bd6d9e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2437,11 +2437,9 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); btrfs_init_generic_ref(&generic_ref, action, bytenr, num_bytes, parent); - generic_ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, key.offset, root->root_key.objectid, for_reloc); - generic_ref.skip_qgroup = for_reloc; if (inc) ret = 
btrfs_inc_extent_ref(trans, &generic_ref); else @@ -2453,10 +2451,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = fs_info->nodesize; btrfs_init_generic_ref(&generic_ref, action, bytenr, num_bytes, parent); - generic_ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&generic_ref, level - 1, ref_root, root->root_key.objectid, for_reloc); - generic_ref.skip_qgroup = for_reloc; if (inc) ret = btrfs_inc_extent_ref(trans, &generic_ref); else @@ -4936,7 +4932,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, ins.objectid, ins.offset, parent); - generic_ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&generic_ref, level, root_objectid, root->root_key.objectid, false); btrfs_ref_tree_mod(fs_info, &generic_ref); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbff30bf1af93..dcc363fa4655e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4919,7 +4919,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, extent_start, extent_num_bytes, 0); - ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), ino, extent_offset, root->root_key.objectid, false); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ce3631760ef4d..fed8235962488 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1146,7 +1146,6 @@ int replace_file_extents(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(leaf, fi); btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, num_bytes, parent); - ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), key.objectid, key.offset, root->root_key.objectid, false); @@ -1158,7 +1157,6 @@ int replace_file_extents(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, num_bytes, parent); - ref.real_root = root->root_key.objectid; btrfs_init_data_ref(&ref, btrfs_header_owner(leaf), key.objectid, key.offset, root->root_key.objectid, false); @@ -1370,7 +1368,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr, blocksize, path->nodes[level]->start); - ref.skip_qgroup = true; btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, 0, true); ret = btrfs_inc_extent_ref(trans, &ref); @@ -1380,7 +1377,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, } btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr, blocksize, 0); - ref.skip_qgroup = true; btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0, true); ret = btrfs_inc_extent_ref(trans, &ref); @@ -1393,7 +1389,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, blocksize, path->nodes[level]->start); btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid, 0, true); - ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -1404,7 +1399,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, blocksize, 0); btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0, true); - ref.skip_qgroup = true; ret = btrfs_free_extent(trans, &ref); if (ret) { btrfs_abort_transaction(trans, ret); @@ -2480,7 +2474,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, btrfs_init_generic_ref(&ref, 
BTRFS_ADD_DELAYED_REF, node->eb->start, blocksize, upper->eb->start); - ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&ref, node->level, btrfs_header_owner(upper->eb), root->root_key.objectid, false); From e71f2d171ada5384e75c8783b14eeb4b0e376fde Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Oct 2021 11:21:37 +0300 Subject: [PATCH 0365/1202] btrfs: make btrfs_ref::real_root optional Now that real_root is only used in ref-verify core gate it behind CONFIG_BTRFS_FS_REF_VERIFY ifdef. This shrinks the size of pending delayed refs by 8 bytes per ref, of which we can have many at any one time depending on intensity of the workload. Also change the comment about the member as it no longer deals with qgroups. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 7a0a2b21975df..91a3aabad1507 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -231,17 +231,10 @@ struct btrfs_ref { */ bool skip_qgroup; - /* - * Optional. For which root is this modification. - * Mostly used for qgroup optimization. - * - * When unset, data/tree ref init code will populate it. - * In certain cases, we're modifying reference for a different root. - * E.g. COW fs tree blocks for balance. - * In that case, tree_ref::root will be fs tree, but we're doing this - * for reloc tree, then we should set @real_root to reloc tree. - */ +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* Through which root is this modification. */ u64 real_root; +#endif u64 bytenr; u64 len; @@ -273,9 +266,10 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root, u64 mod_root, bool skip_qgroup) { +#ifdef CONFIG_BTRFS_FS_REF_VERIFY /* If @real_root not set, use @root as fallback */ - if (!generic_ref->real_root) - generic_ref->real_root = root; + generic_ref->real_root = mod_root ?: root; +#endif generic_ref->tree_ref.level = level; generic_ref->tree_ref.owning_root = root; generic_ref->type = BTRFS_REF_METADATA; @@ -291,9 +285,10 @@ static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino, u64 offset, u64 mod_root, bool skip_qgroup) { +#ifdef CONFIG_BTRFS_FS_REF_VERIFY /* If @real_root not set, use @root as fallback */ - if (!generic_ref->real_root) - generic_ref->real_root = ref_root; + generic_ref->real_root = mod_root ?: ref_root; +#endif generic_ref->data_ref.owning_root = ref_root; generic_ref->data_ref.ino = ino; generic_ref->data_ref.offset = offset; From 4e39b120607ace79a77b1c6c7efc82045fbf2987 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 13 Oct 2021 14:05:14 +0800 Subject: [PATCH 0366/1202] btrfs: reduce btrfs_update_block_group alloc argument to bool btrfs_update_block_group() accounts for the number of bytes allocated or freed. Argument @alloc specifies whether the call is for alloc or free. Convert the argument @alloc type from int to bool. 
Reviewed-by: Su Yue Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 +- fs/btrfs/block-group.h | 2 +- fs/btrfs/extent-tree.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 46fdef7bbe20c..7dba9028c80c0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3160,7 +3160,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) } int btrfs_update_block_group(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, int alloc) + u64 bytenr, u64 num_bytes, bool alloc) { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_block_group *cache = NULL; diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index f751b802b173a..07f977d3816c7 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -284,7 +284,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans); int btrfs_setup_space_cache(struct btrfs_trans_handle *trans); int btrfs_update_block_group(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, int alloc); + u64 bytenr, u64 num_bytes, bool alloc); int btrfs_add_reserved_bytes(struct btrfs_block_group *cache, u64 ram_bytes, u64 num_bytes, int delalloc); void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5a07386bd6d9e..3fd736a02c1e6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3193,7 +3193,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0); + ret = btrfs_update_block_group(trans, bytenr, num_bytes, false); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -4627,7 +4627,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, if (ret) return ret; - ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1); + ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", ins->objectid, ins->offset); @@ -4716,7 +4716,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, return ret; ret = btrfs_update_block_group(trans, extent_key.objectid, - fs_info->nodesize, 1); + fs_info->nodesize, true); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", extent_key.objectid, extent_key.offset); From 276b9e8b55e0dec32e56bc24ccda5fc102b25798 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Oct 2021 08:31:53 +0200 Subject: [PATCH 0367/1202] btrfs: use bvec_kmap_local in btrfs_csum_one_bio Using local kmaps slightly reduces the chances to stray writes, and the bvec interface cleans up the code a little bit. 
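Condensed before/after of the mapping (see the hunk below); bvec_kmap_local() already applies bv_offset, so the manual offset arithmetic goes away:

	/* Before: map the page, add bv_offset by hand. */
	data = kmap_atomic(bvec.bv_page);
	crypto_shash_digest(shash, data + bvec.bv_offset + (i * fs_info->sectorsize),
			    fs_info->sectorsize, sums->sums + index);
	kunmap_atomic(data);

	/* After: the returned pointer already points at the bvec data. */
	data = bvec_kmap_local(&bvec);
	crypto_shash_digest(shash, data + (i * fs_info->sectorsize),
			    fs_info->sectorsize, sums->sums + index);
	kunmap_local(data);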
Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ddb9c7ee796f0..d1cbb64a78f3a 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -708,12 +708,12 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, index = 0; } - data = kmap_atomic(bvec.bv_page); - crypto_shash_digest(shash, data + bvec.bv_offset - + (i * fs_info->sectorsize), + data = bvec_kmap_local(&bvec); + crypto_shash_digest(shash, + data + (i * fs_info->sectorsize), fs_info->sectorsize, sums->sums + index); - kunmap_atomic(data); + kunmap_local(data); index += fs_info->csum_size; offset += fs_info->sectorsize; this_sum_bytes += fs_info->sectorsize; From 6fe92b8b545ce1eae4e745ac053b0ebd87f6600d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Oct 2021 15:29:59 +0800 Subject: [PATCH 0368/1202] btrfs: rename btrfs_dio_private::logical_offset to file_offset The naming of "logical_offset" can be confused with logical bytenr of the dio range. In fact it's file offset, and the naming "file_offset" is already widely used in all other sites. Just do the rename to avoid confusion. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 7 ++++++- fs/btrfs/inode.c | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 602b426c286da..ab2a4a52e0bb6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -356,7 +356,12 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) struct btrfs_dio_private { struct inode *inode; - u64 logical_offset; + + /* + * Since DIO can use anonymous page, we cannot use page_offset() to + * grab the file offset, thus need a dedicated member for file offset. 
+ */ + u64 file_offset; u64 disk_bytenr; /* Used for bio::bi_size */ u32 bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dcc363fa4655e..9e1bab127fb9d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8055,13 +8055,13 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip) if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) { __endio_write_update_ordered(BTRFS_I(dip->inode), - dip->logical_offset, + dip->file_offset, dip->bytes, !dip->dio_bio->bi_status); } else { unlock_extent(&BTRFS_I(dip->inode)->io_tree, - dip->logical_offset, - dip->logical_offset + dip->bytes - 1); + dip->file_offset, + dip->file_offset + dip->bytes - 1); } bio_endio(dip->dio_bio); @@ -8176,7 +8176,7 @@ static void btrfs_end_dio_bio(struct bio *bio) if (err) dip->dio_bio->bi_status = err; - btrfs_record_physical_zoned(dip->inode, dip->logical_offset, bio); + btrfs_record_physical_zoned(dip->inode, dip->file_offset, bio); bio_put(bio); btrfs_dio_private_put(dip); @@ -8218,7 +8218,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio, } else { u64 csum_offset; - csum_offset = file_offset - dip->logical_offset; + csum_offset = file_offset - dip->file_offset; csum_offset >>= fs_info->sectorsize_bits; csum_offset *= fs_info->csum_size; btrfs_bio(bio)->csum = dip->csums + csum_offset; @@ -8256,7 +8256,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, return NULL; dip->inode = inode; - dip->logical_offset = file_offset; + dip->file_offset = file_offset; dip->bytes = dio_bio->bi_iter.bi_size; dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9; dip->dio_bio = dio_bio; From 7315ac1a7cf9b5d558813ae67f75b3c38e679e64 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Oct 2021 15:30:00 +0800 Subject: [PATCH 0369/1202] btrfs: remove btrfs_bio::logical member The member btrfs_bio::logical is only initialized by two call sites: - btrfs_repair_one_sector() No corresponding site to utilize it. - btrfs_submit_direct() The corresponding site to utilize it is btrfs_check_read_dio_bio(). However for btrfs_check_read_dio_bio(), we can grab the file_offset from btrfs_dio_private::file_offset directly. Thus it turns out we don't really need that btrfs_bio::logical member at all. For btrfs_bio, the logical bytenr can be fetched from its bio->bi_iter.bi_sector directly. So let's just remove the member to save 8 bytes for structure btrfs_bio. 
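Both replacements are cheap (sketch, matching the hunks below):

	/* The logical bytenr is recoverable from the bio itself (512B sectors). */
	u64 logical = bbio->bio.bi_iter.bi_sector << 9;

	/* DIO read verification takes the file offset from the dio_private. */
	const u64 orig_file_offset = dip->file_offset;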
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 1 - fs/btrfs/inode.c | 17 ++++++++--------- fs/btrfs/volumes.h | 1 - 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b10dc75eef1c3..4e03a6d3aa324 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2673,7 +2673,6 @@ int btrfs_repair_one_sector(struct inode *inode, } bio_add_page(repair_bio, page, failrec->len, pgoff); - repair_bbio->logical = failrec->start; repair_bbio->iter = repair_bio->bi_iter; btrfs_debug(btrfs_sb(inode->i_sb), diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9e1bab127fb9d..3032893efee5b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8089,10 +8089,11 @@ static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio, return ret; } -static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, +static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip, struct btrfs_bio *bbio, const bool uptodate) { + struct inode *inode = dip->inode; struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; const u32 sectorsize = fs_info->sectorsize; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; @@ -8100,7 +8101,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); struct bio_vec bvec; struct bvec_iter iter; - u64 start = bbio->logical; + const u64 orig_file_offset = dip->file_offset; + u64 start = orig_file_offset; u32 bio_offset = 0; blk_status_t err = BLK_STS_OK; @@ -8122,10 +8124,10 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, } else { int ret; - ASSERT((start - bbio->logical) < UINT_MAX); + ASSERT((start - orig_file_offset) < UINT_MAX); ret = btrfs_repair_one_sector(inode, &bbio->bio, - start - bbio->logical, + start - orig_file_offset, bvec.bv_page, pgoff, start, bbio->mirror_num, submit_dio_repair_bio); @@ -8168,10 +8170,8 @@ static void btrfs_end_dio_bio(struct bio *bio) bio->bi_opf, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, err); - if (bio_op(bio) == REQ_OP_READ) { - err = btrfs_check_read_dio_bio(dip->inode, - btrfs_bio(bio), !err); - } + if (bio_op(bio) == REQ_OP_READ) + err = btrfs_check_read_dio_bio(dip, btrfs_bio(bio), !err); if (err) dip->dio_bio->bi_status = err; @@ -8337,7 +8337,6 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len); bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; - btrfs_bio(bio)->logical = file_offset; if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 83075d6855dba..49322a43593a6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -313,7 +313,6 @@ struct btrfs_bio { /* @device is for stripe IO submission. */ struct btrfs_device *device; - u64 logical; u8 *csum; u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; struct bvec_iter iter; From c855b961d8e07eae977c9b30f23ac8fdd6047621 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 14 Oct 2021 17:26:04 +0100 Subject: [PATCH 0370/1202] btrfs: fix lost error handling when replaying directory deletes At replay_dir_deletes(), if find_dir_range() returns an error we break out of the main while loop and then assign a value of 0 (success) to the 'ret' variable, resulting in completely ignoring that an error happened. 
Fix that by jumping to the 'out' label when find_dir_range() returns an error (negative value). CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cda7bb2e9a1e5..7c900eb27cf8b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2528,7 +2528,9 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, else { ret = find_dir_range(log, path, dirid, key_type, &range_start, &range_end); - if (ret != 0) + if (ret < 0) + goto out; + else if (ret > 0) break; } From c716c210332b85d9339c6b25a2385e234715830f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 5 Oct 2021 16:12:39 -0400 Subject: [PATCH 0371/1202] btrfs: use num_device to check for the last surviving seed device For both sprout and seed fsids, btrfs_fs_devices::num_devices provides the device count including missing devices, while btrfs_fs_devices::open_devices provides the device count excluding missing devices. We create a dummy struct btrfs_device for the missing device, so num_devices != open_devices when there is a missing device. In btrfs_rm_device() we wrongly check for %cur_devices->open_devices before freeing the seed fs_devices. Instead we should check for %cur_devices->num_devices. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6031e2f4c6bc3..0941f61d80710 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2211,7 +2211,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, synchronize_rcu(); btrfs_free_device(device); - if (cur_devices->open_devices == 0) { + if (cur_devices->num_devices == 0) { list_del_init(&cur_devices->seed_list); close_fs_devices(cur_devices); free_fs_devices(cur_devices); From b8e51756a2356a300dc77f4c736c53c8a845d05a Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 5 Oct 2021 16:12:40 -0400 Subject: [PATCH 0372/1202] btrfs: add comments for device counts in struct btrfs_fs_devices A bug was checking the wrong device count before we delete the struct btrfs_fs_devices in btrfs_rm_device(). To avoid future confusion and for easy reference, add a comment about the various device counts that we have in the struct btrfs_fs_devices. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 49322a43593a6..874406f51b863 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -236,11 +236,30 @@ struct btrfs_fs_devices { bool fsid_change; struct list_head fs_list; + /* + * Number of devices under this fsid including missing and + * replace-target device and excludes seed devices. + */ u64 num_devices; + + /* + * The number of devices that successfully opened, including + * replace-target, excludes seed devices. + */ u64 open_devices; + + /* The number of devices that are under the chunk allocation list. */ u64 rw_devices; + + /* Count of missing devices under this fsid excluding seed device. */ u64 missing_devices; u64 total_rw_bytes; + + /* + * Count of devices from btrfs_super_block::num_devices for this fsid, + * which includes the seed device, excludes the transient replace-target + * device. 
+ */ u64 total_devices; /* Highest generation number of seen devices */ From 6ab1495bb16d90bb87229caaff46f5c6e08d4a61 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:41 -0400 Subject: [PATCH 0373/1202] btrfs: do not call close_fs_devices in btrfs_rm_device There's a subtle case where if we're removing the seed device from a file system we need to free its private copy of the fs_devices. However we do not need to call close_fs_devices(), because at this point there are no devices left to close as we've closed the last one. The only thing that close_fs_devices() does is decrement ->opened, which should be 1. We want to avoid calling close_fs_devices() here because it has a lockdep_assert_held(&uuid_mutex), and we are going to stop holding the uuid_mutex in this path. So simply decrement the ->opened counter like we should, and then clean up like normal. Also add a comment explaining what we're doing here as I initially removed this code erroneously. Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0941f61d80710..918ad3790791e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2211,9 +2211,17 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, synchronize_rcu(); btrfs_free_device(device); + /* + * This can happen if cur_devices is the private seed devices list. We + * cannot call close_fs_devices() here because it expects the uuid_mutex + * to be held, but in fact we don't need that for the private + * seed_devices, we can simply decrement cur_devices->opened and then + * remove it from our list and free the fs_devices. + */ if (cur_devices->num_devices == 0) { list_del_init(&cur_devices->seed_list); - close_fs_devices(cur_devices); + ASSERT(cur_devices->opened == 1); + cur_devices->opened--; free_fs_devices(cur_devices); } From 357ff614605f481436f9734b84772eb8023d498c Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:42 -0400 Subject: [PATCH 0374/1202] btrfs: handle device lookup with btrfs_dev_lookup_args We have a lot of device lookup functions that all do something slightly different. Clean this up by adding a struct to hold the different lookup criteria, and then pass this around to btrfs_find_device() so it can do the proper matching based on the lookup criteria. Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 16 +++--- fs/btrfs/ioctl.c | 13 +++-- fs/btrfs/scrub.c | 6 +- fs/btrfs/volumes.c | 122 +++++++++++++++++++++++++---------------- fs/btrfs/volumes.h | 20 ++++++- 5 files changed, 112 insertions(+), 65 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index d029be40ea6f0..59ef388d43bc3 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data); int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) { + struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID }; struct btrfs_key key; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; @@ -100,8 +101,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) * We don't have a replace item or it's corrupted. If there is * a replace target, fail the mount. 
*/ - if (btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) { + if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, "found replace target device without a valid replace item"); ret = -EUCLEAN; @@ -163,8 +163,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) * We don't have an active replace item but if there is a * replace target, fail the mount. */ - if (btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) { + if (btrfs_find_device(fs_info->fs_devices, &args)) { btrfs_err(fs_info, "replace devid present without an active replace item"); ret = -EUCLEAN; @@ -175,11 +174,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: - dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, - src_devid, NULL, NULL); - dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, - BTRFS_DEV_REPLACE_DEVID, - NULL, NULL); + dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args); + args.devid = src_devid; + dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args); + /* * allow 'btrfs dev replace_cancel' if src/tgt device is * missing diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9eb0c1eb568e5..b8508af4e5395 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1602,6 +1602,7 @@ static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info, static noinline int btrfs_ioctl_resize(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 new_size; @@ -1657,7 +1658,8 @@ static noinline int btrfs_ioctl_resize(struct file *file, btrfs_info(fs_info, "resizing devid %llu", devid); } - device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + args.devid = devid; + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) { btrfs_info(fs_info, "resizer unable to find device %llu", devid); @@ -3317,22 +3319,21 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_ioctl_dev_info_args *di_args; struct btrfs_device *dev; int ret = 0; - char *s_uuid = NULL; di_args = memdup_user(arg, sizeof(*di_args)); if (IS_ERR(di_args)) return PTR_ERR(di_args); + args.devid = di_args->devid; if (!btrfs_is_empty_uuid(di_args->uuid)) - s_uuid = di_args->uuid; + args.uuid = di_args->uuid; rcu_read_lock(); - dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid, - NULL); - + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev) { ret = -ENODEV; goto out; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b1c26a90243bd..cf82ea6f54fb4 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4067,6 +4067,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, int readonly, int is_dev_replace) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct scrub_ctx *sctx; int ret; struct btrfs_device *dev; @@ -4114,7 +4115,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, goto out_free_ctx; mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, 
&dev->dev_state) && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); @@ -4287,11 +4288,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev) int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct btrfs_scrub_progress *progress) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct btrfs_device *dev; struct scrub_ctx *sctx = NULL; mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (dev) sctx = dev->scrub_ctx; if (sctx) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 918ad3790791e..eddd2e50cb65b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -812,9 +812,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, device = NULL; } else { + struct btrfs_dev_lookup_args args = { + .devid = devid, + .uuid = disk_super->dev_item.uuid, + }; + mutex_lock(&fs_devices->device_list_mutex); - device = btrfs_find_device(fs_devices, devid, - disk_super->dev_item.uuid, NULL); + device = btrfs_find_device(fs_devices, &args); /* * If this disk has been pulled into an fs devices created by @@ -2324,10 +2328,9 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) static struct btrfs_device *btrfs_find_device_by_path( struct btrfs_fs_info *fs_info, const char *device_path) { + BTRFS_DEV_LOOKUP_ARGS(args); int ret = 0; struct btrfs_super_block *disk_super; - u64 devid; - u8 *dev_uuid; struct block_device *bdev; struct btrfs_device *device; @@ -2336,14 +2339,14 @@ static struct btrfs_device *btrfs_find_device_by_path( if (ret) return ERR_PTR(ret); - devid = btrfs_stack_device_id(&disk_super->dev_item); - dev_uuid = disk_super->dev_item.uuid; + args.devid = btrfs_stack_device_id(&disk_super->dev_item); + args.uuid = disk_super->dev_item.uuid; if (btrfs_fs_incompat(fs_info, METADATA_UUID)) - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->metadata_uuid); + args.fsid = disk_super->metadata_uuid; else - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - disk_super->fsid); + args.fsid = disk_super->fsid; + + device = btrfs_find_device(fs_info->fs_devices, &args); btrfs_release_disk_super(disk_super); if (!device) @@ -2359,11 +2362,12 @@ struct btrfs_device *btrfs_find_device_by_devspec( struct btrfs_fs_info *fs_info, u64 devid, const char *device_path) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *device; if (devid) { - device = btrfs_find_device(fs_info->fs_devices, devid, NULL, - NULL); + args.devid = devid; + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) return ERR_PTR(-ENOENT); return device; @@ -2373,14 +2377,11 @@ struct btrfs_device *btrfs_find_device_by_devspec( return ERR_PTR(-EINVAL); if (strcmp(device_path, "missing") == 0) { - /* Find first missing device */ - list_for_each_entry(device, &fs_info->fs_devices->devices, - dev_list) { - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, - &device->dev_state) && !device->bdev) - return device; - } - return ERR_PTR(-ENOENT); + args.missing = true; + device = btrfs_find_device(fs_info->fs_devices, &args); + if (!device) + return ERR_PTR(-ENOENT); + return device; } return btrfs_find_device_by_path(fs_info, device_path); @@ -2460,6 +2461,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) */ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = trans->fs_info; 
struct btrfs_root *root = fs_info->chunk_root; struct btrfs_path *path; @@ -2469,7 +2471,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) struct btrfs_key key; u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; - u64 devid; int ret; path = btrfs_alloc_path(); @@ -2506,13 +2507,14 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); - devid = btrfs_device_id(leaf, dev_item); + args.devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_FSID_SIZE); - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid); + args.uuid = dev_uuid; + args.fsid = fs_uuid; + device = btrfs_find_device(fs_info->fs_devices, &args); BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { @@ -6754,6 +6756,33 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, return BLK_STS_OK; } +static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, + const struct btrfs_fs_devices *fs_devices) +{ + if (args->fsid == NULL) + return true; + if (memcmp(fs_devices->metadata_uuid, args->fsid, BTRFS_FSID_SIZE) == 0) + return true; + return false; +} + +static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, + const struct btrfs_device *device) +{ + ASSERT((args->devid != (u64)-1) || args->missing); + + if ((args->devid != (u64)-1) && device->devid != args->devid) + return false; + if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0) + return false; + if (!args->missing) + return true; + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && + !device->bdev) + return true; + return false; +} + /* * Find a device specified by @devid or @uuid in the list of @fs_devices, or * return NULL. @@ -6761,31 +6790,25 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, * If devid and uuid are both specified, the match must be exact, otherwise * only devid is used. 
*/ -struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, - u64 devid, u8 *uuid, u8 *fsid) +struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices, + const struct btrfs_dev_lookup_args *args) { struct btrfs_device *device; struct btrfs_fs_devices *seed_devs; - if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { + if (dev_args_match_fs_devices(args, fs_devices)) { list_for_each_entry(device, &fs_devices->devices, dev_list) { - if (device->devid == devid && - (!uuid || memcmp(device->uuid, uuid, - BTRFS_UUID_SIZE) == 0)) + if (dev_args_match_device(args, device)) return device; } } list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { - if (!fsid || - !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) { - list_for_each_entry(device, &seed_devs->devices, - dev_list) { - if (device->devid == devid && - (!uuid || memcmp(device->uuid, uuid, - BTRFS_UUID_SIZE) == 0)) - return device; - } + if (!dev_args_match_fs_devices(args, seed_devs)) + continue; + list_for_each_entry(device, &seed_devs->devices, dev_list) { + if (dev_args_match_device(args, device)) + return device; } } @@ -6951,6 +6974,7 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info, static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = leaf->fs_info; struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; @@ -7028,11 +7052,12 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); devid = btrfs_stripe_devid_nr(leaf, chunk, i); + args.devid = devid; read_extent_buffer(leaf, uuid, (unsigned long) btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); - map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, - devid, uuid, NULL); + args.uuid = uuid; + map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args); if (!map->stripes[i].dev && !btrfs_test_opt(fs_info, DEGRADED)) { free_extent_map(em); @@ -7150,6 +7175,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, static int read_one_dev(struct extent_buffer *leaf, struct btrfs_dev_item *dev_item) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; @@ -7158,11 +7184,13 @@ static int read_one_dev(struct extent_buffer *leaf, u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; - devid = btrfs_device_id(leaf, dev_item); + devid = args.devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_FSID_SIZE); + args.uuid = dev_uuid; + args.fsid = fs_uuid; if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) { fs_devices = open_seed_devices(fs_info, fs_uuid); @@ -7170,8 +7198,7 @@ static int read_one_dev(struct extent_buffer *leaf, return PTR_ERR(fs_devices); } - device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid, - fs_uuid); + device = btrfs_find_device(fs_info->fs_devices, &args); if (!device) { if (!btrfs_test_opt(fs_info, DEGRADED)) { btrfs_report_missing_device(fs_info, devid, @@ -7840,12 +7867,14 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct 
btrfs_ioctl_get_dev_stats *stats) { + BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *dev; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; int i; mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL); + args.devid = stats->devid; + dev = btrfs_find_device(fs_info->fs_devices, &args); mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { @@ -7921,6 +7950,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) { + struct btrfs_dev_lookup_args args = { .devid = devid }; struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; @@ -7976,7 +8006,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } /* Make sure no dev extent is beyond device boundary */ - dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL); + dev = btrfs_find_device(fs_info->fs_devices, &args); if (!dev) { btrfs_err(fs_info, "failed to find devid %llu", devid); ret = -EUCLEAN; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 874406f51b863..bdcf7641e0bb7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -451,6 +451,22 @@ struct btrfs_balance_control { struct btrfs_balance_progress stat; }; +/* + * Search for a given device by the set parameters + */ +struct btrfs_dev_lookup_args { + u64 devid; + u8 *uuid; + u8 *fsid; + bool missing; +}; + +/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */ +#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 } + +#define BTRFS_DEV_LOOKUP_ARGS(name) \ + struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT + enum btrfs_map_op { BTRFS_MAP_READ, BTRFS_MAP_WRITE, @@ -515,8 +531,8 @@ void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size); -struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices, - u64 devid, u8 *uuid, u8 *fsid); +struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices, + const struct btrfs_dev_lookup_args *args); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_balance(struct btrfs_fs_info *fs_info, From 72911a55a24761ad0f26d3f58356fc73a5048565 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:43 -0400 Subject: [PATCH 0375/1202] btrfs: add a btrfs_get_dev_args_from_path helper We are going to want to populate our device lookup args outside of any locks and then do the actual device lookup later, so add a helper to do this work and make btrfs_find_device_by_devspec() use this helper for now. 
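The resulting two-phase call pattern, condensed from the btrfs_find_device_by_devspec() hunk below (a sketch of the usage, not new API):

BTRFS_DEV_LOOKUP_ARGS(args);
struct btrfs_device *device;
int ret;

/* Phase 1: read the super block at @device_path, fill devid/uuid/fsid */
ret = btrfs_get_dev_args_from_path(fs_info, &args, device_path);
if (ret)
	return ERR_PTR(ret);

/* Phase 2: the lookup itself, which can now sit inside any locking scheme */
device = btrfs_find_device(fs_info->fs_devices, &args);
btrfs_put_dev_args_from_path(&args);	/* frees the temporary uuid/fsid buffers */
if (!device)
	return ERR_PTR(-ENOENT);
return device;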
Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 96 ++++++++++++++++++++++++++++++---------------- fs/btrfs/volumes.h | 4 ++ 2 files changed, 68 insertions(+), 32 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index eddd2e50cb65b..340583942d222 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2325,45 +2325,81 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) btrfs_free_device(tgtdev); } -static struct btrfs_device *btrfs_find_device_by_path( - struct btrfs_fs_info *fs_info, const char *device_path) +/** + * Populate args from device at path + * + * @fs_info: the filesystem + * @args: the args to populate + * @path: the path to the device + * + * This will read the super block of the device at @path and populate @args with + * the devid, fsid, and uuid. This is meant to be used for ioctls that need to + * lookup a device to operate on, but need to do it before we take any locks. + * This properly handles the special case of "missing" that a user may pass in, + * and does some basic sanity checks. The caller must make sure that @path is + * properly NUL terminated before calling in, and must call + * btrfs_put_dev_args_from_path() in order to free up the temporary fsid and + * uuid buffers. + * + * Return: 0 for success, -errno for failure + */ +int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + const char *path) { - BTRFS_DEV_LOOKUP_ARGS(args); - int ret = 0; struct btrfs_super_block *disk_super; struct block_device *bdev; - struct btrfs_device *device; + int ret; - ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ, - fs_info->bdev_holder, 0, &bdev, &disk_super); - if (ret) - return ERR_PTR(ret); + if (!path || !path[0]) + return -EINVAL; + if (!strcmp(path, "missing")) { + args->missing = true; + return 0; + } + + args->uuid = kzalloc(BTRFS_UUID_SIZE, GFP_KERNEL); + args->fsid = kzalloc(BTRFS_FSID_SIZE, GFP_KERNEL); + if (!args->uuid || !args->fsid) { + btrfs_put_dev_args_from_path(args); + return -ENOMEM; + } - args.devid = btrfs_stack_device_id(&disk_super->dev_item); - args.uuid = disk_super->dev_item.uuid; + ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0, + &bdev, &disk_super); + if (ret) + return ret; + args->devid = btrfs_stack_device_id(&disk_super->dev_item); + memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); if (btrfs_fs_incompat(fs_info, METADATA_UUID)) - args.fsid = disk_super->metadata_uuid; + memcpy(args->fsid, disk_super->metadata_uuid, BTRFS_FSID_SIZE); else - args.fsid = disk_super->fsid; - - device = btrfs_find_device(fs_info->fs_devices, &args); - + memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - if (!device) - device = ERR_PTR(-ENOENT); blkdev_put(bdev, FMODE_READ); - return device; + return 0; } /* - * Lookup a device given by device id, or the path if the id is 0. + * Only use this jointly with btrfs_get_dev_args_from_path() because we will + * allocate our ->uuid and ->fsid pointers, everybody else uses local variables + * that don't need to be freed. 
*/ +void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args) +{ + kfree(args->uuid); + kfree(args->fsid); + args->uuid = NULL; + args->fsid = NULL; +} + struct btrfs_device *btrfs_find_device_by_devspec( struct btrfs_fs_info *fs_info, u64 devid, const char *device_path) { BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_device *device; + int ret; if (devid) { args.devid = devid; @@ -2373,18 +2409,14 @@ struct btrfs_device *btrfs_find_device_by_devspec( return device; } - if (!device_path || !device_path[0]) - return ERR_PTR(-EINVAL); - - if (strcmp(device_path, "missing") == 0) { - args.missing = true; - device = btrfs_find_device(fs_info->fs_devices, &args); - if (!device) - return ERR_PTR(-ENOENT); - return device; - } - - return btrfs_find_device_by_path(fs_info, device_path); + ret = btrfs_get_dev_args_from_path(fs_info, &args, device_path); + if (ret) + return ERR_PTR(ret); + device = btrfs_find_device(fs_info->fs_devices, &args); + btrfs_put_dev_args_from_path(&args); + if (!device) + return ERR_PTR(-ENOENT); + return device; } /* diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bdcf7641e0bb7..acf7c1307e147 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -520,9 +520,13 @@ void btrfs_assign_next_active_device(struct btrfs_device *device, struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, const char *devpath); +int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + const char *path); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, const u8 *uuid); +void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 devid, From bc42ef7492c1c778d7db8536afe20d97a58f9d68 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 18 Oct 2021 14:54:55 +0200 Subject: [PATCH 0376/1202] dt-bindings: clock: samsung: add IDs for some core clocks Add IDs for some core clocks referenced during the boot process. 
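For context, what a non-zero ID buys (a sketch using the MUX() initializer and names from drivers/clk/samsung/clk-exynos4.c; illustrative, not part of this patch):

static const struct samsung_mux_clock example_mux[] __initconst = {
	/* ID 0 keeps the clock anonymous; CLK_MOUT_VPLLSRC publishes its
	 * clk_hw in the provider's clk_data.hws[] so drivers can reach it. */
	MUX(CLK_MOUT_VPLLSRC, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP1, 0, 1),
};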
Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20211018125456.8292-1-m.szyprowski@samsung.com Signed-off-by: Sylwester Nawrocki --- include/dt-bindings/clock/exynos4.h | 4 +++- include/dt-bindings/clock/exynos5250.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h index 88ec3968b90a6..acbfbab875ec3 100644 --- a/include/dt-bindings/clock/exynos4.h +++ b/include/dt-bindings/clock/exynos4.h @@ -209,6 +209,7 @@ #define CLK_ACLK400_MCUISP 395 /* Exynos4x12 only */ #define CLK_MOUT_HDMI 396 #define CLK_MOUT_MIXER 397 +#define CLK_MOUT_VPLLSRC 398 /* gate clocks - ppmu */ #define CLK_PPMULEFT 400 @@ -236,9 +237,10 @@ #define CLK_DIV_C2C 458 /* Exynos4x12 only */ #define CLK_DIV_GDL 459 #define CLK_DIV_GDR 460 +#define CLK_DIV_CORE2 461 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 461 +#define CLK_NR_CLKS 462 /* Exynos4x12 ISP clocks */ #define CLK_ISP_FIMC_ISP 1 diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h index e259cc01f22f8..4680da7357d32 100644 --- a/include/dt-bindings/clock/exynos5250.h +++ b/include/dt-bindings/clock/exynos5250.h @@ -19,6 +19,7 @@ #define CLK_FOUT_EPLL 7 #define CLK_FOUT_VPLL 8 #define CLK_ARM_CLK 9 +#define CLK_DIV_ARM2 10 /* gate for special clocks (sclk) */ #define CLK_SCLK_CAM_BAYER 128 @@ -174,8 +175,9 @@ #define CLK_MOUT_ACLK300_DISP1_SUB 1027 #define CLK_MOUT_APLL 1028 #define CLK_MOUT_MPLL 1029 +#define CLK_MOUT_VPLLSRC 1030 /* must be greater than maximal clock id */ -#define CLK_NR_CLKS 1030 +#define CLK_NR_CLKS 1031 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_5250_H */ From 651521d396a8712be1e4eb5b67a1ad340faa6c56 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 18 Oct 2021 14:54:56 +0200 Subject: [PATCH 0377/1202] clk: samsung: remove __clk_lookup() usage __clk_lookup() interface is obsolete, so remove it from the Samsung clock drivers. This has been achieved by getting rid of custom _get_rate() helper and replacing it with clk_hw_get_rate(). 
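The shape of the conversion, condensed (a sketch; ctx is the samsung_clk_provider and CLK_FIN_PLL an example clock ID):

struct clk_hw **hws = ctx->clk_data.hws;
unsigned long rate;

/* Before: global name-based lookup through the obsolete __clk_lookup() */
rate = _get_rate("fin_pll");

/* After: index straight into the provider's own clk_hw table */
rate = clk_hw_get_rate(hws[CLK_FIN_PLL]);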
Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Link: https://lore.kernel.org/r/20211018125456.8292-2-m.szyprowski@samsung.com Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos4.c | 18 ++++++++++-------- drivers/clk/samsung/clk-exynos5250.c | 10 +++++----- drivers/clk/samsung/clk-exynos5420.c | 2 +- drivers/clk/samsung/clk-s3c2410.c | 6 ++++-- drivers/clk/samsung/clk-s3c64xx.c | 8 ++++++-- drivers/clk/samsung/clk-s5pv210.c | 8 ++++++-- drivers/clk/samsung/clk.c | 14 -------------- drivers/clk/samsung/clk.h | 2 -- 8 files changed, 32 insertions(+), 36 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index bf13e29a655c7..42b4b62bd483d 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -437,7 +437,7 @@ static const struct samsung_mux_clock exynos4_mux_clks[] __initconst = { /* list of mux clocks supported in exynos4210 soc */ static const struct samsung_mux_clock exynos4210_mux_early[] __initconst = { - MUX(0, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP1, 0, 1), + MUX(CLK_MOUT_VPLLSRC, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP1, 0, 1), }; static const struct samsung_mux_clock exynos4210_mux_clks[] __initconst = { @@ -603,7 +603,7 @@ static const struct samsung_div_clock exynos4_div_clks[] __initconst = { DIV(0, "div_periph", "div_core2", DIV_CPU0, 12, 3), DIV(0, "div_atb", "mout_core", DIV_CPU0, 16, 3), DIV(0, "div_pclk_dbg", "div_atb", DIV_CPU0, 20, 3), - DIV(0, "div_core2", "div_core", DIV_CPU0, 28, 3), + DIV(CLK_DIV_CORE2, "div_core2", "div_core", DIV_CPU0, 28, 3), DIV(0, "div_copy", "mout_hpm", DIV_CPU1, 0, 3), DIV(0, "div_hpm", "div_copy", DIV_CPU1, 4, 3), DIV(0, "div_clkout_cpu", "mout_clkout_cpu", CLKOUT_CMU_CPU, 8, 6), @@ -1254,21 +1254,21 @@ static void __init exynos4_clk_init(struct device_node *np, samsung_clk_register_mux(ctx, exynos4210_mux_early, ARRAY_SIZE(exynos4210_mux_early)); - if (_get_rate("fin_pll") == 24000000) { + if (clk_hw_get_rate(hws[CLK_FIN_PLL]) == 24000000) { exynos4210_plls[apll].rate_table = exynos4210_apll_rates; exynos4210_plls[epll].rate_table = exynos4210_epll_rates; } - if (_get_rate("mout_vpllsrc") == 24000000) + if (clk_hw_get_rate(hws[CLK_MOUT_VPLLSRC]) == 24000000) exynos4210_plls[vpll].rate_table = exynos4210_vpll_rates; samsung_clk_register_pll(ctx, exynos4210_plls, ARRAY_SIZE(exynos4210_plls), reg_base); } else { - if (_get_rate("fin_pll") == 24000000) { + if (clk_hw_get_rate(hws[CLK_FIN_PLL]) == 24000000) { exynos4x12_plls[apll].rate_table = exynos4x12_apll_rates; exynos4x12_plls[epll].rate_table = @@ -1344,9 +1344,11 @@ static void __init exynos4_clk_init(struct device_node *np, pr_info("%s clocks: sclk_apll = %ld, sclk_mpll = %ld\n" "\tsclk_epll = %ld, sclk_vpll = %ld, arm_clk = %ld\n", exynos4_soc == EXYNOS4210 ? 
"Exynos4210" : "Exynos4x12", - _get_rate("sclk_apll"), _get_rate("sclk_mpll"), - _get_rate("sclk_epll"), _get_rate("sclk_vpll"), - _get_rate("div_core2")); + clk_hw_get_rate(hws[CLK_SCLK_APLL]), + clk_hw_get_rate(hws[CLK_SCLK_MPLL]), + clk_hw_get_rate(hws[CLK_SCLK_EPLL]), + clk_hw_get_rate(hws[CLK_SCLK_VPLL]), + clk_hw_get_rate(hws[CLK_DIV_CORE2])); } diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c index 06588fab408a7..fde4998d2aab6 100644 --- a/drivers/clk/samsung/clk-exynos5250.c +++ b/drivers/clk/samsung/clk-exynos5250.c @@ -239,7 +239,7 @@ static const struct samsung_fixed_factor_clock exynos5250_fixed_factor_clks[] __ }; static const struct samsung_mux_clock exynos5250_pll_pmux_clks[] __initconst = { - MUX(0, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP2, 0, 1), + MUX(CLK_MOUT_VPLLSRC, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP2, 0, 1), }; static const struct samsung_mux_clock exynos5250_mux_clks[] __initconst = { @@ -351,7 +351,7 @@ static const struct samsung_div_clock exynos5250_div_clks[] __initconst = { */ DIV(0, "div_arm", "mout_cpu", DIV_CPU0, 0, 3), DIV(0, "div_apll", "mout_apll", DIV_CPU0, 24, 3), - DIV(0, "div_arm2", "div_arm", DIV_CPU0, 28, 3), + DIV(CLK_DIV_ARM2, "div_arm2", "div_arm", DIV_CPU0, 28, 3), /* * CMU_TOP @@ -801,12 +801,12 @@ static void __init exynos5250_clk_init(struct device_node *np) samsung_clk_register_mux(ctx, exynos5250_pll_pmux_clks, ARRAY_SIZE(exynos5250_pll_pmux_clks)); - if (_get_rate("fin_pll") == 24 * MHZ) { + if (clk_hw_get_rate(hws[CLK_FIN_PLL]) == 24 * MHZ) { exynos5250_plls[epll].rate_table = epll_24mhz_tbl; exynos5250_plls[apll].rate_table = apll_24mhz_tbl; } - if (_get_rate("mout_vpllsrc") == 24 * MHZ) + if (clk_hw_get_rate(hws[CLK_MOUT_VPLLSRC]) == 24 * MHZ) exynos5250_plls[vpll].rate_table = vpll_24mhz_tbl; samsung_clk_register_pll(ctx, exynos5250_plls, @@ -855,6 +855,6 @@ static void __init exynos5250_clk_init(struct device_node *np) samsung_clk_of_add_provider(np, ctx); pr_info("Exynos5250: clock setup completed, armclk=%ld\n", - _get_rate("div_arm2")); + clk_hw_get_rate(hws[CLK_DIV_ARM2])); } CLK_OF_DECLARE_DRIVER(exynos5250_clk, "samsung,exynos5250-clock", exynos5250_clk_init); diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 3ccd4eabd2a69..747196bbea2a8 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -1580,7 +1580,7 @@ static void __init exynos5x_clk_init(struct device_node *np, ARRAY_SIZE(exynos5x_fixed_rate_ext_clks), ext_clk_match); - if (_get_rate("fin_pll") == 24 * MHZ) { + if (clk_hw_get_rate(hws[CLK_FIN_PLL]) == 24 * MHZ) { exynos5x_plls[apll].rate_table = exynos5420_pll2550x_24mhz_tbl; exynos5x_plls[epll].rate_table = exynos5420_epll_24mhz_tbl; exynos5x_plls[kpll].rate_table = exynos5420_pll2550x_24mhz_tbl; diff --git a/drivers/clk/samsung/clk-s3c2410.c b/drivers/clk/samsung/clk-s3c2410.c index 5831d06060771..3d152a46169ba 100644 --- a/drivers/clk/samsung/clk-s3c2410.c +++ b/drivers/clk/samsung/clk-s3c2410.c @@ -323,6 +323,7 @@ void __init s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, void __iomem *base) { struct samsung_clk_provider *ctx; + struct clk_hw **hws; reg_base = base; if (np) { @@ -332,13 +333,14 @@ void __init s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, } ctx = samsung_clk_init(np, reg_base, NR_CLKS); + hws = ctx->clk_data.hws; /* Register external clocks only in non-dt cases */ if (!np) s3c2410_common_clk_register_fixed_ext(ctx, xti_f); if 
(current_soc == S3C2410) { - if (_get_rate("xti") == 12 * MHZ) { + if (clk_hw_get_rate(hws[XTI]) == 12 * MHZ) { s3c2410_plls[mpll].rate_table = pll_s3c2410_12mhz_tbl; s3c2410_plls[upll].rate_table = pll_s3c2410_12mhz_tbl; } @@ -348,7 +350,7 @@ void __init s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f, ARRAY_SIZE(s3c2410_plls), reg_base); } else { /* S3C2440, S3C2442 */ - if (_get_rate("xti") == 12 * MHZ) { + if (clk_hw_get_rate(hws[XTI]) == 12 * MHZ) { /* * plls follow different calculation schemes, with the * upll following the same scheme as the s3c2410 plls diff --git a/drivers/clk/samsung/clk-s3c64xx.c b/drivers/clk/samsung/clk-s3c64xx.c index 56f95b63f71f0..d6b432a26d639 100644 --- a/drivers/clk/samsung/clk-s3c64xx.c +++ b/drivers/clk/samsung/clk-s3c64xx.c @@ -394,6 +394,7 @@ void __init s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, void __iomem *base) { struct samsung_clk_provider *ctx; + struct clk_hw **hws; reg_base = base; is_s3c6400 = s3c6400; @@ -405,6 +406,7 @@ void __init s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, } ctx = samsung_clk_init(np, reg_base, NR_CLKS); + hws = ctx->clk_data.hws; /* Register external clocks. */ if (!np) @@ -459,8 +461,10 @@ void __init s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f, pr_info("%s clocks: apll = %lu, mpll = %lu\n" "\tepll = %lu, arm_clk = %lu\n", is_s3c6400 ? "S3C6400" : "S3C6410", - _get_rate("fout_apll"), _get_rate("fout_mpll"), - _get_rate("fout_epll"), _get_rate("armclk")); + clk_hw_get_rate(hws[MOUT_APLL]), + clk_hw_get_rate(hws[MOUT_MPLL]), + clk_hw_get_rate(hws[MOUT_EPLL]), + clk_hw_get_rate(hws[ARMCLK])); } static void __init s3c6400_clk_init(struct device_node *np) diff --git a/drivers/clk/samsung/clk-s5pv210.c b/drivers/clk/samsung/clk-s5pv210.c index e7b68ffe36de0..4425186bdcab5 100644 --- a/drivers/clk/samsung/clk-s5pv210.c +++ b/drivers/clk/samsung/clk-s5pv210.c @@ -741,8 +741,10 @@ static void __init __s5pv210_clk_init(struct device_node *np, bool is_s5p6442) { struct samsung_clk_provider *ctx; + struct clk_hw **hws; ctx = samsung_clk_init(np, reg_base, NR_CLKS); + hws = ctx->clk_data.hws; samsung_clk_register_mux(ctx, early_mux_clks, ARRAY_SIZE(early_mux_clks)); @@ -789,8 +791,10 @@ static void __init __s5pv210_clk_init(struct device_node *np, pr_info("%s clocks: mout_apll = %ld, mout_mpll = %ld\n" "\tmout_epll = %ld, mout_vpll = %ld\n", is_s5p6442 ? 
"S5P6442" : "S5PV210", - _get_rate("mout_apll"), _get_rate("mout_mpll"), - _get_rate("mout_epll"), _get_rate("mout_vpll")); + clk_hw_get_rate(hws[MOUT_APLL]), + clk_hw_get_rate(hws[MOUT_MPLL]), + clk_hw_get_rate(hws[MOUT_EPLL]), + clk_hw_get_rate(hws[MOUT_VPLL])); } static void __init s5pv210_clk_dt_init(struct device_node *np) diff --git a/drivers/clk/samsung/clk.c b/drivers/clk/samsung/clk.c index 336243c6f120a..bca4731b14ea5 100644 --- a/drivers/clk/samsung/clk.c +++ b/drivers/clk/samsung/clk.c @@ -268,20 +268,6 @@ void __init samsung_clk_of_register_fixed_ext(struct samsung_clk_provider *ctx, samsung_clk_register_fixed_rate(ctx, fixed_rate_clk, nr_fixed_rate_clk); } -/* utility function to get the rate of a specified clock */ -unsigned long _get_rate(const char *clk_name) -{ - struct clk *clk; - - clk = __clk_lookup(clk_name); - if (!clk) { - pr_err("%s: could not find clock %s\n", __func__, clk_name); - return 0; - } - - return clk_get_rate(clk); -} - #ifdef CONFIG_PM_SLEEP static int samsung_clk_suspend(void) { diff --git a/drivers/clk/samsung/clk.h b/drivers/clk/samsung/clk.h index 26499e97275b8..b23776d540eba 100644 --- a/drivers/clk/samsung/clk.h +++ b/drivers/clk/samsung/clk.h @@ -381,8 +381,6 @@ extern struct samsung_clk_provider __init *samsung_cmu_register_one( struct device_node *, const struct samsung_cmu_info *); -extern unsigned long _get_rate(const char *clk_name); - #ifdef CONFIG_PM_SLEEP extern void samsung_clk_extended_sleep_init(void __iomem *reg_base, const unsigned long *rdump, From 13d001549466631728c61ad2b59eabf3bfa89d63 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 23 Sep 2021 07:50:08 -0400 Subject: [PATCH 0378/1202] ceph: convert to noop_direct_IO We have our own op, but the WARN_ON is not terribly helpful, and it's otherwise identical to the noop one. Just use that. Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 99b80b5c7a931..b429204314196 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1306,17 +1306,6 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, return copied; } -/* - * we set .direct_IO to indicate direct io is supported, but since we - * intercept O_DIRECT reads and writes early, this function should - * never get called. - */ -static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter) -{ - WARN_ON(1); - return -EINVAL; -} - const struct address_space_operations ceph_aops = { .readpage = ceph_readpage, .readahead = ceph_readahead, @@ -1327,7 +1316,7 @@ const struct address_space_operations ceph_aops = { .set_page_dirty = ceph_set_page_dirty, .invalidatepage = ceph_invalidatepage, .releasepage = ceph_releasepage, - .direct_IO = ceph_direct_io, + .direct_IO = noop_direct_IO, }; static void ceph_block_sigs(sigset_t *oldset) From 324bfaa1a6cc948c58f7af521994d0edc1836708 Mon Sep 17 00:00:00 2001 From: Jean Sacren Date: Mon, 11 Oct 2021 00:45:24 -0600 Subject: [PATCH 0379/1202] libceph: drop ->monmap and err initialization Call to build_initial_monmap() is one stone two birds. Explicitly it initializes err variable. Implicitly it initializes ->monmap via call to kzalloc(). We should only declare err and ->monmap is taken care of by ceph_monc_init() prototype. 
Signed-off-by: Jean Sacren Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- net/ceph/mon_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 013cbdb6cfe26..6a6898ee40495 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1153,12 +1153,11 @@ static int build_initial_monmap(struct ceph_mon_client *monc) int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) { - int err = 0; + int err; dout("init\n"); memset(monc, 0, sizeof(*monc)); monc->client = cl; - monc->monmap = NULL; mutex_init(&monc->mutex); err = build_initial_monmap(monc); From 6b0e686e7ca7ac6eaa72038049a08c99f51f7ea9 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sat, 28 Aug 2021 13:12:17 +0200 Subject: [PATCH 0380/1202] dt-bindings: phy: rockchip: remove usb-phy fallback string for rk3066a/rk3188 With the conversion of rockchip-usb-phy.yaml, a long-used fallback string for rk3066a/rk3188 was added. The Linux driver doesn't do much with the GRF phy address range, but the U-Boot driver rockchip_usb2_phy.c does. The bits in GRF_UOC0_CON2 for rk3066a/rk3188 and rk3288, for example, don't match. Remove the usb-phy fallback string for rk3066a/rk3188 to prevent possible strange side effects. Signed-off-by: Johan Jonker Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210828111218.10026-1-jbx6244@gmail.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/rockchip-usb-phy.yaml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.yaml index f0fc8275dcd00..499d55131aa8b 100644 --- a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.yaml @@ -11,13 +11,10 @@ maintainers: properties: compatible: - oneOf: - - const: rockchip,rk3288-usb-phy - - items: - - enum: - - rockchip,rk3066a-usb-phy - - rockchip,rk3188-usb-phy - - const: rockchip,rk3288-usb-phy + enum: + - rockchip,rk3066a-usb-phy + - rockchip,rk3188-usb-phy + - rockchip,rk3288-usb-phy "#address-cells": const: 1 From 29aecc40f44f0e6b9565cfcb37c4e6e209a69ae5 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 15 Oct 2021 18:14:25 +0200 Subject: [PATCH 0381/1202] phy: stm32: restore utmi switch on resume The UTMI switch value can be lost during suspend/resume, depending on the power state reached. This patch adds a resume function to usbphyc to reconfigure the UTMI switch after suspend. 
Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211015161427.220784-2-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 937a14fa7448a..083593aea53ad 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -598,6 +598,18 @@ static int stm32_usbphyc_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused stm32_usbphyc_resume(struct device *dev) +{ + struct stm32_usbphyc *usbphyc = dev_get_drvdata(dev); + + if (usbphyc->switch_setup >= 0) + stm32_usbphyc_switch_setup(usbphyc, usbphyc->switch_setup); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(stm32_usbphyc_pm_ops, NULL, stm32_usbphyc_resume); + static const struct of_device_id stm32_usbphyc_of_match[] = { { .compatible = "st,stm32mp1-usbphyc", }, { }, @@ -610,6 +622,7 @@ static struct platform_driver stm32_usbphyc_driver = { .driver = { .of_match_table = stm32_usbphyc_of_match, .name = "stm32-usbphyc", + .pm = &stm32_usbphyc_pm_ops, } }; module_platform_driver(stm32_usbphyc_driver); From 689862d749af85fbd7d7f08ffa08949a8a217aff Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 15 Oct 2021 18:14:26 +0200 Subject: [PATCH 0382/1202] dt-bindings: phy: phy-stm32-usbphyc: add optional phy tuning properties This patch adds the description of new optional phy tuning properties for usbphyc phy sub nodes. Signed-off-by: Amelie Delaunay Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211015161427.220784-3-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- .../bindings/phy/phy-stm32-usbphyc.yaml | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml index 3329f1d33a4fe..225128364a637 100644 --- a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml +++ b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml @@ -81,6 +81,119 @@ patternProperties: properties: vbus-supply: true + # It can be necessary to adjust the PHY settings to compensate parasitics, which can be due + # to USB connector/receptacle, routing, ESD protection component,... 
Here is the list of + # all optional parameters to tune the interface of the PHY (HS for High-Speed, FS for Full- + # Speed, LS for Low-Speed) + + st,current-boost-microamp: + description: Current boosting in uA + enum: [ 1000, 2000 ] + + st,no-lsfs-fb-cap: + description: Disables the LS/FS feedback capacitor + type: boolean + + st,decrease-hs-slew-rate: + description: Decreases the HS driver slew rate by 10% + type: boolean + + st,tune-hs-dc-level: + description: | + Tunes the HS driver DC level + - <0> normal level + - <1> increases the level by 5 to 7 mV + - <2> increases the level by 10 to 14 mV + - <3> decreases the level by 5 to 7 mV + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + default: 0 + + st,enable-fs-rftime-tuning: + description: Enables the FS rise/fall tuning option + type: boolean + + st,enable-hs-rftime-reduction: + description: Enables the HS rise/fall reduction feature + type: boolean + + st,trim-hs-current: + description: | + Controls HS driver current trimming for choke compensation + - <0> = 18.87 mA target current / nominal + 0% + - <1> = 19.165 mA target current / nominal + 1.56% + - <2> = 19.46 mA target current / nominal + 3.12% + - <3> = 19.755 mA target current / nominal + 4.68% + - <4> = 20.05 mA target current / nominal + 6.24% + - <5> = 20.345 mA target current / nominal + 7.8% + - <6> = 20.64 mA target current / nominal + 9.36% + - <7> = 20.935 mA target current / nominal + 10.92% + - <8> = 21.23 mA target current / nominal + 12.48% + - <9> = 21.525 mA target current / nominal + 14.04% + - <10> = 21.82 mA target current / nominal + 15.6% + - <11> = 22.115 mA target current / nominal + 17.16% + - <12> = 22.458 mA target current / nominal + 19.01% + - <13> = 22.755 mA target current / nominal + 20.58% + - <14> = 23.052 mA target current / nominal + 22.16% + - <15> = 23.348 mA target current / nominal + 23.73% + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 15 + default: 0 + + st,trim-hs-impedance: + description: | + Controls HS driver impedance tuning for choke compensation + - <0> = no impedance offset + - <1> = reduce the impedance by 2 ohms + - <2> = reduce the impedance by 4 ohms + - <3> = reduce the impedance by 6 ohms + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + default: 0 + + st,tune-squelch-level: + description: | + Tunes the squelch DC threshold value + - <0> = no shift in threshold + - <1> = threshold shift by +7 mV + - <2> = threshold shift by -5 mV + - <3> = threshold shift by +14 mV + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + default: 0 + + st,enable-hs-rx-gain-eq: + description: Enables the HS Rx gain equalizer + type: boolean + + st,tune-hs-rx-offset: + description: | + Adjusts the HS Rx offset + - <0> = no offset + - <1> = offset of +5 mV + - <2> = offset of +10 mV + - <3> = offset of -5 mV + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 0 + maximum: 3 + default: 0 + + st,no-hs-ftime-ctrl: + description: Disables the HS fall time control of single ended signals during pre-emphasis + type: boolean + + st,no-lsfs-sc: + description: Disables the short circuit protection in LS/FS driver + type: boolean + + st,enable-hs-tx-staggering: + description: Enables the basic staggering in HS Tx mode + type: boolean + allOf: - if: properties: @@ -137,6 +250,14 @@ examples: reg = <0>; phy-supply = <&vdd_usb>; #phy-cells = <0>; + st,tune-hs-dc-level = <2>; + st,enable-fs-rftime-tuning; + st,enable-hs-rftime-reduction; + 
st,trim-hs-current = <15>; + st,trim-hs-impedance = <1>; + st,tune-squelch-level = <3>; + st,tune-hs-rx-offset = <2>; + st,no-lsfs-sc; connector { compatible = "usb-a-connector"; vbus-supply = <&vbus_sw>; }; }; phy@1 { reg = <1>; phy-supply = <&vdd_usb>; #phy-cells = <1>; + st,tune-hs-dc-level = <2>; + st,enable-fs-rftime-tuning; + st,enable-hs-rftime-reduction; + st,trim-hs-current = <15>; + st,trim-hs-impedance = <1>; + st,tune-squelch-level = <3>; + st,tune-hs-rx-offset = <2>; + st,no-lsfs-sc; }; }; ... From 030997ccad9273d8d801f1c8982afbe1769d4b8c Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 15 Oct 2021 18:14:27 +0200 Subject: [PATCH 0383/1202] phy: stm32: add phy tuning support It can be necessary to adjust the phy settings to compensate for parasitics. This patch adds support for new optional properties to configure the tune interface of the stm32-usbphyc phys. The properties are optional, which is why each property is skipped if not found (-EINVAL). Phy tuning is restored on resume, because the phy tuning configuration is reset if a deep low-power state is reached. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211015161427.220784-4-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 190 +++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 083593aea53ad..7df6a63ad37ba 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -20,6 +20,7 @@ #define STM32_USBPHYC_PLL 0x0 #define STM32_USBPHYC_MISC 0x8 #define STM32_USBPHYC_MONITOR(X) (0x108 + ((X) * 0x100)) +#define STM32_USBPHYC_TUNE(X) (0x10C + ((X) * 0x100)) #define STM32_USBPHYC_VERSION 0x3F4 /* STM32_USBPHYC_PLL bit fields */ @@ -41,6 +42,83 @@ #define STM32_USBPHYC_MON_SEL_LOCKP 0x1F #define STM32_USBPHYC_MON_OUT_LOCKP BIT(3) +/* STM32_USBPHYC_TUNE bit fields */ +#define INCURREN BIT(0) +#define INCURRINT BIT(1) +#define LFSCAPEN BIT(2) +#define HSDRVSLEW BIT(3) +#define HSDRVDCCUR BIT(4) +#define HSDRVDCLEV BIT(5) +#define HSDRVCURINCR BIT(6) +#define FSDRVRFADJ BIT(7) +#define HSDRVRFRED BIT(8) +#define HSDRVCHKITRM GENMASK(12, 9) +#define HSDRVCHKZTRM GENMASK(14, 13) +#define OTPCOMP GENMASK(19, 15) +#define SQLCHCTL GENMASK(21, 20) +#define HDRXGNEQEN BIT(22) +#define HSRXOFF GENMASK(24, 23) +#define HSFALLPREEM BIT(25) +#define SHTCCTCTLPROT BIT(26) +#define STAGSEL BIT(27) + +enum boosting_vals { + BOOST_1000_UA = 1000, + BOOST_2000_UA = 2000, +}; + +enum dc_level_vals { + DC_NOMINAL, + DC_PLUS_5_TO_7_MV, + DC_PLUS_10_TO_14_MV, + DC_MINUS_5_TO_7_MV, + DC_MAX, +}; + +enum current_trim { + CUR_NOMINAL, + CUR_PLUS_1_56_PCT, + CUR_PLUS_3_12_PCT, + CUR_PLUS_4_68_PCT, + CUR_PLUS_6_24_PCT, + CUR_PLUS_7_8_PCT, + CUR_PLUS_9_36_PCT, + CUR_PLUS_10_92_PCT, + CUR_PLUS_12_48_PCT, + CUR_PLUS_14_04_PCT, + CUR_PLUS_15_6_PCT, + CUR_PLUS_17_16_PCT, + CUR_PLUS_19_01_PCT, + CUR_PLUS_20_58_PCT, + CUR_PLUS_22_16_PCT, + CUR_PLUS_23_73_PCT, + CUR_MAX, +}; + +enum impedance_trim { + IMP_NOMINAL, + IMP_MINUS_2_OHMS, + IMP_MINUS_4_OMHS, + IMP_MINUS_6_OHMS, + IMP_MAX, +}; + +enum squelch_level { + SQLCH_NOMINAL, + SQLCH_PLUS_7_MV, + SQLCH_MINUS_5_MV, + SQLCH_PLUS_14_MV, + SQLCH_MAX, +}; + +enum rx_offset { + NO_RX_OFFSET, + RX_OFFSET_PLUS_5_MV, + RX_OFFSET_PLUS_10_MV, + RX_OFFSET_MINUS_5_MV, + RX_OFFSET_MAX, +}; + /* STM32_USBPHYC_VERSION bit fields */ #define MINREV GENMASK(3, 0) #define MAJREV GENMASK(7, 4) @@ -60,6 +138,7 @@ struct 
stm32_usbphyc_phy { struct regulator *vbus; u32 index; bool active; + u32 tune; }; struct stm32_usbphyc { @@ -375,6 +454,107 @@ static int stm32_usbphyc_clk48_register(struct stm32_usbphyc *usbphyc) return ret; } +static void stm32_usbphyc_phy_tuning(struct stm32_usbphyc *usbphyc, + struct device_node *np, u32 index) +{ + struct stm32_usbphyc_phy *usbphyc_phy = usbphyc->phys[index]; + u32 reg = STM32_USBPHYC_TUNE(index); + u32 otpcomp, val; + int ret; + + /* Backup OTP compensation code */ + otpcomp = FIELD_GET(OTPCOMP, readl_relaxed(usbphyc->base + reg)); + + ret = of_property_read_u32(np, "st,current-boost-microamp", &val); + if (ret != -EINVAL) { + if (!ret && (val == BOOST_1000_UA || val == BOOST_2000_UA)) { + val = (val == BOOST_2000_UA) ? 1 : 0; + usbphyc_phy->tune |= INCURREN | FIELD_PREP(INCURRINT, val); + } else { + dev_warn(usbphyc->dev, "phy%d: invalid st,current-boost-microamp\n", index); + } + } + + if (!of_property_read_bool(np, "st,no-lsfs-fb-cap")) + usbphyc_phy->tune |= LFSCAPEN; + + if (of_property_read_bool(np, "st,slow-hs-slew-rate")) + usbphyc_phy->tune |= HSDRVSLEW; + + ret = of_property_read_u32(np, "st,tune-hs-dc-level", &val); + if (ret != -EINVAL) { + if (!ret && val < DC_MAX) { + if (val == DC_MINUS_5_TO_7_MV) {/* Decreases HS driver DC level */ + usbphyc_phy->tune |= HSDRVDCCUR; + } else if (val > 0) { /* Increases HS driver DC level */ + val = (val == DC_PLUS_10_TO_14_MV) ? 1 : 0; + usbphyc_phy->tune |= HSDRVCURINCR | FIELD_PREP(HSDRVDCLEV, val); + } + } else { + dev_warn(usbphyc->dev, "phy%d: invalid st,tune-hs-dc-level\n", index); + } + } + + if (of_property_read_bool(np, "st,enable-fs-rftime-tuning")) + usbphyc_phy->tune |= FSDRVRFADJ; + + if (of_property_read_bool(np, "st,enable-hs-rftime-reduction")) + usbphyc_phy->tune |= HSDRVRFRED; + + ret = of_property_read_u32(np, "st,trim-hs-current", &val); + if (ret != -EINVAL) { + if (!ret && val < CUR_MAX) + usbphyc_phy->tune |= FIELD_PREP(HSDRVCHKITRM, val); + else + dev_warn(usbphyc->dev, "phy%d: invalid st,trim-hs-current\n", index); + } + + ret = of_property_read_u32(np, "st,trim-hs-impedance", &val); + if (ret != -EINVAL) { + if (!ret && val < IMP_MAX) + usbphyc_phy->tune |= FIELD_PREP(HSDRVCHKZTRM, val); + else + dev_warn(usbphyc->dev, "phy%d: invalid st,trim-hs-impedance\n", index); + } + + ret = of_property_read_u32(np, "st,tune-squelch-level", &val); + if (ret != -EINVAL) { + if (!ret && val < SQLCH_MAX) + usbphyc_phy->tune |= FIELD_PREP(SQLCHCTL, val); + else + dev_warn(usbphyc->dev, "phy%d: invalid st,tune-squelch\n", index); + } + + if (of_property_read_bool(np, "st,enable-hs-rx-gain-eq")) + usbphyc_phy->tune |= HDRXGNEQEN; + + ret = of_property_read_u32(np, "st,tune-hs-rx-offset", &val); + if (ret != -EINVAL) { + if (!ret && val < RX_OFFSET_MAX) + usbphyc_phy->tune |= FIELD_PREP(HSRXOFF, val); + else + dev_warn(usbphyc->dev, "phy%d: invalid st,tune-hs-rx-offset\n", index); + } + + if (of_property_read_bool(np, "st,no-hs-ftime-ctrl")) + usbphyc_phy->tune |= HSFALLPREEM; + + if (!of_property_read_bool(np, "st,no-lsfs-sc")) + usbphyc_phy->tune |= SHTCCTCTLPROT; + + if (of_property_read_bool(np, "st,enable-hs-tx-staggering")) + usbphyc_phy->tune |= STAGSEL; + + /* Restore OTP compensation code */ + usbphyc_phy->tune |= FIELD_PREP(OTPCOMP, otpcomp); + + /* + * By default, if no st,xxx tuning property is used, usbphyc_phy->tune is equal to + * STM32_USBPHYC_TUNE reset value (LFSCAPEN | SHTCCTCTLPROT | OTPCOMP). 
+ */ + writel_relaxed(usbphyc_phy->tune, usbphyc->base + reg); +} + static void stm32_usbphyc_switch_setup(struct stm32_usbphyc *usbphyc, u32 utmi_switch) { @@ -550,6 +730,9 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) usbphyc->phys[port]->vbus = NULL; } + /* Configure phy tuning */ + stm32_usbphyc_phy_tuning(usbphyc, child, index); + port++; } @@ -601,10 +784,17 @@ static int stm32_usbphyc_remove(struct platform_device *pdev) static int __maybe_unused stm32_usbphyc_resume(struct device *dev) { struct stm32_usbphyc *usbphyc = dev_get_drvdata(dev); + struct stm32_usbphyc_phy *usbphyc_phy; + int port; if (usbphyc->switch_setup >= 0) stm32_usbphyc_switch_setup(usbphyc, usbphyc->switch_setup); + for (port = 0; port < usbphyc->nphys; port++) { + usbphyc_phy = usbphyc->phys[port]; + writel_relaxed(usbphyc_phy->tune, usbphyc->base + STM32_USBPHYC_TUNE(port)); + } + return 0; } From e2450531ec392715d8fc8ac0b0f333672cdd6721 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Oct 2021 00:13:39 +0200 Subject: [PATCH 0384/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 33 +++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt index 0ad4afdced9bf..743eb57a9a09d 100644 --- a/arch/arm/arm-soc-for-next-contents.txt +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -38,6 +38,30 @@ arm/dt git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-arm-dt k3/dts git://git.kernel.org/pub/scm/linux/kernel/git/nmenon/linux tags/ti-k3-dt-for-v5.16 + mediatek/dt + git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux tags/v5.15-next-dts64 + qcom/dt + git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux tags/qcom-dts-for-5.16 + qcom/dt64 + git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux tags/qcom-arm64-for-5.16 + samsung/dt + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux tags/samsung-dt-5.16 + samsung/dt64 + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux tags/samsung-dt64-5.16 + broadcom/dt + https://github.com/Broadcom/stblinux tags/arm-soc/for-5.16/devicetree + broadcom/dt64 + https://github.com/Broadcom/stblinux tags/arm-soc/for-5.16/devicetree-arm64 + ux500/dt + git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik tags/ux500-dts-for-v5.16 + hisilicon/dt64 + git://github.com/hisilicon/linux-hisi tags/hisi-arm64-dt-for-5.16 + aspeed/dt + git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc tags/aspeed-5.16-devicetree + renesas/dt-2 + git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-arm-dt-for-v5.16-tag2 + stm32/dt + git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32 tags/stm32-dt-for-v5.16-1 arm/drivers renesas/drivers @@ -80,11 +104,4 @@ arm/newsoc arm/late arm/fixes - firmware/ffa - git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux tags/ffa-fixes-5.15 - (bc22b6208f416ba702c17a93c9af6474f19e8212) - https://github.com/Broadcom/stblinux tags/arm-soc/for-5.15/devicetree - (7f565d0ead264329749c0da488de9c8dfa2f18ce) - git://git.linaro.org/people/jens.wiklander/linux-tee tags/optee-fix2-for-v5.15 - patch - iommu/arm: fix ARM_SMMU_QCOM compilation + From 29ec3c898c956d60a7587181a8efcf39e0c4e35d Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 14 Oct 2021 12:05:03 -0700 Subject: [PATCH 0385/1202] f2fs: remove circular locking between sb_internal and fs_reclaim We detected the below circular 
locking dependency between sb_internal and fs_reclaim. So, removed it by calling dquot_initialize() before sb_start_intwrite(). ====================================================== WARNING: possible circular locking dependency detected ------------------------------------------------------ kswapd0/133 is trying to acquire lock: ffffff80d5fb9680 (sb_internal#2){.+.+}-{0:0}, at: evict+0xd4/0x2f8 but task is already holding lock: ffffffda597c93a8 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x4/0x50 which lock already depends on the new lock. ... other info that might help us debug this: Chain exists of: sb_internal#2 --> &s->s_dquot.dqio_sem --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&s->s_dquot.dqio_sem); lock(fs_reclaim); lock(sb_internal#2); Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 86eeb019cc52f..a133932333c5b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1370,6 +1370,8 @@ static int f2fs_drop_inode(struct inode *inode) /* should remain fi->extent_tree for writepage */ f2fs_destroy_extent_node(inode); + dquot_initialize(inode); + sb_start_intwrite(inode->i_sb); f2fs_i_size_write(inode, 0); From efc7bd8527d02155ec71f0aa6a149d3e2cc01bdc Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Tue, 19 Oct 2021 15:14:21 +0900 Subject: [PATCH 0386/1202] exfat: fix incorrect loading of i_blocks for large files When calculating i_blocks, there was a mistake that was masked with a 32-bit variable. So i_blocks for files larger than 4 GiB had incorrect values. Mask with a 64-bit variable instead of 32-bit one. Fixes: 5f2aa075070c ("exfat: add inode operations") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Ganapathi Kamath Signed-off-by: Sungjong Seo Signed-off-by: Namjae Jeon --- fs/exfat/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index ca37d43443612..1c7aa1ea4724c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -604,7 +604,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) exfat_save_attr(inode, info->attr); inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & - ~(sbi->cluster_size - 1)) >> inode->i_blkbits; + ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits; inode->i_mtime = info->mtime; inode->i_ctime = info->mtime; ei->i_crtime = info->crtime; From fdc63184c87cfe52c1fe6435370dd4acf5e88b57 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:16:07 +0200 Subject: [PATCH 0387/1202] rust: add `kernel` crate The `kernel` crate currently includes all the abstractions that wrap kernel features written in C. These abstractions call the C side of the kernel via the generated bindings with the `bindgen` tool. Modules developed in Rust should never call the bindings themselves. In the future, as the abstractions grow in number, we may need to split this crate into several, possibly following a similar subdivision in subsystems as the kernel itself. 
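As a minimal sketch of the intended layering (modeled on the sample module
that accompanies this series; the names and strings below are illustrative,
and crate-level attributes such as #![no_std] and the feature gates the
samples enable are omitted for brevity), a module built on top of these
abstractions goes through the `kernel` crate only:

    // SPDX-License-Identifier: GPL-2.0
    //! Minimal illustrative module built on the `kernel` crate.

    use kernel::prelude::*;

    module! {
        type: RustMinimal,
        name: b"rust_minimal",
        author: b"Rust for Linux Contributors",
        description: b"Minimal illustrative sample",
        license: b"GPL v2",
    }

    struct RustMinimal;

    impl KernelModule for RustMinimal {
        fn init() -> Result<Self> {
            // Printing goes through the `kernel` crate, never `bindings`.
            pr_info!("rust_minimal loaded\n");
            Ok(RustMinimal)
        }
    }

    impl Drop for RustMinimal {
        fn drop(&mut self) {
            pr_info!("rust_minimal unloaded\n");
        }
    }

The `module!` macro expands to the `init_module()`/`cleanup_module()` entry
points, so no per-module C glue is needed.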
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- include/linux/spinlock.h | 17 +- kernel/printk/printk.c | 5 +- rust/kernel/allocator.rs | 63 +++ rust/kernel/bindings.rs | 46 +++ rust/kernel/bindings_helper.h | 25 ++ rust/kernel/buffer.rs | 39 ++ rust/kernel/build_assert.rs | 80 ++++ rust/kernel/c_types.rs | 119 ++++++ rust/kernel/chrdev.rs | 212 ++++++++++ rust/kernel/error.rs | 523 ++++++++++++++++++++++++ rust/kernel/file.rs | 131 ++++++ rust/kernel/file_operations.rs | 715 +++++++++++++++++++++++++++++++++ rust/kernel/io_buffer.rs | 153 +++++++ rust/kernel/io_mem.rs | 207 ++++++++++ rust/kernel/iov_iter.rs | 81 ++++ rust/kernel/lib.rs | 251 ++++++++++++ rust/kernel/linked_list.rs | 247 ++++++++++++ rust/kernel/miscdev.rs | 111 +++++ rust/kernel/module_param.rs | 497 +++++++++++++++++++++++ rust/kernel/of.rs | 101 +++++ rust/kernel/pages.rs | 162 ++++++++ rust/kernel/platdev.rs | 153 +++++++ rust/kernel/power.rs | 118 ++++++ rust/kernel/prelude.rs | 26 ++ rust/kernel/print.rs | 441 ++++++++++++++++++++ rust/kernel/random.rs | 50 +++ rust/kernel/raw_list.rs | 361 +++++++++++++++++ rust/kernel/rbtree.rs | 562 ++++++++++++++++++++++++++ rust/kernel/security.rs | 56 +++ rust/kernel/static_assert.rs | 39 ++ rust/kernel/std_vendor.rs | 150 +++++++ rust/kernel/str.rs | 253 ++++++++++++ rust/kernel/sync/arc.rs | 480 ++++++++++++++++++++++ rust/kernel/sync/condvar.rs | 132 ++++++ rust/kernel/sync/guard.rs | 91 +++++ rust/kernel/sync/locked_by.rs | 112 ++++++ rust/kernel/sync/mod.rs | 80 ++++ rust/kernel/sync/mutex.rs | 101 +++++ rust/kernel/sync/spinlock.rs | 102 +++++ rust/kernel/sysctl.rs | 198 +++++++++ rust/kernel/task.rs | 182 +++++++++ rust/kernel/types.rs | 228 +++++++++++ rust/kernel/user_ptr.rs | 175 ++++++++ 43 files changed, 7868 insertions(+), 7 deletions(-) create mode 100644 rust/kernel/allocator.rs create mode 100644 rust/kernel/bindings.rs create mode 100644 rust/kernel/bindings_helper.h create mode 100644 rust/kernel/buffer.rs create mode 100644 rust/kernel/build_assert.rs create mode 100644 rust/kernel/c_types.rs create mode 100644 rust/kernel/chrdev.rs create mode 100644 rust/kernel/error.rs create mode 100644 rust/kernel/file.rs create mode 100644 rust/kernel/file_operations.rs create mode 100644 rust/kernel/io_buffer.rs create mode 100644 rust/kernel/io_mem.rs create mode 100644 rust/kernel/iov_iter.rs create mode 100644 rust/kernel/lib.rs create mode 100644 rust/kernel/linked_list.rs create mode 100644 rust/kernel/miscdev.rs create mode 100644 rust/kernel/module_param.rs create mode 100644 rust/kernel/of.rs create mode 100644 rust/kernel/pages.rs create mode 100644 
rust/kernel/platdev.rs create mode 100644 rust/kernel/power.rs create mode 100644 rust/kernel/prelude.rs create mode 100644 rust/kernel/print.rs create mode 100644 rust/kernel/random.rs create mode 100644 rust/kernel/raw_list.rs create mode 100644 rust/kernel/rbtree.rs create mode 100644 rust/kernel/security.rs create mode 100644 rust/kernel/static_assert.rs create mode 100644 rust/kernel/std_vendor.rs create mode 100644 rust/kernel/str.rs create mode 100644 rust/kernel/sync/arc.rs create mode 100644 rust/kernel/sync/condvar.rs create mode 100644 rust/kernel/sync/guard.rs create mode 100644 rust/kernel/sync/locked_by.rs create mode 100644 rust/kernel/sync/mod.rs create mode 100644 rust/kernel/sync/mutex.rs create mode 100644 rust/kernel/sync/spinlock.rs create mode 100644 rust/kernel/sysctl.rs create mode 100644 rust/kernel/task.rs create mode 100644 rust/kernel/types.rs create mode 100644 rust/kernel/user_ptr.rs diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 45310ea1b1d78..76a855b3ecdef 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -340,12 +340,17 @@ static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) #ifdef CONFIG_DEBUG_SPINLOCK -# define spin_lock_init(lock) \ -do { \ - static struct lock_class_key __key; \ - \ - __raw_spin_lock_init(spinlock_check(lock), \ - #lock, &__key, LD_WAIT_CONFIG); \ +static inline void __spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ + __raw_spin_lock_init(spinlock_check(lock), name, key, LD_WAIT_CONFIG); +} + +# define spin_lock_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + __spin_lock_init(lock, #lock, &__key); \ } while (0) #else diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a8d0a58deebc7..54566b82a5dde 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -392,7 +392,10 @@ static struct latched_seq clear_seq = { /* the maximum size of a formatted record (i.e. with prefix added per line) */ #define CONSOLE_LOG_MAX 1024 -/* the maximum size allowed to be reserved for a record */ +/* + * The maximum size allowed to be reserved for a record. + * Keep in sync with rust/kernel/print.rs. + */ #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) #define LOG_LEVEL(v) ((v) & 0x07) diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs new file mode 100644 index 0000000000000..759cec47de2b1 --- /dev/null +++ b/rust/kernel/allocator.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Allocator support. + +use core::alloc::{GlobalAlloc, Layout}; +use core::ptr; + +use crate::bindings; +use crate::c_types; + +pub struct KernelAllocator; + +unsafe impl GlobalAlloc for KernelAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + // `krealloc()` is used instead of `kmalloc()` because the latter is + // an inline function and cannot be bound to as a result. + unsafe { bindings::krealloc(ptr::null(), layout.size(), bindings::GFP_KERNEL) as *mut u8 } + } + + unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { + unsafe { + bindings::kfree(ptr as *const c_types::c_void); + } + } +} + +#[global_allocator] +static ALLOCATOR: KernelAllocator = KernelAllocator; + +// `rustc` only generates these for some crate types. Even then, we would need +// to extract the object file that has them from the archive. For the moment, +// let's generate them ourselves instead. 
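+// They simply forward to the kernel allocator, mirroring `KernelAllocator`
+// above: allocation goes through `krealloc()` and freeing through `kfree()`.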
+#[no_mangle] +pub fn __rust_alloc(size: usize, _align: usize) -> *mut u8 { + unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 } +} + +#[no_mangle] +pub fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) { + unsafe { bindings::kfree(ptr as *const c_types::c_void) }; +} + +#[no_mangle] +pub fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + ptr as *const c_types::c_void, + new_size, + bindings::GFP_KERNEL, + ) as *mut u8 + } +} + +#[no_mangle] +pub fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 { + unsafe { + bindings::krealloc( + core::ptr::null(), + size, + bindings::GFP_KERNEL | bindings::__GFP_ZERO, + ) as *mut u8 + } +} diff --git a/rust/kernel/bindings.rs b/rust/kernel/bindings.rs new file mode 100644 index 0000000000000..08182f9476547 --- /dev/null +++ b/rust/kernel/bindings.rs @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Bindings +//! +//! Imports the generated bindings by `bindgen`. + +// See https://github.com/rust-lang/rust-bindgen/issues/1651. +#![cfg_attr(test, allow(deref_nullptr))] +#![cfg_attr(test, allow(unaligned_references))] +#![cfg_attr(test, allow(unsafe_op_in_unsafe_fn))] +#![allow( + clippy::all, + non_camel_case_types, + non_upper_case_globals, + non_snake_case, + improper_ctypes, + unsafe_op_in_unsafe_fn +)] + +mod bindings_raw { + // Use glob import here to expose all helpers. + // Symbols defined within the module will take precedence to the glob import. + pub use super::bindings_helper::*; + use crate::c_types; + include!(concat!(env!("OBJTREE"), "/rust/bindings_generated.rs")); +} + +// When both a directly exposed symbol and a helper exists for the same function, +// the directly exposed symbol is preferred and the helper becomes dead code, so +// ignore the warning here. +#[allow(dead_code)] +mod bindings_helper { + // Import the generated bindings for types. + use super::bindings_raw::*; + use crate::c_types; + include!(concat!( + env!("OBJTREE"), + "/rust/bindings_helpers_generated.rs" + )); +} + +pub use bindings_raw::*; + +pub const GFP_KERNEL: gfp_t = BINDINGS_GFP_KERNEL; +pub const __GFP_ZERO: gfp_t = BINDINGS___GFP_ZERO; +pub const __GFP_HIGHMEM: gfp_t = ___GFP_HIGHMEM; diff --git a/rust/kernel/bindings_helper.h b/rust/kernel/bindings_helper.h new file mode 100644 index 0000000000000..b01169f7609f3 --- /dev/null +++ b/rust/kernel/bindings_helper.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// `bindgen` gets confused at certain things +const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; +const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; diff --git a/rust/kernel/buffer.rs b/rust/kernel/buffer.rs new file mode 100644 index 0000000000000..b2502fa968fe9 --- /dev/null +++ b/rust/kernel/buffer.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Struct for writing to a pre-allocated buffer with the [`write!`] macro. + +use core::fmt; + +/// A pre-allocated buffer that implements [`core::fmt::Write`]. +/// +/// Consecutive writes will append to what has already been written. +/// Writes that don't fit in the buffer will fail. +pub struct Buffer<'a> { + slice: &'a mut [u8], + pos: usize, +} + +impl<'a> Buffer<'a> { + /// Create a new buffer from an existing array. 
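+    ///
+    /// For example (values are illustrative):
+    ///
+    /// ```ignore
+    /// use core::fmt::Write;
+    ///
+    /// let mut storage = [0u8; 16];
+    /// let mut buf = Buffer::new(&mut storage);
+    /// write!(buf, "{}", 42).unwrap();
+    /// assert_eq!(buf.bytes_written(), 2);
+    /// ```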
+ pub fn new(slice: &'a mut [u8]) -> Self { + Buffer { slice, pos: 0 } + } + + /// Number of bytes that have already been written to the buffer. + /// This will always be less than the length of the original array. + pub fn bytes_written(&self) -> usize { + self.pos + } +} + +impl<'a> fmt::Write for Buffer<'a> { + fn write_str(&mut self, s: &str) -> fmt::Result { + if s.len() > self.slice.len() - self.pos { + Err(fmt::Error) + } else { + self.slice[self.pos..self.pos + s.len()].copy_from_slice(s.as_bytes()); + self.pos += s.len(); + Ok(()) + } + } +} diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs new file mode 100644 index 0000000000000..f726927185c0d --- /dev/null +++ b/rust/kernel/build_assert.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Build-time assert. + +/// Fails the build if the code path calling `build_error!` can possibly be executed. +/// +/// If the macro is executed in const context, `build_error!` will panic. +/// If the compiler or optimizer cannot guarantee that `build_error!` can never +/// be called, a build error will be triggered. +/// +/// # Examples +/// ``` +/// # use kernel::build_error; +/// #[inline] +/// fn foo(a: usize) -> usize { +/// a.checked_add(1).unwrap_or_else(|| build_error!("overflow")) +/// } +/// ``` +#[macro_export] +macro_rules! build_error { + () => {{ + $crate::build_error("") + }}; + ($msg:expr) => {{ + $crate::build_error($msg) + }}; +} + +/// Asserts that a boolean expression is `true` at compile time. +/// +/// If the condition is evaluated to `false` in const context, `build_assert!` +/// will panic. If the compiler or optimizer cannot guarantee the condition will +/// be evaluated to `true`, a build error will be triggered. +/// +/// [`static_assert!`] should be preferred to `build_assert!` whenever possible. +/// +/// # Examples +/// +/// These examples show that different types of [`assert!`] will trigger errors +/// at different stage of compilation. It is preferred to err as early as +/// possible, so [`static_assert!`] should be used whenever possible. +/// ```compile_fail +/// # use kernel::prelude::*; +/// fn foo() { +/// static_assert!(1 > 1); // Compile-time error +/// build_assert!(1 > 1); // Build-time error +/// assert!(1 > 1); // Run-time error +/// } +/// ``` +/// +/// When the condition refers to generic parameters or parameters of an inline function, +/// [`static_assert!`] cannot be used. Use `build_assert!` in this scenario. +/// ```no_run +/// # use kernel::prelude::*; +/// fn foo() { +/// // `static_assert!(N > 1);` is not allowed +/// build_assert!(N > 1); // Build-time check +/// assert!(N > 1); // Run-time check +/// } +/// +/// #[inline] +/// fn bar(n: usize) { +/// // `static_assert!(n > 1);` is not allowed +/// build_assert!(n > 1); // Build-time check +/// assert!(n > 1); // Run-time check +/// } +/// ``` +#[macro_export] +macro_rules! build_assert { + ($cond:expr $(,)?) => {{ + if !$cond { + $crate::build_error(concat!("assertion failed: ", stringify!($cond))); + } + }}; + ($cond:expr, $msg:expr) => {{ + if !$cond { + $crate::build_error($msg); + } + }}; +} diff --git a/rust/kernel/c_types.rs b/rust/kernel/c_types.rs new file mode 100644 index 0000000000000..07593a3ba8bed --- /dev/null +++ b/rust/kernel/c_types.rs @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! C types for the bindings. +//! +//! The bindings generated by `bindgen` use these types to map to the C ones. +//! +//! C's standard integer types may differ in width depending on +//! 
the architecture, thus we need to conditionally compile those. + +#![allow(non_camel_case_types)] + +#[cfg(any(target_arch = "arm", target_arch = "x86", target_arch = "riscv32",))] +mod c { + /// C `void` type. + pub type c_void = core::ffi::c_void; + + /// C `char` type. + pub type c_char = i8; + + /// C `signed char` type. + pub type c_schar = i8; + + /// C `unsigned char` type. + pub type c_uchar = u8; + + /// C `short` type. + pub type c_short = i16; + + /// C `unsigned short` type. + pub type c_ushort = u16; + + /// C `int` type. + pub type c_int = i32; + + /// C `unsigned int` type. + pub type c_uint = u32; + + /// C `long` type. + pub type c_long = i32; + + /// C `unsigned long` type. + pub type c_ulong = u32; + + /// C `long long` type. + pub type c_longlong = i64; + + /// C `unsigned long long` type. + pub type c_ulonglong = u64; + + /// C `ssize_t` type (typically defined in `` by POSIX). + /// + /// For some 32-bit architectures like this one, the kernel defines it as + /// `int`, i.e. it is an [`i32`]. + pub type c_ssize_t = isize; + + /// C `size_t` type (typically defined in ``). + /// + /// For some 32-bit architectures like this one, the kernel defines it as + /// `unsigned int`, i.e. it is an [`u32`]. + pub type c_size_t = usize; +} + +#[cfg(any( + target_arch = "aarch64", + target_arch = "x86_64", + target_arch = "powerpc64", + target_arch = "riscv64", +))] +mod c { + /// C `void` type. + pub type c_void = core::ffi::c_void; + + /// C `char` type. + pub type c_char = i8; + + /// C `signed char` type. + pub type c_schar = i8; + + /// C `unsigned char` type. + pub type c_uchar = u8; + + /// C `short` type. + pub type c_short = i16; + + /// C `unsigned short` type. + pub type c_ushort = u16; + + /// C `int` type. + pub type c_int = i32; + + /// C `unsigned int` type. + pub type c_uint = u32; + + /// C `long` type. + pub type c_long = i64; + + /// C `unsigned long` type. + pub type c_ulong = u64; + + /// C `long long` type. + pub type c_longlong = i64; + + /// C `unsigned long long` type. + pub type c_ulonglong = u64; + + /// C `ssize_t` type (typically defined in `` by POSIX). + /// + /// For 64-bit architectures like this one, the kernel defines it as + /// `long`, i.e. it is an [`i64`]. + pub type c_ssize_t = isize; + + /// C `size_t` type (typically defined in ``). + /// + /// For 64-bit architectures like this one, the kernel defines it as + /// `unsigned long`, i.e. it is an [`u64`]. + pub type c_size_t = usize; +} + +pub use c::*; diff --git a/rust/kernel/chrdev.rs b/rust/kernel/chrdev.rs new file mode 100644 index 0000000000000..20e93ec05def3 --- /dev/null +++ b/rust/kernel/chrdev.rs @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Character devices. +//! +//! Also called "char devices", `chrdev`, `cdev`. +//! +//! C header: [`include/linux/cdev.h`](../../../../include/linux/cdev.h) +//! +//! Reference: + +use alloc::boxed::Box; +use core::convert::TryInto; +use core::marker::PhantomPinned; +use core::pin::Pin; + +use crate::bindings; +use crate::c_types; +use crate::error::{Error, Result}; +use crate::file_operations; +use crate::str::CStr; + +/// Character device. +/// +/// # Invariants +/// +/// - [`self.0`] is valid and non-null. +/// - [`(*self.0).ops`] is valid, non-null and has static lifetime. +/// - [`(*self.0).owner`] is valid and, if non-null, has module lifetime. +struct Cdev(*mut bindings::cdev); + +impl Cdev { + fn alloc( + fops: &'static bindings::file_operations, + module: &'static crate::ThisModule, + ) -> Result { + // SAFETY: FFI call. 
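+        // `cdev_alloc()` takes no arguments; it may return a null pointer,
+        // which is checked just below.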
+ let cdev = unsafe { bindings::cdev_alloc() }; + if cdev.is_null() { + return Err(Error::ENOMEM); + } + // SAFETY: `cdev` is valid and non-null since `cdev_alloc()` + // returned a valid pointer which was null-checked. + unsafe { + (*cdev).ops = fops; + (*cdev).owner = module.0; + } + // INVARIANTS: + // - [`self.0`] is valid and non-null. + // - [`(*self.0).ops`] is valid, non-null and has static lifetime, + // because it was coerced from a reference with static lifetime. + // - [`(*self.0).owner`] is valid and, if non-null, has module lifetime, + // guaranteed by the [`ThisModule`] invariant. + Ok(Self(cdev)) + } + + fn add(&mut self, dev: bindings::dev_t, count: c_types::c_uint) -> Result { + // SAFETY: according to the type invariants: + // - [`self.0`] can be safely passed to [`bindings::cdev_add`]. + // - [`(*self.0).ops`] will live at least as long as [`self.0`]. + // - [`(*self.0).owner`] will live at least as long as the + // module, which is an implicit requirement. + let rc = unsafe { bindings::cdev_add(self.0, dev, count) }; + if rc != 0 { + return Err(Error::from_kernel_errno(rc)); + } + Ok(()) + } +} + +impl Drop for Cdev { + fn drop(&mut self) { + // SAFETY: [`self.0`] is valid and non-null by the type invariants. + unsafe { + bindings::cdev_del(self.0); + } + } +} + +struct RegistrationInner { + dev: bindings::dev_t, + used: usize, + cdevs: [Option; N], + _pin: PhantomPinned, +} + +/// Character device registration. +/// +/// May contain up to a fixed number (`N`) of devices. Must be pinned. +pub struct Registration { + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + inner: Option>, +} + +impl Registration<{ N }> { + /// Creates a [`Registration`] object for a character device. + /// + /// This does *not* register the device: see [`Self::register()`]. + /// + /// This associated function is intended to be used when you need to avoid + /// a memory allocation, e.g. when the [`Registration`] is a member of + /// a bigger structure inside your [`crate::KernelModule`] instance. If you + /// are going to pin the registration right away, call + /// [`Self::new_pinned()`] instead. + pub fn new( + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + ) -> Self { + Registration { + name, + minors_start, + this_module, + inner: None, + } + } + + /// Creates a pinned [`Registration`] object for a character device. + /// + /// This does *not* register the device: see [`Self::register()`]. + pub fn new_pinned( + name: &'static CStr, + minors_start: u16, + this_module: &'static crate::ThisModule, + ) -> Result>> { + Ok(Pin::from(Box::try_new(Self::new( + name, + minors_start, + this_module, + ))?)) + } + + /// Registers a character device. + /// + /// You may call this once per device type, up to `N` times. + pub fn register>(self: Pin<&mut Self>) -> Result { + // SAFETY: We must ensure that we never move out of `this`. + let this = unsafe { self.get_unchecked_mut() }; + if this.inner.is_none() { + let mut dev: bindings::dev_t = 0; + // SAFETY: Calling unsafe function. `this.name` has `'static` + // lifetime. 
+ let res = unsafe { + bindings::alloc_chrdev_region( + &mut dev, + this.minors_start.into(), + N.try_into()?, + this.name.as_char_ptr(), + ) + }; + if res != 0 { + return Err(Error::from_kernel_errno(res)); + } + const NONE: Option = None; + this.inner = Some(RegistrationInner { + dev, + used: 0, + cdevs: [NONE; N], + _pin: PhantomPinned, + }); + } + + let mut inner = this.inner.as_mut().unwrap(); + if inner.used == N { + return Err(Error::EINVAL); + } + + // SAFETY: The adapter doesn't retrieve any state yet, so it's compatible with any + // registration. + let fops = unsafe { file_operations::FileOperationsVtable::::build() }; + let mut cdev = Cdev::alloc(fops, this.this_module)?; + cdev.add(inner.dev + inner.used as bindings::dev_t, 1)?; + inner.cdevs[inner.used].replace(cdev); + inner.used += 1; + Ok(()) + } +} + +impl file_operations::FileOpenAdapter for Registration<{ N }> { + type Arg = (); + + unsafe fn convert( + _inode: *mut bindings::inode, + _file: *mut bindings::file, + ) -> *const Self::Arg { + // TODO: Update the SAFETY comment on the call to `FileOperationsVTable::build` above once + // this is updated to retrieve state. + &() + } +} + +// SAFETY: `Registration` does not expose any of its state across threads +// (it is fine for multiple threads to have a shared reference to it). +unsafe impl Sync for Registration<{ N }> {} + +impl Drop for Registration<{ N }> { + fn drop(&mut self) { + if let Some(inner) = self.inner.as_mut() { + // Replicate kernel C behaviour: drop [`Cdev`]s before calling + // [`bindings::unregister_chrdev_region`]. + for i in 0..inner.used { + inner.cdevs[i].take(); + } + // SAFETY: [`self.inner`] is Some, so [`inner.dev`] was previously + // created using [`bindings::alloc_chrdev_region`]. + unsafe { + bindings::unregister_chrdev_region(inner.dev, N.try_into().unwrap()); + } + } + } +} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs new file mode 100644 index 0000000000000..fc567d0c256f2 --- /dev/null +++ b/rust/kernel/error.rs @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel errors. +//! +//! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h) + +use crate::str::CStr; +use crate::{bindings, c_types}; +use alloc::{ + alloc::{AllocError, LayoutError}, + collections::TryReserveError, +}; +use core::convert::From; +use core::fmt; +use core::num::TryFromIntError; +use core::str::{self, Utf8Error}; + +macro_rules! declare_err { + ($err:tt) => { + pub const $err: Self = Error(-(bindings::$err as i32)); + }; + ($err:tt, $($doc:expr),+) => { + $( + #[doc = $doc] + )* + pub const $err: Self = Error(-(bindings::$err as i32)); + }; +} + +/// Generic integer kernel error. +/// +/// The kernel defines a set of integer generic error codes based on C and +/// POSIX ones. These codes may have a more specific meaning in some contexts. +/// +/// # Invariants +/// +/// The value is a valid `errno` (i.e. `>= -MAX_ERRNO && < 0`). 
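+///
+/// # Examples
+///
+/// ```ignore
+/// // `EINVAL` is 22 in the C headers, stored here as a negative `errno`.
+/// assert_eq!(Error::EINVAL.to_kernel_errno(), -22);
+/// ```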
+#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Error(c_types::c_int); + +impl Error { + declare_err!(EPERM, "Operation not permitted."); + + declare_err!(ENOENT, "No such file or directory."); + + declare_err!(ESRCH, "No such process."); + + declare_err!(EINTR, "Interrupted system call."); + + declare_err!(EIO, "I/O error."); + + declare_err!(ENXIO, "No such device or address."); + + declare_err!(E2BIG, "Argument list too long."); + + declare_err!(ENOEXEC, "Exec format error."); + + declare_err!(EBADF, "Bad file number."); + + declare_err!(ECHILD, "Exec format error."); + + declare_err!(EAGAIN, "Try again."); + + declare_err!(ENOMEM, "Out of memory."); + + declare_err!(EACCES, "Permission denied."); + + declare_err!(EFAULT, "Bad address."); + + declare_err!(ENOTBLK, "Block device required."); + + declare_err!(EBUSY, "Device or resource busy."); + + declare_err!(EEXIST, "File exists."); + + declare_err!(EXDEV, "Cross-device link."); + + declare_err!(ENODEV, "No such device."); + + declare_err!(ENOTDIR, "Not a directory."); + + declare_err!(EISDIR, "Is a directory."); + + declare_err!(EINVAL, "Invalid argument."); + + declare_err!(ENFILE, "File table overflow."); + + declare_err!(EMFILE, "Too many open files."); + + declare_err!(ENOTTY, "Not a typewriter."); + + declare_err!(ETXTBSY, "Text file busy."); + + declare_err!(EFBIG, "File too large."); + + declare_err!(ENOSPC, "No space left on device."); + + declare_err!(ESPIPE, "Illegal seek."); + + declare_err!(EROFS, "Read-only file system."); + + declare_err!(EMLINK, "Too many links."); + + declare_err!(EPIPE, "Broken pipe."); + + declare_err!(EDOM, "Math argument out of domain of func."); + + declare_err!(ERANGE, "Math result not representable."); + + declare_err!(EDEADLK, "Resource deadlock would occur"); + + declare_err!(ENAMETOOLONG, "File name too long"); + + declare_err!(ENOLCK, "No record locks available"); + + declare_err!( + ENOSYS, + "Invalid system call number.", + "", + "This error code is special: arch syscall entry code will return", + "[`Self::ENOSYS`] if users try to call a syscall that doesn't exist.", + "To keep failures of syscalls that really do exist distinguishable from", + "failures due to attempts to use a nonexistent syscall, syscall", + "implementations should refrain from returning [`Self::ENOSYS`]." 
+ ); + + declare_err!(ENOTEMPTY, "Directory not empty."); + + declare_err!(ELOOP, "Too many symbolic links encountered."); + + declare_err!(EWOULDBLOCK, "Operation would block."); + + declare_err!(ENOMSG, "No message of desired type."); + + declare_err!(EIDRM, "Identifier removed."); + + declare_err!(ECHRNG, "Channel number out of range."); + + declare_err!(EL2NSYNC, "Level 2 not synchronized."); + + declare_err!(EL3HLT, "Level 3 halted."); + + declare_err!(EL3RST, "Level 3 reset."); + + declare_err!(ELNRNG, "Link number out of range."); + + declare_err!(EUNATCH, "Protocol driver not attached."); + + declare_err!(ENOCSI, "No CSI structure available."); + + declare_err!(EL2HLT, "Level 2 halted."); + + declare_err!(EBADE, "Invalid exchange."); + + declare_err!(EBADR, "Invalid request descriptor."); + + declare_err!(EXFULL, "Exchange full."); + + declare_err!(ENOANO, "No anode."); + + declare_err!(EBADRQC, "Invalid request code."); + + declare_err!(EBADSLT, "Invalid slot."); + + declare_err!(EDEADLOCK, "Resource deadlock would occur."); + + declare_err!(EBFONT, "Bad font file format."); + + declare_err!(ENOSTR, "Device not a stream."); + + declare_err!(ENODATA, "No data available."); + + declare_err!(ETIME, "Timer expired."); + + declare_err!(ENOSR, "Out of streams resources."); + + declare_err!(ENONET, "Machine is not on the network."); + + declare_err!(ENOPKG, "Package not installed."); + + declare_err!(EREMOTE, "Object is remote."); + + declare_err!(ENOLINK, "Link has been severed."); + + declare_err!(EADV, "Advertise error."); + + declare_err!(ESRMNT, "Srmount error."); + + declare_err!(ECOMM, "Communication error on send."); + + declare_err!(EPROTO, "Protocol error."); + + declare_err!(EMULTIHOP, "Multihop attempted."); + + declare_err!(EDOTDOT, "RFS specific error."); + + declare_err!(EBADMSG, "Not a data message."); + + declare_err!(EOVERFLOW, "Value too large for defined data type."); + + declare_err!(ENOTUNIQ, "Name not unique on network."); + + declare_err!(EBADFD, "File descriptor in bad state."); + + declare_err!(EREMCHG, "Remote address changed."); + + declare_err!(ELIBACC, "Can not access a needed shared library."); + + declare_err!(ELIBBAD, "Accessing a corrupted shared library."); + + declare_err!(ELIBSCN, ".lib section in a.out corrupted."); + + declare_err!(ELIBMAX, "Attempting to link in too many shared libraries."); + + declare_err!(ELIBEXEC, "Cannot exec a shared library directly."); + + declare_err!(EILSEQ, "Illegal byte sequence."); + + declare_err!(ERESTART, "Interrupted system call should be restarted."); + + declare_err!(ESTRPIPE, "Streams pipe error."); + + declare_err!(EUSERS, "Too many users."); + + declare_err!(ENOTSOCK, "Socket operation on non-socket."); + + declare_err!(EDESTADDRREQ, "Destination address required."); + + declare_err!(EMSGSIZE, "Message too long."); + + declare_err!(EPROTOTYPE, "Protocol wrong type for socket."); + + declare_err!(ENOPROTOOPT, "Protocol not available."); + + declare_err!(EPROTONOSUPPORT, "Protocol not supported."); + + declare_err!(ESOCKTNOSUPPORT, "Socket type not supported."); + + declare_err!(EOPNOTSUPP, "Operation not supported on transport endpoint."); + + declare_err!(EPFNOSUPPORT, "Protocol family not supported."); + + declare_err!(EAFNOSUPPORT, "Address family not supported by protocol."); + + declare_err!(EADDRINUSE, "Address already in use."); + + declare_err!(EADDRNOTAVAIL, "Cannot assign requested address."); + + declare_err!(ENETDOWN, "Network is down."); + + declare_err!(ENETUNREACH, "Network is unreachable."); + + 
declare_err!(ENETRESET, "Network dropped connection because of reset."); + + declare_err!(ECONNABORTED, "Software caused connection abort."); + + declare_err!(ECONNRESET, "Connection reset by peer."); + + declare_err!(ENOBUFS, "No buffer space available."); + + declare_err!(EISCONN, "Transport endpoint is already connected."); + + declare_err!(ENOTCONN, "Transport endpoint is not connected."); + + declare_err!(ESHUTDOWN, "Cannot send after transport endpoint shutdown."); + + declare_err!(ETOOMANYREFS, "Too many references: cannot splice."); + + declare_err!(ETIMEDOUT, "Connection timed out."); + + declare_err!(ECONNREFUSED, "Connection refused."); + + declare_err!(EHOSTDOWN, "Host is down."); + + declare_err!(EHOSTUNREACH, "No route to host."); + + declare_err!(EALREADY, "Operation already in progress."); + + declare_err!(EINPROGRESS, "Operation now in progress."); + + declare_err!(ESTALE, "Stale file handle."); + + declare_err!(EUCLEAN, "Structure needs cleaning."); + + declare_err!(ENOTNAM, "Not a XENIX named type file."); + + declare_err!(ENAVAIL, "No XENIX semaphores available."); + + declare_err!(EISNAM, "Is a named type file."); + + declare_err!(EREMOTEIO, "Remote I/O error."); + + declare_err!(EDQUOT, "Quota exceeded."); + + declare_err!(ENOMEDIUM, "No medium found."); + + declare_err!(EMEDIUMTYPE, "Wrong medium type."); + + declare_err!(ECANCELED, "Operation Canceled."); + + declare_err!(ENOKEY, "Required key not available."); + + declare_err!(EKEYEXPIRED, "Key has expired."); + + declare_err!(EKEYREVOKED, "Key has been revoked."); + + declare_err!(EKEYREJECTED, "Key was rejected by service."); + + declare_err!(EOWNERDEAD, "Owner died.", "", "For robust mutexes."); + + declare_err!(ENOTRECOVERABLE, "State not recoverable."); + + declare_err!(ERFKILL, "Operation not possible due to RF-kill."); + + declare_err!(EHWPOISON, "Memory page has hardware error."); + + declare_err!(ERESTARTSYS, "Restart the system call."); + + /// Creates an [`Error`] from a kernel error code. + /// + /// It is a bug to pass an out-of-range `errno`. `EINVAL` would + /// be returned in such a case. + pub(crate) fn from_kernel_errno(errno: c_types::c_int) -> Error { + if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 { + // TODO: make it a `WARN_ONCE` once available. + crate::pr_warn!( + "attempted to create `Error` with out of range `errno`: {}", + errno + ); + return Error::EINVAL; + } + + // INVARIANT: the check above ensures the type invariant + // will hold. + Error(errno) + } + + /// Creates an [`Error`] from a kernel error code. + /// + /// # Safety + /// + /// `errno` must be within error code range (i.e. `>= -MAX_ERRNO && < 0`). + pub(crate) unsafe fn from_kernel_errno_unchecked(errno: c_types::c_int) -> Error { + // INVARIANT: the contract ensures the type invariant + // will hold. + Error(errno) + } + + /// Returns the kernel error code. + pub fn to_kernel_errno(self) -> c_types::c_int { + self.0 + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SAFETY: FFI call. + let name = unsafe { bindings::errname(-self.0) }; + + if name.is_null() { + // Print out number if no name can be found. + return f.debug_tuple("Error").field(&-self.0).finish(); + } + + // SAFETY: `'static` string from C, and is not NULL. + let cstr = unsafe { CStr::from_char_ptr(name) }; + // SAFETY: These strings are ASCII-only. 
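+        // and therefore valid UTF-8, so the unchecked conversion is sound.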
+ let str = unsafe { str::from_utf8_unchecked(cstr) }; + f.debug_tuple(str).finish() + } +} + +impl From for Error { + fn from(_: TryFromIntError) -> Error { + Error::EINVAL + } +} + +impl From for Error { + fn from(_: Utf8Error) -> Error { + Error::EINVAL + } +} + +impl From for Error { + fn from(_: TryReserveError) -> Error { + Error::ENOMEM + } +} + +impl From for Error { + fn from(_: LayoutError) -> Error { + Error::ENOMEM + } +} + +/// A [`Result`] with an [`Error`] error type. +/// +/// To be used as the return type for functions that may fail. +/// +/// # Error codes in C and Rust +/// +/// In C, it is common that functions indicate success or failure through +/// their return value; modifying or returning extra data through non-`const` +/// pointer parameters. In particular, in the kernel, functions that may fail +/// typically return an `int` that represents a generic error code. We model +/// those as [`Error`]. +/// +/// In Rust, it is idiomatic to model functions that may fail as returning +/// a [`Result`]. Since in the kernel many functions return an error code, +/// [`Result`] is a type alias for a [`core::result::Result`] that uses +/// [`Error`] as its error type. +/// +/// Note that even if a function does not return anything when it succeeds, +/// it should still be modeled as returning a `Result` rather than +/// just an [`Error`]. +pub type Result = core::result::Result; + +impl From for Error { + fn from(_: AllocError) -> Error { + Error::ENOMEM + } +} + +// # Invariant: `-bindings::MAX_ERRNO` fits in an `i16`. +crate::static_assert!(bindings::MAX_ERRNO <= -(i16::MIN as i32) as u32); + +#[doc(hidden)] +pub fn from_kernel_result_helper(r: Result) -> T +where + T: From, +{ + match r { + Ok(v) => v, + // NO-OVERFLOW: negative `errno`s are no smaller than `-bindings::MAX_ERRNO`, + // `-bindings::MAX_ERRNO` fits in an `i16` as per invariant above, + // therefore a negative `errno` always fits in an `i16` and will not overflow. + Err(e) => T::from(e.to_kernel_errno() as i16), + } +} + +/// Transforms a [`crate::error::Result`] to a kernel C integer result. +/// +/// This is useful when calling Rust functions that return [`crate::error::Result`] +/// from inside `extern "C"` functions that need to return an integer +/// error result. +/// +/// `T` should be convertible to an `i16` via `From`. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::from_kernel_result; +/// # use kernel::c_types; +/// # use kernel::bindings; +/// unsafe extern "C" fn probe_callback( +/// pdev: *mut bindings::platform_device, +/// ) -> c_types::c_int { +/// from_kernel_result! { +/// let ptr = devm_alloc(pdev)?; +/// bindings::platform_set_drvdata(pdev, ptr); +/// Ok(0) +/// } +/// } +/// ``` +#[macro_export] +macro_rules! from_kernel_result { + ($($tt:tt)*) => {{ + $crate::error::from_kernel_result_helper((|| { + $($tt)* + })()) + }}; +} + +/// Transform a kernel "error pointer" to a normal pointer. +/// +/// Some kernel C API functions return an "error pointer" which optionally +/// embeds an `errno`. Callers are supposed to check the returned pointer +/// for errors. This function performs the check and converts the "error pointer" +/// to a normal pointer in an idiomatic fashion. 
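+/// On success the pointer is returned unchanged; on failure the embedded
+/// `errno` is converted into an [`Error`].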
+/// +/// # Examples +/// +/// ```ignore +/// # use kernel::prelude::*; +/// # use kernel::from_kernel_err_ptr; +/// # use kernel::c_types; +/// # use kernel::bindings; +/// fn devm_platform_ioremap_resource( +/// pdev: &mut PlatformDevice, +/// index: u32, +/// ) -> Result<*mut c_types::c_void> { +/// // SAFETY: FFI call. +/// unsafe { +/// from_kernel_err_ptr(bindings::devm_platform_ioremap_resource( +/// pdev.to_ptr(), +/// index, +/// )) +/// } +/// } +/// ``` +// TODO: remove `dead_code` marker once an in-kernel client is available. +#[allow(dead_code)] +pub(crate) fn from_kernel_err_ptr(ptr: *mut T) -> Result<*mut T> { + // CAST: casting a pointer to `*const c_types::c_void` is always valid. + let const_ptr: *const c_types::c_void = ptr.cast(); + // SAFETY: the FFI function does not deref the pointer. + if unsafe { bindings::IS_ERR(const_ptr) } { + // SAFETY: the FFI function does not deref the pointer. + let err = unsafe { bindings::PTR_ERR(const_ptr) }; + // CAST: if `IS_ERR()` returns `true`, + // then `PTR_ERR()` is guaranteed to return a + // negative value greater-or-equal to `-bindings::MAX_ERRNO`, + // which always fits in an `i16`, as per the invariant above. + // And an `i16` always fits in an `i32`. So casting `err` to + // an `i32` can never overflow, and is always valid. + // + // SAFETY: `IS_ERR()` ensures `err` is a + // negative value greater-or-equal to `-bindings::MAX_ERRNO` + return Err(unsafe { Error::from_kernel_errno_unchecked(err as i32) }); + } + Ok(ptr) +} diff --git a/rust/kernel/file.rs b/rust/kernel/file.rs new file mode 100644 index 0000000000000..9e205ad34d7f6 --- /dev/null +++ b/rust/kernel/file.rs @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](../../../../include/linux/fs.h) and +//! [`include/linux/file.h`](../../../../include/linux/file.h) + +use crate::{bindings, error::Error, Result}; +use core::{mem::ManuallyDrop, ops::Deref}; + +/// Wraps the kernel's `struct file`. +/// +/// # Invariants +/// +/// The pointer `File::ptr` is non-null and valid. Its reference count is also non-zero. +pub struct File { + pub(crate) ptr: *mut bindings::file, +} + +impl File { + /// Constructs a new [`struct file`] wrapper from a file descriptor. + /// + /// The file descriptor belongs to the current process. + pub fn from_fd(fd: u32) -> Result { + // SAFETY: FFI call, there are no requirements on `fd`. + let ptr = unsafe { bindings::fget(fd) }; + if ptr.is_null() { + return Err(Error::EBADF); + } + + // INVARIANTS: We checked that `ptr` is non-null, so it is valid. `fget` increments the ref + // count before returning. + Ok(Self { ptr }) + } + + /// Returns the current seek/cursor/pointer position (`struct file::f_pos`). + pub fn pos(&self) -> u64 { + // SAFETY: `File::ptr` is guaranteed to be valid by the type invariants. + unsafe { (*self.ptr).f_pos as u64 } + } + + /// Returns whether the file is in blocking mode. + pub fn is_blocking(&self) -> bool { + // SAFETY: `File::ptr` is guaranteed to be valid by the type invariants. + unsafe { (*self.ptr).f_flags & bindings::O_NONBLOCK == 0 } + } +} + +impl Drop for File { + fn drop(&mut self) { + // SAFETY: The type invariants guarantee that `File::ptr` has a non-zero reference count. + unsafe { bindings::fput(self.ptr) }; + } +} + +/// A wrapper for [`File`] that doesn't automatically decrement the refcount when dropped. 
+/// +/// We need the wrapper because [`ManuallyDrop`] alone would allow callers to call +/// [`ManuallyDrop::into_inner`]. This would allow an unsafe sequence to be triggered without +/// `unsafe` blocks because it would trigger an unbalanced call to `fput`. +/// +/// # Invariants +/// +/// The wrapped [`File`] remains valid for the lifetime of the object. +pub(crate) struct FileRef(ManuallyDrop); + +impl FileRef { + /// Constructs a new [`struct file`] wrapper that doesn't change its reference count. + /// + /// # Safety + /// + /// The pointer `ptr` must be non-null and valid for the lifetime of the object. + pub(crate) unsafe fn from_ptr(ptr: *mut bindings::file) -> Self { + Self(ManuallyDrop::new(File { ptr })) + } +} + +impl Deref for FileRef { + type Target = File; + + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +pub struct FileDescriptorReservation { + fd: u32, +} + +impl FileDescriptorReservation { + /// Creates a new file descriptor reservation. + pub fn new(flags: u32) -> Result { + // SAFETY: FFI call, there are no safety requirements on `flags`. + let fd = unsafe { bindings::get_unused_fd_flags(flags) }; + if fd < 0 { + return Err(Error::from_kernel_errno(fd)); + } + Ok(Self { fd: fd as _ }) + } + + /// Returns the file descriptor number that was reserved. + pub fn reserved_fd(&self) -> u32 { + self.fd + } + + /// Commits the reservation. + /// + /// The previously reserved file descriptor is bound to `file`. + pub fn commit(self, file: File) { + // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`, and `file.ptr` is + // guaranteed to have an owned ref count by its type invariants. + unsafe { bindings::fd_install(self.fd, file.ptr) }; + + // `fd_install` consumes both the file descriptor and the file reference, so we cannot run + // the destructors. + core::mem::forget(self); + core::mem::forget(file); + } +} + +impl Drop for FileDescriptorReservation { + fn drop(&mut self) { + // SAFETY: `self.fd` was returned by a previous call to `get_unused_fd_flags`. + unsafe { bindings::put_unused_fd(self.fd) }; + } +} diff --git a/rust/kernel/file_operations.rs b/rust/kernel/file_operations.rs new file mode 100644 index 0000000000000..c6f03938de97d --- /dev/null +++ b/rust/kernel/file_operations.rs @@ -0,0 +1,715 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! File operations. +//! +//! C header: [`include/linux/fs.h`](../../../../include/linux/fs.h) + +use core::convert::{TryFrom, TryInto}; +use core::{marker, mem, ops::Deref, ptr}; + +use alloc::boxed::Box; + +use crate::{ + bindings, c_types, + error::{Error, Result}, + file::{File, FileRef}, + from_kernel_result, + io_buffer::{IoBufferReader, IoBufferWriter}, + iov_iter::IovIter, + sync::CondVar, + types::PointerWrapper, + user_ptr::{UserSlicePtr, UserSlicePtrReader, UserSlicePtrWriter}, +}; + +/// Wraps the kernel's `struct poll_table_struct`. +/// +/// # Invariants +/// +/// The pointer `PollTable::ptr` is null or valid. +pub struct PollTable { + ptr: *mut bindings::poll_table_struct, +} + +impl PollTable { + /// Constructors a new `struct poll_table_struct` wrapper. 
+ /// + /// # Safety + /// + /// The pointer `ptr` must be either null or a valid pointer for the lifetime of the object. + unsafe fn from_ptr(ptr: *mut bindings::poll_table_struct) -> Self { + Self { ptr } + } + + /// Associates the given file and condition variable to this poll table. It means notifying the + /// condition variable will notify the poll table as well; additionally, the association + /// between the condition variable and the file will automatically be undone by the kernel when + /// the file is destructed. To unilaterally remove the association before then, one can call + /// [`CondVar::free_waiters`]. + /// + /// # Safety + /// + /// If the condition variable is destroyed before the file, then [`CondVar::free_waiters`] must + /// be called to ensure that all waiters are flushed out. + pub unsafe fn register_wait<'a>(&self, file: &'a File, cv: &'a CondVar) { + if self.ptr.is_null() { + return; + } + + // SAFETY: `PollTable::ptr` is guaranteed to be valid by the type invariants and the null + // check above. + let table = unsafe { &*self.ptr }; + if let Some(proc) = table._qproc { + // SAFETY: All pointers are known to be valid. + unsafe { proc(file.ptr as _, cv.wait_list.get(), self.ptr) } + } + } +} + +/// Equivalent to [`std::io::SeekFrom`]. +/// +/// [`std::io::SeekFrom`]: https://doc.rust-lang.org/std/io/enum.SeekFrom.html +pub enum SeekFrom { + /// Equivalent to C's `SEEK_SET`. + Start(u64), + + /// Equivalent to C's `SEEK_END`. + End(i64), + + /// Equivalent to C's `SEEK_CUR`. + Current(i64), +} + +/// Called by the VFS when an inode should be opened. +/// +/// Calls `T::open` on the returned value of `A::convert`. +/// +/// # Safety +/// +/// The returned value of `A::convert` must be a valid non-null pointer and +/// `T:open` must return a valid non-null pointer on an `Ok` result. +unsafe extern "C" fn open_callback>( + inode: *mut bindings::inode, + file: *mut bindings::file, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `A::convert` must return a valid non-null pointer that + // should point to data in the inode or file that lives longer + // than the following use of `T::open`. + let arg = unsafe { A::convert(inode, file) }; + // SAFETY: `arg` was previously returned by `A::convert` and must + // be a valid non-null pointer. + let ptr = T::open(unsafe { &*arg })?.into_pointer(); + // SAFETY: `ptr` was previously returned by `T::open`. The returned + // value should be a boxed value and should live the length of the + // given file. + unsafe { (*file).private_data = ptr as *mut c_types::c_void }; + Ok(0) + } +} + +unsafe extern "C" fn read_callback( + file: *mut bindings::file, + buf: *mut c_types::c_char, + len: c_types::c_size_t, + offset: *mut bindings::loff_t, +) -> c_types::c_ssize_t { + from_kernel_result! { + let mut data = unsafe { UserSlicePtr::new(buf as *mut c_types::c_void, len).writer() }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + // No `FMODE_UNSIGNED_OFFSET` support, so `offset` must be in [0, 2^63). 
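+        // hence the fallible `try_into()` on `*offset` below.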
+ // See discussion in https://github.com/fishinabarrel/linux-kernel-module-rust/pull/113 + let read = T::read(&f, unsafe { &FileRef::from_ptr(file) }, &mut data, unsafe { *offset }.try_into()?)?; + unsafe { (*offset) += bindings::loff_t::try_from(read).unwrap() }; + Ok(read as _) + } +} + +unsafe extern "C" fn read_iter_callback( + iocb: *mut bindings::kiocb, + raw_iter: *mut bindings::iov_iter, +) -> isize { + from_kernel_result! { + let mut iter = unsafe { IovIter::from_ptr(raw_iter) }; + let file = unsafe { (*iocb).ki_filp }; + let offset = unsafe { (*iocb).ki_pos }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let read = T::read(&f, unsafe { &FileRef::from_ptr(file) }, &mut iter, offset.try_into()?)?; + unsafe { (*iocb).ki_pos += bindings::loff_t::try_from(read).unwrap() }; + Ok(read as _) + } +} + +unsafe extern "C" fn write_callback( + file: *mut bindings::file, + buf: *const c_types::c_char, + len: c_types::c_size_t, + offset: *mut bindings::loff_t, +) -> c_types::c_ssize_t { + from_kernel_result! { + let mut data = unsafe { UserSlicePtr::new(buf as *mut c_types::c_void, len).reader() }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + // No `FMODE_UNSIGNED_OFFSET` support, so `offset` must be in [0, 2^63). + // See discussion in https://github.com/fishinabarrel/linux-kernel-module-rust/pull/113 + let written = T::write(&f, unsafe { &FileRef::from_ptr(file) }, &mut data, unsafe { *offset }.try_into()?)?; + unsafe { (*offset) += bindings::loff_t::try_from(written).unwrap() }; + Ok(written as _) + } +} + +unsafe extern "C" fn write_iter_callback( + iocb: *mut bindings::kiocb, + raw_iter: *mut bindings::iov_iter, +) -> isize { + from_kernel_result! { + let mut iter = unsafe { IovIter::from_ptr(raw_iter) }; + let file = unsafe { (*iocb).ki_filp }; + let offset = unsafe { (*iocb).ki_pos }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. 
+ let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let written = T::write(&f, unsafe { &FileRef::from_ptr(file) }, &mut iter, offset.try_into()?)?; + unsafe { (*iocb).ki_pos += bindings::loff_t::try_from(written).unwrap() }; + Ok(written as _) + } +} + +unsafe extern "C" fn release_callback( + _inode: *mut bindings::inode, + file: *mut bindings::file, +) -> c_types::c_int { + let ptr = mem::replace(unsafe { &mut (*file).private_data }, ptr::null_mut()); + T::release(unsafe { T::Wrapper::from_pointer(ptr as _) }, unsafe { + &FileRef::from_ptr(file) + }); + 0 +} + +unsafe extern "C" fn llseek_callback( + file: *mut bindings::file, + offset: bindings::loff_t, + whence: c_types::c_int, +) -> bindings::loff_t { + from_kernel_result! { + let off = match whence as u32 { + bindings::SEEK_SET => SeekFrom::Start(offset.try_into()?), + bindings::SEEK_CUR => SeekFrom::Current(offset), + bindings::SEEK_END => SeekFrom::End(offset), + _ => return Err(Error::EINVAL), + }; + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let off = T::seek(&f, unsafe { &FileRef::from_ptr(file) }, off)?; + Ok(off as bindings::loff_t) + } +} + +unsafe extern "C" fn unlocked_ioctl_callback( + file: *mut bindings::file, + cmd: c_types::c_uint, + arg: c_types::c_ulong, +) -> c_types::c_long { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let mut cmd = IoctlCommand::new(cmd as _, arg as _); + let ret = T::ioctl(&f, unsafe { &FileRef::from_ptr(file) }, &mut cmd)?; + Ok(ret as _) + } +} + +unsafe extern "C" fn compat_ioctl_callback( + file: *mut bindings::file, + cmd: c_types::c_uint, + arg: c_types::c_ulong, +) -> c_types::c_long { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. + let f = unsafe { T::Wrapper::borrow((*file).private_data) }; + let mut cmd = IoctlCommand::new(cmd as _, arg as _); + let ret = T::compat_ioctl(&f, unsafe { &FileRef::from_ptr(file) }, &mut cmd)?; + Ok(ret as _) + } +} + +unsafe extern "C" fn mmap_callback( + file: *mut bindings::file, + vma: *mut bindings::vm_area_struct, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `private_data` was initialised by `open_callback` with a value returned by + // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release` + // callback, which the C API guarantees that will be called only when all references to + // `file` have been released, so we know it can't be called while this function is running. 
+        let f = unsafe { T::Wrapper::borrow((*file).private_data) };
+        T::mmap(&f, unsafe { &FileRef::from_ptr(file) }, unsafe { &mut *vma })?;
+        Ok(0)
+    }
+}
+
+unsafe extern "C" fn fsync_callback<T: FileOperations>(
+    file: *mut bindings::file,
+    start: bindings::loff_t,
+    end: bindings::loff_t,
+    datasync: c_types::c_int,
+) -> c_types::c_int {
+    from_kernel_result! {
+        let start = start.try_into()?;
+        let end = end.try_into()?;
+        let datasync = datasync != 0;
+        // SAFETY: `private_data` was initialised by `open_callback` with a value returned by
+        // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release`
+        // callback, which the C API guarantees that will be called only when all references to
+        // `file` have been released, so we know it can't be called while this function is running.
+        let f = unsafe { T::Wrapper::borrow((*file).private_data) };
+        let res = T::fsync(&f, unsafe { &FileRef::from_ptr(file) }, start, end, datasync)?;
+        Ok(res.try_into().unwrap())
+    }
+}
+
+unsafe extern "C" fn poll_callback<T: FileOperations>(
+    file: *mut bindings::file,
+    wait: *mut bindings::poll_table_struct,
+) -> bindings::__poll_t {
+    // SAFETY: `private_data` was initialised by `open_callback` with a value returned by
+    // `T::Wrapper::into_pointer`. `T::Wrapper::from_pointer` is only called by the `release`
+    // callback, which the C API guarantees that will be called only when all references to `file`
+    // have been released, so we know it can't be called while this function is running.
+    let f = unsafe { T::Wrapper::borrow((*file).private_data) };
+    match T::poll(&f, unsafe { &FileRef::from_ptr(file) }, unsafe {
+        &PollTable::from_ptr(wait)
+    }) {
+        Ok(v) => v,
+        Err(_) => bindings::POLLERR,
+    }
+}
+
+pub(crate) struct FileOperationsVtable<A, T>(marker::PhantomData<A>, marker::PhantomData<T>);
+
+impl<A: FileOpenAdapter, T: FileOpener<A::Arg>> FileOperationsVtable<A, T> {
+    const VTABLE: bindings::file_operations = bindings::file_operations {
+        open: Some(open_callback::<A, T>),
+        release: Some(release_callback::<T>),
+        read: if T::TO_USE.read {
+            Some(read_callback::<T>)
+        } else {
+            None
+        },
+        write: if T::TO_USE.write {
+            Some(write_callback::<T>)
+        } else {
+            None
+        },
+        llseek: if T::TO_USE.seek {
+            Some(llseek_callback::<T>)
+        } else {
+            None
+        },
+
+        check_flags: None,
+        compat_ioctl: if T::TO_USE.compat_ioctl {
+            Some(compat_ioctl_callback::<T>)
+        } else {
+            None
+        },
+        copy_file_range: None,
+        fallocate: None,
+        fadvise: None,
+        fasync: None,
+        flock: None,
+        flush: None,
+        fsync: if T::TO_USE.fsync {
+            Some(fsync_callback::<T>)
+        } else {
+            None
+        },
+        get_unmapped_area: None,
+        iterate: None,
+        iterate_shared: None,
+        iopoll: None,
+        lock: None,
+        mmap: if T::TO_USE.mmap {
+            Some(mmap_callback::<T>)
+        } else {
+            None
+        },
+        mmap_supported_flags: 0,
+        owner: ptr::null_mut(),
+        poll: if T::TO_USE.poll {
+            Some(poll_callback::<T>)
+        } else {
+            None
+        },
+        read_iter: if T::TO_USE.read_iter {
+            Some(read_iter_callback::<T>)
+        } else {
+            None
+        },
+        remap_file_range: None,
+        sendpage: None,
+        setlease: None,
+        show_fdinfo: None,
+        splice_read: None,
+        splice_write: None,
+        unlocked_ioctl: if T::TO_USE.ioctl {
+            Some(unlocked_ioctl_callback::<T>)
+        } else {
+            None
+        },
+        write_iter: if T::TO_USE.write_iter {
+            Some(write_iter_callback::<T>)
+        } else {
+            None
+        },
+    };
+
+    /// Builds an instance of [`struct file_operations`].
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that the adapter is compatible with the way the device is registered.
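+    ///
+    /// For instance, the `miscdev` registration later in this patch picks the adapter and the
+    /// implementation and stores the result in the C structure it owns, along these lines
+    /// (sketch):
+    ///
+    /// ```ignore
+    /// this.mdev.fops = unsafe { FileOperationsVtable::<Self, F>::build() };
+    /// ```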
+    pub(crate) const unsafe fn build() -> &'static bindings::file_operations {
+        &Self::VTABLE
+    }
+}
+
+/// Represents which fields of [`struct file_operations`] should be populated with pointers.
+pub struct ToUse {
+    /// The `read` field of [`struct file_operations`].
+    pub read: bool,
+
+    /// The `read_iter` field of [`struct file_operations`].
+    pub read_iter: bool,
+
+    /// The `write` field of [`struct file_operations`].
+    pub write: bool,
+
+    /// The `write_iter` field of [`struct file_operations`].
+    pub write_iter: bool,
+
+    /// The `llseek` field of [`struct file_operations`].
+    pub seek: bool,
+
+    /// The `unlocked_ioctl` field of [`struct file_operations`].
+    pub ioctl: bool,
+
+    /// The `compat_ioctl` field of [`struct file_operations`].
+    pub compat_ioctl: bool,
+
+    /// The `fsync` field of [`struct file_operations`].
+    pub fsync: bool,
+
+    /// The `mmap` field of [`struct file_operations`].
+    pub mmap: bool,
+
+    /// The `poll` field of [`struct file_operations`].
+    pub poll: bool,
+}
+
+/// A constant version where all values are set to `false`, that is, all supported fields will
+/// be set to null pointers.
+pub const USE_NONE: ToUse = ToUse {
+    read: false,
+    read_iter: false,
+    write: false,
+    write_iter: false,
+    seek: false,
+    ioctl: false,
+    compat_ioctl: false,
+    fsync: false,
+    mmap: false,
+    poll: false,
+};
+
+/// Defines the [`FileOperations::TO_USE`] field based on a list of fields to be populated.
+#[macro_export]
+macro_rules! declare_file_operations {
+    () => {
+        const TO_USE: $crate::file_operations::ToUse = $crate::file_operations::USE_NONE;
+    };
+    ($($i:ident),+) => {
+        const TO_USE: kernel::file_operations::ToUse =
+            $crate::file_operations::ToUse {
+                $($i: true),+ ,
+                ..$crate::file_operations::USE_NONE
+            };
+    };
+}
+
+/// Allows the handling of ioctls defined with the `_IO`, `_IOR`, `_IOW`, and `_IOWR` macros.
+///
+/// For each macro, there is a handler function that takes the appropriate types as arguments.
+pub trait IoctlHandler: Sync {
+    /// The type of the first argument to each associated function.
+    type Target;
+
+    /// Handles ioctls defined with the `_IO` macro, that is, with no buffer as argument.
+    fn pure(_this: &Self::Target, _file: &File, _cmd: u32, _arg: usize) -> Result<i32> {
+        Err(Error::EINVAL)
+    }
+
+    /// Handles ioctls defined with the `_IOR` macro, that is, with an output buffer provided as
+    /// argument.
+    fn read(
+        _this: &Self::Target,
+        _file: &File,
+        _cmd: u32,
+        _writer: &mut UserSlicePtrWriter,
+    ) -> Result<i32> {
+        Err(Error::EINVAL)
+    }
+
+    /// Handles ioctls defined with the `_IOW` macro, that is, with an input buffer provided as
+    /// argument.
+    fn write(
+        _this: &Self::Target,
+        _file: &File,
+        _cmd: u32,
+        _reader: &mut UserSlicePtrReader,
+    ) -> Result<i32> {
+        Err(Error::EINVAL)
+    }
+
+    /// Handles ioctls defined with the `_IOWR` macro, that is, with a buffer for both input and
+    /// output provided as argument.
+    fn read_write(
+        _this: &Self::Target,
+        _file: &File,
+        _cmd: u32,
+        _data: UserSlicePtr,
+    ) -> Result<i32> {
+        Err(Error::EINVAL)
+    }
+}
+
+/// Represents an ioctl command.
+///
+/// It can use the components of an ioctl command to dispatch ioctls using
+/// [`IoctlCommand::dispatch`].
+pub struct IoctlCommand {
+    cmd: u32,
+    arg: usize,
+    user_slice: Option<UserSlicePtr>,
+}
+
+impl IoctlCommand {
+    /// Constructs a new [`IoctlCommand`].
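+    ///
+    /// A command built here is normally consumed via [`IoctlCommand::dispatch`]; a minimal
+    /// caller might look like this sketch, assuming `Self` also implements [`IoctlHandler`]
+    /// with `Target = Self`:
+    ///
+    /// ```ignore
+    /// fn ioctl(this: &Self, file: &File, cmd: &mut IoctlCommand) -> Result<i32> {
+    ///     cmd.dispatch::<Self>(this, file)
+    /// }
+    /// ```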
+    fn new(cmd: u32, arg: usize) -> Self {
+        let size = (cmd >> bindings::_IOC_SIZESHIFT) & bindings::_IOC_SIZEMASK;
+
+        // SAFETY: We only create one instance of the user slice per ioctl call, so TOCTOU issues
+        // are not possible.
+        let user_slice = Some(unsafe { UserSlicePtr::new(arg as _, size as _) });
+        Self {
+            cmd,
+            arg,
+            user_slice,
+        }
+    }
+
+    /// Dispatches the given ioctl to the appropriate handler based on the value of the command. It
+    /// also creates a [`UserSlicePtr`], [`UserSlicePtrReader`], or [`UserSlicePtrWriter`]
+    /// depending on the direction of the buffer of the command.
+    ///
+    /// It is meant to be used in implementations of [`FileOperations::ioctl`] and
+    /// [`FileOperations::compat_ioctl`].
+    pub fn dispatch<T: IoctlHandler>(&mut self, handler: &T::Target, file: &File) -> Result<i32> {
+        let dir = (self.cmd >> bindings::_IOC_DIRSHIFT) & bindings::_IOC_DIRMASK;
+        if dir == bindings::_IOC_NONE {
+            return T::pure(handler, file, self.cmd, self.arg);
+        }
+
+        let data = self.user_slice.take().ok_or(Error::EINVAL)?;
+        const READ_WRITE: u32 = bindings::_IOC_READ | bindings::_IOC_WRITE;
+        match dir {
+            bindings::_IOC_WRITE => T::write(handler, file, self.cmd, &mut data.reader()),
+            bindings::_IOC_READ => T::read(handler, file, self.cmd, &mut data.writer()),
+            READ_WRITE => T::read_write(handler, file, self.cmd, data),
+            _ => Err(Error::EINVAL),
+        }
+    }
+
+    /// Returns the raw 32-bit value of the command and the ptr-sized argument.
+    pub fn raw(&self) -> (u32, usize) {
+        (self.cmd, self.arg)
+    }
+}
+
+/// Trait for extracting file open arguments from kernel data structures.
+///
+/// This is meant to be implemented by registration managers.
+pub trait FileOpenAdapter {
+    /// The type of argument this adapter extracts.
+    type Arg;
+
+    /// Converts untyped data stored in [`struct inode`] and [`struct file`] (when [`struct
+    /// file_operations::open`] is called) into the given type. For example, for `miscdev`
+    /// devices, a pointer to the registered [`struct miscdev`] is stored in [`struct
+    /// file::private_data`].
+    ///
+    /// # Safety
+    ///
+    /// This function must be called only when [`struct file_operations::open`] is being called for
+    /// a file that was registered by the implementer. The returned pointer must be valid and
+    /// not-null.
+    unsafe fn convert(_inode: *mut bindings::inode, _file: *mut bindings::file)
+        -> *const Self::Arg;
+}
+
+/// Trait for implementers of kernel files.
+///
+/// In addition to the methods in [`FileOperations`], implementers must also provide
+/// [`FileOpener::open`] with a customised argument. This allows a single implementation of
+/// [`FileOperations`] to be used for different types of registrations, for example, `miscdev` and
+/// `chrdev`.
+pub trait FileOpener<T: ?Sized>: FileOperations {
+    /// Creates a new instance of this file.
+    ///
+    /// Corresponds to the `open` function pointer in `struct file_operations`.
+    fn open(context: &T) -> Result<Self::Wrapper>;
+}
+
+impl<T: FileOperations<Wrapper = Box<T>> + Default> FileOpener<()> for T {
+    fn open(_: &()) -> Result<Self::Wrapper> {
+        Ok(Box::try_new(T::default())?)
+    }
+}
+
+/// Corresponds to the kernel's `struct file_operations`.
+///
+/// You implement this trait whenever you would create a `struct file_operations`.
+///
+/// File descriptors may be used from multiple threads/processes concurrently, so your type must be
+/// [`Sync`]. It must also be [`Send`] because [`FileOperations::release`] will be called from the
+/// thread that decrements the associated file's refcount to zero.
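+///
+/// A minimal implementation might look like the following sketch; the `Sample` type and the
+/// chosen error behaviour are illustrative, not part of this patch:
+///
+/// ```ignore
+/// struct Sample;
+///
+/// impl FileOperations for Sample {
+///     kernel::declare_file_operations!(read);
+///
+///     fn read(
+///         _this: &Self,
+///         _file: &File,
+///         data: &mut impl IoBufferWriter,
+///         offset: u64,
+///     ) -> Result<usize> {
+///         // A one-byte file: report EOF for any non-zero offset.
+///         if offset > 0 {
+///             return Ok(0);
+///         }
+///         data.write_slice(b"!")?;
+///         Ok(1)
+///     }
+/// }
+/// ```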
+pub trait FileOperations: Send + Sync + Sized {
+    /// The methods to use to populate [`struct file_operations`].
+    const TO_USE: ToUse;
+
+    /// The pointer type that will be used to hold ourselves.
+    type Wrapper: PointerWrapper = Box<Self>;
+
+    /// Cleans up after the last reference to the file goes away.
+    ///
+    /// Note that the object is moved, so it will be freed automatically unless the implementation
+    /// moves it elsewhere.
+    ///
+    /// Corresponds to the `release` function pointer in `struct file_operations`.
+    fn release(_obj: Self::Wrapper, _file: &File) {}
+
+    /// Reads data from this file to the caller's buffer.
+    ///
+    /// Corresponds to the `read` and `read_iter` function pointers in `struct file_operations`.
+    fn read(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _data: &mut impl IoBufferWriter,
+        _offset: u64,
+    ) -> Result<usize> {
+        Err(Error::EINVAL)
+    }
+
+    /// Writes data from the caller's buffer to this file.
+    ///
+    /// Corresponds to the `write` and `write_iter` function pointers in `struct file_operations`.
+    fn write(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _data: &mut impl IoBufferReader,
+        _offset: u64,
+    ) -> Result<usize> {
+        Err(Error::EINVAL)
+    }
+
+    /// Changes the position of the file.
+    ///
+    /// Corresponds to the `llseek` function pointer in `struct file_operations`.
+    fn seek(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _offset: SeekFrom,
+    ) -> Result<u64> {
+        Err(Error::EINVAL)
+    }
+
+    /// Performs IO control operations that are specific to the file.
+    ///
+    /// Corresponds to the `unlocked_ioctl` function pointer in `struct file_operations`.
+    fn ioctl(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _cmd: &mut IoctlCommand,
+    ) -> Result<i32> {
+        Err(Error::ENOTTY)
+    }
+
+    /// Performs 32-bit IO control operations that are specific to the file on 64-bit kernels.
+    ///
+    /// Corresponds to the `compat_ioctl` function pointer in `struct file_operations`.
+    fn compat_ioctl(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _cmd: &mut IoctlCommand,
+    ) -> Result<i32> {
+        Err(Error::ENOTTY)
+    }
+
+    /// Syncs pending changes to this file.
+    ///
+    /// Corresponds to the `fsync` function pointer in `struct file_operations`.
+    fn fsync(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _start: u64,
+        _end: u64,
+        _datasync: bool,
+    ) -> Result<u32> {
+        Err(Error::EINVAL)
+    }
+
+    /// Maps areas of the caller's virtual memory with device/file memory.
+    ///
+    /// Corresponds to the `mmap` function pointer in `struct file_operations`.
+    /// TODO: wrap `vm_area_struct` so that we don't have to expose it.
+    fn mmap(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _vma: &mut bindings::vm_area_struct,
+    ) -> Result {
+        Err(Error::EINVAL)
+    }
+
+    /// Checks the state of the file and optionally registers for notification when the state
+    /// changes.
+    ///
+    /// Corresponds to the `poll` function pointer in `struct file_operations`.
+    fn poll(
+        _this: &<<Self::Wrapper as PointerWrapper>::Borrowed as Deref>::Target,
+        _file: &File,
+        _table: &PollTable,
+    ) -> Result<u32> {
+        Ok(bindings::POLLIN | bindings::POLLOUT | bindings::POLLRDNORM | bindings::POLLWRNORM)
+    }
+}
diff --git a/rust/kernel/io_buffer.rs b/rust/kernel/io_buffer.rs
new file mode 100644
index 0000000000000..ccecc4763aca3
--- /dev/null
+++ b/rust/kernel/io_buffer.rs
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Buffers used in IO.
+
+use crate::Result;
+use alloc::vec::Vec;
+use core::mem::{size_of, MaybeUninit};
+
+/// Represents a buffer to be read from during IO.
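+///
+/// For example, a fixed-size header followed by a variable payload can be read as below
+/// (sketch; `u32` stands in for any [`ReadableFromBytes`] type):
+///
+/// ```ignore
+/// fn parse(reader: &mut impl IoBufferReader) -> Result<(u32, Vec<u8>)> {
+///     let tag = reader.read::<u32>()?; // POD read of the first four bytes.
+///     let payload = reader.read_all()?; // Copies whatever remains.
+///     Ok((tag, payload))
+/// }
+/// ```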
+pub trait IoBufferReader {
+    /// Returns the number of bytes left to be read from the io buffer.
+    ///
+    /// Note that even reading less than this number of bytes may fail.
+    fn len(&self) -> usize;
+
+    /// Returns `true` if no data is available in the io buffer.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Reads raw data from the io buffer into a raw kernel buffer.
+    ///
+    /// # Safety
+    ///
+    /// The output buffer must be valid.
+    unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result;
+
+    /// Reads all data remaining in the io buffer.
+    ///
+    /// Returns `EFAULT` if the address does not currently point to mapped, readable memory.
+    fn read_all(&mut self) -> Result<Vec<u8>> {
+        let mut data = Vec::<u8>::new();
+        data.try_resize(self.len(), 0)?;
+
+        // SAFETY: The output buffer is valid as we just allocated it.
+        unsafe { self.read_raw(data.as_mut_ptr(), data.len())? };
+        Ok(data)
+    }
+
+    /// Reads a byte slice from the io buffer.
+    ///
+    /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the user slice or
+    /// if the address does not currently point to mapped, readable memory.
+    fn read_slice(&mut self, data: &mut [u8]) -> Result {
+        // SAFETY: The output buffer is valid as it's coming from a live reference.
+        unsafe { self.read_raw(data.as_mut_ptr(), data.len()) }
+    }
+
+    /// Reads the contents of a plain old data (POD) type from the io buffer.
+    fn read<T: ReadableFromBytes>(&mut self) -> Result<T> {
+        let mut out = MaybeUninit::<T>::uninit();
+        // SAFETY: The buffer is valid as it was just allocated.
+        unsafe { self.read_raw(out.as_mut_ptr() as _, size_of::<T>()) }?;
+        // SAFETY: We just initialised the data.
+        Ok(unsafe { out.assume_init() })
+    }
+}
+
+/// Represents a buffer to be written to during IO.
+pub trait IoBufferWriter {
+    /// Returns the number of bytes left to be written into the io buffer.
+    ///
+    /// Note that even writing less than this number of bytes may fail.
+    fn len(&self) -> usize;
+
+    /// Returns `true` if the io buffer cannot hold any additional data.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Writes zeroes to the io buffer.
+    ///
+    /// Differently from the other write functions, `clear` will zero as much as it can and update
+    /// the writer internal state to reflect this. It will, however, return an error if it cannot
+    /// clear `len` bytes.
+    ///
+    /// For example, if a caller requests that 100 bytes be cleared but a segfault happens after
+    /// 20 bytes, then EFAULT is returned and the writer is advanced by 20 bytes.
+    fn clear(&mut self, len: usize) -> Result;
+
+    /// Writes a byte slice into the io buffer.
+    ///
+    /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the io buffer or if
+    /// the address does not currently point to mapped, writable memory.
+    fn write_slice(&mut self, data: &[u8]) -> Result {
+        // SAFETY: The input buffer is valid as it's coming from a live reference.
+        unsafe { self.write_raw(data.as_ptr(), data.len()) }
+    }
+
+    /// Writes raw data to the io buffer from a raw kernel buffer.
+    ///
+    /// # Safety
+    ///
+    /// The input buffer must be valid.
+    unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result;
+
+    /// Writes the contents of the given data into the io buffer.
+    fn write<T: WritableToBytes>(&mut self, data: &T) -> Result {
+        // SAFETY: The input buffer is valid as it's coming from a live
+        // reference to a type that implements `WritableToBytes`.
+        unsafe { self.write_raw(data as *const T as _, size_of::<T>()) }
+    }
+}
+
+/// Specifies that a type is safely readable from byte slices.
+///
+/// Not all types can be safely read from byte slices; examples from
+/// <https://doc.rust-lang.org/reference/behavior-considered-undefined.html> include `bool`
+/// that must be either `0` or `1`, and `char` that cannot be a surrogate or above `char::MAX`.
+///
+/// # Safety
+///
+/// Implementers must ensure that the type is made up only of types that can be safely read from
+/// arbitrary byte sequences (e.g., `u32`, `u64`, etc.).
+pub unsafe trait ReadableFromBytes {}
+
+// SAFETY: All bit patterns are acceptable values of the types below.
+unsafe impl ReadableFromBytes for u8 {}
+unsafe impl ReadableFromBytes for u16 {}
+unsafe impl ReadableFromBytes for u32 {}
+unsafe impl ReadableFromBytes for u64 {}
+unsafe impl ReadableFromBytes for usize {}
+unsafe impl ReadableFromBytes for i8 {}
+unsafe impl ReadableFromBytes for i16 {}
+unsafe impl ReadableFromBytes for i32 {}
+unsafe impl ReadableFromBytes for i64 {}
+unsafe impl ReadableFromBytes for isize {}
+
+/// Specifies that a type is safely writable to byte slices.
+///
+/// This means that we don't read undefined values (which leads to UB) in preparation for writing
+/// to the byte slice. It also ensures that no potentially sensitive information is leaked into the
+/// byte slices.
+///
+/// # Safety
+///
+/// A type must not include padding bytes and must be fully initialised to safely implement
+/// [`WritableToBytes`] (i.e., it doesn't contain [`MaybeUninit`] fields). A composition of
+/// writable types in a structure is not necessarily writable because it may result in padding
+/// bytes.
+pub unsafe trait WritableToBytes {}
+
+// SAFETY: Initialised instances of the following types have no uninitialised portions.
+unsafe impl WritableToBytes for u8 {}
+unsafe impl WritableToBytes for u16 {}
+unsafe impl WritableToBytes for u32 {}
+unsafe impl WritableToBytes for u64 {}
+unsafe impl WritableToBytes for usize {}
+unsafe impl WritableToBytes for i8 {}
+unsafe impl WritableToBytes for i16 {}
+unsafe impl WritableToBytes for i32 {}
+unsafe impl WritableToBytes for i64 {}
+unsafe impl WritableToBytes for isize {}
diff --git a/rust/kernel/io_mem.rs b/rust/kernel/io_mem.rs
new file mode 100644
index 0000000000000..c6d2ea4abc2c7
--- /dev/null
+++ b/rust/kernel/io_mem.rs
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory-mapped IO.
+//!
+//! C header: [`include/asm-generic/io.h`](../../../../include/asm-generic/io.h)
+
+#![allow(dead_code)]
+
+use crate::{bindings, Error, Result};
+use core::convert::TryInto;
+
+/// Represents a memory resource.
+pub struct Resource {
+    offset: bindings::resource_size_t,
+    size: bindings::resource_size_t,
+}
+
+impl Resource {
+    pub(crate) fn new(
+        start: bindings::resource_size_t,
+        end: bindings::resource_size_t,
+    ) -> Option<Self> {
+        if start == 0 {
+            return None;
+        }
+        Some(Self {
+            offset: start,
+            size: end.checked_sub(start)?.checked_add(1)?,
+        })
+    }
+}
+
+/// Represents a memory block of at least `SIZE` bytes.
+///
+/// # Invariants
+///
+/// `ptr` is a non-null and valid address of at least `SIZE` bytes and returned by an `ioremap`
+/// variant. `ptr` is also 8-byte aligned.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::io_mem::{IoMem, Resource};
+///
+/// fn test(res: Resource) -> Result {
+///     // Create an io mem block of at least 100 bytes.
+///     // SAFETY: No DMA operations are initiated through `mem`.
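+///     // (`try_new` is `unsafe`: the caller promises that this mapping is never used to
+///     // initiate DMA, or that any DMA it triggers uses handles from the `dma` module; see
+///     // the `# Safety` section of `try_new` below.)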
+///     let mem = unsafe { IoMem::<100>::try_new(res) }?;
+///
+///     // Read one byte from offset 10.
+///     let v = mem.readb(10);
+///
+///     // Write value to offset 20.
+///     mem.writeb(v, 20);
+///
+///     Ok(())
+/// }
+///
+/// ```
+pub struct IoMem<const SIZE: usize> {
+    ptr: usize,
+}
+
+macro_rules! define_read {
+    ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+        /// Reads IO data from the given offset, known at compile time.
+        ///
+        /// If the offset is not known at compile time, the build will fail.
+        $(#[$attr])*
+        pub fn $name(&self, offset: usize) -> $type_name {
+            Self::check_offset::<$type_name>(offset);
+            let ptr = self.ptr.wrapping_add(offset);
+            // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+            // guarantees that the code won't build if `offset` makes the read go out of bounds
+            // (including the type size).
+            unsafe { bindings::$name(ptr as _) }
+        }
+
+        /// Reads IO data from the given offset.
+        ///
+        /// It fails if/when the offset (plus the type size) is out of bounds.
+        $(#[$attr])*
+        pub fn $try_name(&self, offset: usize) -> Result<$type_name> {
+            if !Self::offset_ok::<$type_name>(offset) {
+                return Err(Error::EINVAL);
+            }
+            let ptr = self.ptr.wrapping_add(offset);
+            // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+            // returns an error if `offset` would make the read go out of bounds (including the
+            // type size).
+            Ok(unsafe { bindings::$name(ptr as _) })
+        }
+    };
}

+macro_rules! define_write {
+    ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+        /// Writes IO data to the given offset, known at compile time.
+        ///
+        /// If the offset is not known at compile time, the build will fail.
+        $(#[$attr])*
+        pub fn $name(&self, value: $type_name, offset: usize) {
+            Self::check_offset::<$type_name>(offset);
+            let ptr = self.ptr.wrapping_add(offset);
+            // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+            // guarantees that the code won't link if `offset` makes the write go out of bounds
+            // (including the type size).
+            unsafe { bindings::$name(value, ptr as _) }
+        }
+
+        /// Writes IO data to the given offset.
+        ///
+        /// It fails if/when the offset (plus the type size) is out of bounds.
+        $(#[$attr])*
+        pub fn $try_name(&self, value: $type_name, offset: usize) -> Result {
+            if !Self::offset_ok::<$type_name>(offset) {
+                return Err(Error::EINVAL);
+            }
+            let ptr = self.ptr.wrapping_add(offset);
+            // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+            // returns an error if `offset` would make the write go out of bounds (including the
+            // type size).
+            unsafe { bindings::$name(value, ptr as _) };
+            Ok(())
+        }
+    };
+}
+
+impl<const SIZE: usize> IoMem<SIZE> {
+    /// Tries to create a new instance of a memory block.
+    ///
+    /// The resource described by `res` is mapped into the CPU's address space so that it can be
+    /// accessed directly. It is also consumed by this function so that it can't be mapped again
+    /// to a different address.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that either (a) the resulting interface cannot be used to initiate DMA
+    /// operations, or (b) that DMA operations initiated via the returned interface use DMA handles
+    /// allocated through the `dma` module.
+    pub unsafe fn try_new(res: Resource) -> Result<Self> {
+        // Check that the resource has at least `SIZE` bytes in it.
+        if res.size < SIZE.try_into()? {
+            return Err(Error::EINVAL);
+        }
+
+        // To be able to check pointers at compile time based only on offsets, we need to guarantee
+        // that the base pointer is minimally aligned. So we conservatively expect at least 8 bytes.
+        if res.offset % 8 != 0 {
+            crate::pr_err!("Physical address is not 64-bit aligned: {:x}", res.offset);
+            return Err(Error::EDOM);
+        }
+
+        // Try to map the resource.
+        // SAFETY: Just mapping the memory range.
+        let addr = unsafe { bindings::ioremap(res.offset, res.size as _) };
+        if addr.is_null() {
+            Err(Error::ENOMEM)
+        } else {
+            // INVARIANT: `addr` is non-null and was returned by `ioremap`, so it is valid. It is
+            // also 8-byte aligned because we checked it above.
+            Ok(Self { ptr: addr as usize })
+        }
+    }
+
+    const fn offset_ok<T>(offset: usize) -> bool {
+        let type_size = core::mem::size_of::<T>();
+        if let Some(end) = offset.checked_add(type_size) {
+            end <= SIZE && offset % type_size == 0
+        } else {
+            false
+        }
+    }
+
+    const fn check_offset<T>(offset: usize) {
+        crate::build_assert!(Self::offset_ok::<T>(offset), "IoMem offset overflow");
+    }
+
+    define_read!(readb, try_readb, u8);
+    define_read!(readw, try_readw, u16);
+    define_read!(readl, try_readl, u32);
+    define_read!(
+        #[cfg(CONFIG_64BIT)]
+        readq,
+        try_readq,
+        u64
+    );
+
+    define_write!(writeb, try_writeb, u8);
+    define_write!(writew, try_writew, u16);
+    define_write!(writel, try_writel, u32);
+    define_write!(
+        #[cfg(CONFIG_64BIT)]
+        writeq,
+        try_writeq,
+        u64
+    );
+}
+
+impl<const SIZE: usize> Drop for IoMem<SIZE> {
+    fn drop(&mut self) {
+        // SAFETY: By the type invariant, `self.ptr` is a value returned by a previous successful
+        // call to `ioremap`.
+        unsafe { bindings::iounmap(self.ptr as _) };
+    }
+}
diff --git a/rust/kernel/iov_iter.rs b/rust/kernel/iov_iter.rs
new file mode 100644
index 0000000000000..fe738c529b848
--- /dev/null
+++ b/rust/kernel/iov_iter.rs
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! IO vector iterators.
+//!
+//! C header: [`include/linux/uio.h`](../../../../include/linux/uio.h)
+
+use crate::{
+    bindings,
+    error::Error,
+    io_buffer::{IoBufferReader, IoBufferWriter},
+    Result,
+};
+
+/// Wraps the kernel's `struct iov_iter`.
+///
+/// # Invariants
+///
+/// The pointer `IovIter::ptr` is non-null and valid.
+pub struct IovIter {
+    ptr: *mut bindings::iov_iter,
+}
+
+impl IovIter {
+    fn common_len(&self) -> usize {
+        // SAFETY: `IovIter::ptr` is guaranteed to be valid by the type invariants.
+        unsafe { (*self.ptr).count }
+    }
+
+    /// Constructs a new [`struct iov_iter`] wrapper.
+    ///
+    /// # Safety
+    ///
+    /// The pointer `ptr` must be non-null and valid for the lifetime of the object.
+    pub(crate) unsafe fn from_ptr(ptr: *mut bindings::iov_iter) -> Self {
+        // INVARIANTS: the safety contract ensures the type invariant will hold.
+        Self { ptr }
+    }
+}
+
+impl IoBufferWriter for IovIter {
+    fn len(&self) -> usize {
+        self.common_len()
+    }
+
+    fn clear(&mut self, mut len: usize) -> Result {
+        while len > 0 {
+            // SAFETY: `IovIter::ptr` is guaranteed to be valid by the type invariants.
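+            // `iov_iter_zero` advances the iterator by the number of bytes it managed to
+            // zero; a return value of `0` means the destination pages were not writable.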
+            let written = unsafe { bindings::iov_iter_zero(len, self.ptr) };
+            if written == 0 {
+                return Err(Error::EFAULT);
+            }
+
+            len -= written;
+        }
+        Ok(())
+    }
+
+    unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result {
+        let res = unsafe { bindings::copy_to_iter(data as _, len, self.ptr) };
+        if res != len {
+            Err(Error::EFAULT)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl IoBufferReader for IovIter {
+    fn len(&self) -> usize {
+        self.common_len()
+    }
+
+    unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result {
+        let res = unsafe { bindings::copy_from_iter(out as _, len, self.ptr) };
+        if res != len {
+            Err(Error::EFAULT)
+        } else {
+            Ok(())
+        }
+    }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
new file mode 100644
index 0000000000000..d7fc98f064fb5
--- /dev/null
+++ b/rust/kernel/lib.rs
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! The `kernel` crate.
+//!
+//! This crate contains the kernel APIs that have been ported or wrapped for
+//! usage by Rust code in the kernel and is shared by all of them.
+//!
+//! In other words, all the rest of the Rust code in the kernel (e.g. kernel
+//! modules written in Rust) depends on [`core`], [`alloc`] and this crate.
+//!
+//! If you need a kernel C API that is not ported or wrapped yet here, then
+//! do so first instead of bypassing this crate.
+
+#![no_std]
+#![feature(
+    allocator_api,
+    associated_type_defaults,
+    concat_idents,
+    const_fn_trait_bound,
+    const_mut_refs,
+    const_panic,
+    const_fn_transmute,
+    const_unreachable_unchecked,
+    doc_cfg,
+    ptr_metadata,
+    receiver_trait,
+    coerce_unsized,
+    dispatch_from_dyn,
+    unsize,
+    try_reserve
+)]
+
+// Ensure conditional compilation based on the kernel configuration works;
+// otherwise we may silently break things like initcall handling.
+#[cfg(not(CONFIG_RUST))]
+compile_error!("Missing kernel configuration for conditional compilation");
+
+#[cfg(not(test))]
+#[cfg(not(testlib))]
+mod allocator;
+
+#[doc(hidden)]
+pub mod bindings;
+
+pub mod buffer;
+pub mod c_types;
+pub mod chrdev;
+mod error;
+pub mod file;
+pub mod file_operations;
+pub mod miscdev;
+pub mod pages;
+pub mod power;
+pub mod security;
+pub mod str;
+pub mod task;
+
+pub mod linked_list;
+mod raw_list;
+pub mod rbtree;
+
+#[doc(hidden)]
+pub mod module_param;
+
+mod build_assert;
+pub mod prelude;
+pub mod print;
+pub mod random;
+mod static_assert;
+#[doc(hidden)]
+pub mod std_vendor;
+pub mod sync;
+
+#[cfg(any(CONFIG_SYSCTL, doc))]
+#[doc(cfg(CONFIG_SYSCTL))]
+pub mod sysctl;
+
+pub mod io_buffer;
+pub mod io_mem;
+pub mod iov_iter;
+pub mod of;
+pub mod platdev;
+mod types;
+pub mod user_ptr;
+
+#[doc(hidden)]
+pub use build_error::build_error;
+
+pub use crate::error::{Error, Result};
+pub use crate::types::{Mode, ScopeGuard};
+
+use core::marker::PhantomData;
+
+/// Page size defined in terms of the `PAGE_SHIFT` macro from C.
+///
+/// [`PAGE_SHIFT`]: ../../../include/asm-generic/page.h
+pub const PAGE_SIZE: usize = 1 << bindings::PAGE_SHIFT;
+
+/// Prefix to appear before log messages printed from within the kernel crate.
+const __LOG_PREFIX: &[u8] = b"rust_kernel\0";
+
+/// The top level entrypoint to implementing a kernel module.
+///
+/// For any teardown or cleanup operations, your type may implement [`Drop`].
+pub trait KernelModule: Sized + Sync {
+    /// Called at module initialization time.
+    ///
+    /// Use this method to perform whatever setup or registration your module
+    /// should do.
+    ///
+    /// Equivalent to the `module_init` macro in the C API.
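+    ///
+    /// A stateless module might implement it as in this sketch (the type name is
+    /// illustrative):
+    ///
+    /// ```ignore
+    /// struct MyModule;
+    ///
+    /// impl KernelModule for MyModule {
+    ///     fn init() -> Result<Self> {
+    ///         pr_info!("loaded\n");
+    ///         Ok(MyModule)
+    ///     }
+    /// }
+    /// ```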
+    fn init() -> Result<Self>;
+}
+
+/// Equivalent to `THIS_MODULE` in the C API.
+///
+/// C header: `include/linux/export.h`
+pub struct ThisModule(*mut bindings::module);
+
+// SAFETY: `THIS_MODULE` may be used from all threads within a module.
+unsafe impl Sync for ThisModule {}
+
+impl ThisModule {
+    /// Creates a [`ThisModule`] given the `THIS_MODULE` pointer.
+    ///
+    /// # Safety
+    ///
+    /// The pointer must be equal to the right `THIS_MODULE`.
+    pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule {
+        ThisModule(ptr)
+    }
+
+    /// Locks the module parameters to access them.
+    ///
+    /// Returns a [`KParamGuard`] that will release the lock when dropped.
+    pub fn kernel_param_lock(&self) -> KParamGuard<'_> {
+        // SAFETY: `kernel_param_lock` will check if the pointer is null and
+        // use the built-in mutex in that case.
+        #[cfg(CONFIG_SYSFS)]
+        unsafe {
+            bindings::kernel_param_lock(self.0)
+        }
+
+        KParamGuard {
+            #[cfg(CONFIG_SYSFS)]
+            this_module: self,
+            phantom: PhantomData,
+        }
+    }
+}
+
+/// Scoped lock on the kernel parameters of [`ThisModule`].
+///
+/// Lock will be released when this struct is dropped.
+pub struct KParamGuard<'a> {
+    #[cfg(CONFIG_SYSFS)]
+    this_module: &'a ThisModule,
+    phantom: PhantomData<&'a ()>,
+}
+
+#[cfg(CONFIG_SYSFS)]
+impl<'a> Drop for KParamGuard<'a> {
+    fn drop(&mut self) {
+        // SAFETY: `kernel_param_lock` will check if the pointer is null and
+        // use the built-in mutex in that case. The existence of `self`
+        // guarantees that the lock is held.
+        unsafe { bindings::kernel_param_unlock(self.this_module.0) }
+    }
+}
+
+/// Calculates the offset of a field from the beginning of the struct it belongs to.
+///
+/// # Example
+///
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::offset_of;
+/// struct Test {
+///     a: u64,
+///     b: u32,
+/// }
+///
+/// fn test() {
+///     // This prints `8`.
+///     pr_info!("{}\n", offset_of!(Test, b));
+/// }
+/// ```
+#[macro_export]
+macro_rules! offset_of {
+    ($type:ty, $($f:tt)*) => {{
+        let tmp = core::mem::MaybeUninit::<$type>::uninit();
+        let outer = tmp.as_ptr();
+        // To avoid warnings when nesting `unsafe` blocks.
+        #[allow(unused_unsafe)]
+        // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that
+        // we don't actually read from `outer` (which would be UB) nor create an intermediate
+        // reference.
+        let inner = unsafe { core::ptr::addr_of!((*outer).$($f)*) } as *const u8;
+        // To avoid warnings when nesting `unsafe` blocks.
+        #[allow(unused_unsafe)]
+        // SAFETY: The two pointers are within the same allocation block.
+        unsafe { inner.offset_from(outer as *const u8) }
+    }}
+}
+
+/// Produces a pointer to an object from a pointer to one of its fields.
+///
+/// # Safety
+///
+/// Callers must ensure that the pointer to the field is in fact a pointer to the specified field,
+/// as opposed to a pointer to another object of the same type. If this condition is not met,
+/// any dereference of the resulting pointer is UB.
+///
+/// # Example
+///
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::container_of;
+/// struct Test {
+///     a: u64,
+///     b: u32,
+/// }
+///
+/// fn test() {
+///     let test = Test { a: 10, b: 20 };
+///     let b_ptr = &test.b;
+///     let test_alias = container_of!(b_ptr, Test, b);
+///     // This prints `true`.
+///     pr_info!("{}\n", core::ptr::eq(&test, test_alias));
+/// }
+/// ```
+#[macro_export]
+macro_rules! container_of {
+    ($ptr:expr, $type:ty, $($f:tt)*) => {{
+        let ptr = $ptr as *const _ as *const u8;
+        let offset = $crate::offset_of!($type, $($f)*);
+        ptr.wrapping_offset(-offset) as *const $type
+    }}
+}
+
+#[cfg(not(any(testlib, test)))]
+#[panic_handler]
+fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
+    pr_emerg!("{}\n", info);
+    // SAFETY: FFI call.
+    unsafe { bindings::BUG() };
+    // Bindgen currently does not recognize `__noreturn` so `BUG` returns `()`
+    // instead of `!`.
+    // https://github.com/rust-lang/rust-bindgen/issues/2094
+    loop {}
+}
diff --git a/rust/kernel/linked_list.rs b/rust/kernel/linked_list.rs
new file mode 100644
index 0000000000000..3330edcc7ca8d
--- /dev/null
+++ b/rust/kernel/linked_list.rs
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Linked lists.
+//!
+//! TODO: This module is a work in progress.
+
+use alloc::boxed::Box;
+use core::ptr::NonNull;
+
+pub use crate::raw_list::{Cursor, GetLinks, Links};
+use crate::{raw_list, raw_list::RawList, sync::Ref};
+
+// TODO: Use the one from `kernel::file_operations::PointerWrapper` instead.
+/// Wraps an object to be inserted in a linked list.
+pub trait Wrapper<T: ?Sized> {
+    /// Converts the wrapped object into a pointer that represents it.
+    fn into_pointer(self) -> NonNull<T>;
+
+    /// Converts the object back from the pointer representation.
+    ///
+    /// # Safety
+    ///
+    /// The passed pointer must come from a previous call to [`Wrapper::into_pointer()`].
+    unsafe fn from_pointer(ptr: NonNull<T>) -> Self;
+
+    /// Returns a reference to the wrapped object.
+    fn as_ref(&self) -> &T;
+}
+
+impl<T: ?Sized> Wrapper<T> for Box<T> {
+    fn into_pointer(self) -> NonNull<T> {
+        NonNull::new(Box::into_raw(self)).unwrap()
+    }
+
+    unsafe fn from_pointer(ptr: NonNull<T>) -> Self {
+        unsafe { Box::from_raw(ptr.as_ptr()) }
+    }
+
+    fn as_ref(&self) -> &T {
+        AsRef::as_ref(self)
+    }
+}
+
+impl<T: ?Sized> Wrapper<T> for Ref<T> {
+    fn into_pointer(self) -> NonNull<T> {
+        NonNull::new(Ref::into_raw(self) as _).unwrap()
+    }
+
+    unsafe fn from_pointer(ptr: NonNull<T>) -> Self {
+        // SAFETY: The safety requirements of `from_pointer` satisfy the ones from `Ref::from_raw`.
+        unsafe { Ref::from_raw(ptr.as_ptr() as _) }
+    }
+
+    fn as_ref(&self) -> &T {
+        AsRef::as_ref(self)
+    }
+}
+
+impl<T: ?Sized> Wrapper<T> for &T {
+    fn into_pointer(self) -> NonNull<T> {
+        NonNull::from(self)
+    }
+
+    unsafe fn from_pointer(ptr: NonNull<T>) -> Self {
+        unsafe { &*ptr.as_ptr() }
+    }
+
+    fn as_ref(&self) -> &T {
+        self
+    }
+}
+
+/// A descriptor of wrapped list elements.
+pub trait GetLinksWrapped: GetLinks {
+    /// Specifies which wrapper (e.g., `Box` and `Arc`) wraps the list entries.
+    type Wrapped: Wrapper<Self::EntryType>;
+}
+
+impl<T: ?Sized> GetLinksWrapped for Box<T>
+where
+    Box<T>: GetLinks,
+{
+    type Wrapped = Box<<Box<T> as GetLinks>::EntryType>;
+}
+
+impl<T: GetLinks + ?Sized> GetLinks for Box<T> {
+    type EntryType = T::EntryType;
+    fn get_links(data: &Self::EntryType) -> &Links<Self::EntryType> {
+        <T as GetLinks>::get_links(data)
+    }
+}
+
+impl<T: ?Sized> GetLinksWrapped for Ref<T>
+where
+    Ref<T>: GetLinks,
+{
+    type Wrapped = Ref<<Ref<T> as GetLinks>::EntryType>;
+}
+
+impl<T: GetLinks + ?Sized> GetLinks for Ref<T> {
+    type EntryType = T::EntryType;
+
+    fn get_links(data: &Self::EntryType) -> &Links<Self::EntryType> {
+        <T as GetLinks>::get_links(data)
+    }
+}
+
+/// A linked list.
+///
+/// Elements in the list are wrapped and ownership is transferred to the list while the element is
+/// in the list.
+pub struct List<G: GetLinksWrapped> {
+    list: RawList<G>,
+}
+
+impl<G: GetLinksWrapped> List<G> {
+    /// Constructs a new empty linked list.
+    pub fn new() -> Self {
+        Self {
+            list: RawList::new(),
+        }
+    }
+
+    /// Returns whether the list is empty.
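+    ///
+    /// A push/pop round trip shows how ownership moves into and out of the list (sketch;
+    /// `Entry` stands for any type whose `Box` implements [`GetLinks`]):
+    ///
+    /// ```ignore
+    /// let mut list = List::<Box<Entry>>::new();
+    /// list.push_back(Box::try_new(Entry::default())?); // The list now owns the entry.
+    /// assert!(!list.is_empty());
+    /// let entry = list.pop_front(); // Ownership moves back to the caller.
+    /// assert!(list.is_empty());
+    /// ```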
+    pub fn is_empty(&self) -> bool {
+        self.list.is_empty()
+    }
+
+    /// Adds the given object to the end (back) of the list.
+    ///
+    /// It is dropped if it's already on this (or another) list; this can happen for
+    /// reference-counted objects, so dropping means decrementing the reference count.
+    pub fn push_back(&mut self, data: G::Wrapped) {
+        let ptr = data.into_pointer();
+
+        // SAFETY: We took ownership of the entry, so it is safe to insert it.
+        if !unsafe { self.list.push_back(ptr.as_ref()) } {
+            // If insertion failed, rebuild object so that it can be freed.
+            // SAFETY: We just called `into_pointer` above.
+            unsafe { G::Wrapped::from_pointer(ptr) };
+        }
+    }
+
+    /// Inserts the given object after `existing`.
+    ///
+    /// It is dropped if it's already on this (or another) list; this can happen for
+    /// reference-counted objects, so dropping means decrementing the reference count.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that `existing` points to a valid entry that is on the list.
+    pub unsafe fn insert_after(&mut self, existing: NonNull<G::EntryType>, data: G::Wrapped) {
+        let ptr = data.into_pointer();
+        let entry = unsafe { &*existing.as_ptr() };
+        if unsafe { !self.list.insert_after(entry, ptr.as_ref()) } {
+            // If insertion failed, rebuild object so that it can be freed.
+            unsafe { G::Wrapped::from_pointer(ptr) };
+        }
+    }
+
+    /// Removes the given entry.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that `data` is either on this list or in no list. It being on another
+    /// list leads to memory unsafety.
+    pub unsafe fn remove(&mut self, data: &G::Wrapped) -> Option<G::Wrapped> {
+        let entry_ref = Wrapper::as_ref(data);
+        if unsafe { self.list.remove(entry_ref) } {
+            Some(unsafe { G::Wrapped::from_pointer(NonNull::from(entry_ref)) })
+        } else {
+            None
+        }
+    }
+
+    /// Removes the element currently at the front of the list and returns it.
+    ///
+    /// Returns `None` if the list is empty.
+    pub fn pop_front(&mut self) -> Option<G::Wrapped> {
+        let front = self.list.pop_front()?;
+        // SAFETY: Elements on the list were inserted after a call to `into_pointer`.
+        Some(unsafe { G::Wrapped::from_pointer(front) })
+    }
+
+    /// Returns a cursor starting on the first (front) element of the list.
+    pub fn cursor_front(&self) -> Cursor<'_, G> {
+        self.list.cursor_front()
+    }
+
+    /// Returns a mutable cursor starting on the first (front) element of the list.
+    pub fn cursor_front_mut(&mut self) -> CursorMut<'_, G> {
+        CursorMut::new(self.list.cursor_front_mut())
+    }
+}
+
+impl<G: GetLinksWrapped> Default for List<G> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<G: GetLinksWrapped> Drop for List<G> {
+    fn drop(&mut self) {
+        while self.pop_front().is_some() {}
+    }
+}
+
+/// A list cursor that allows traversing a linked list and inspecting & mutating elements.
+pub struct CursorMut<'a, G: GetLinksWrapped> {
+    cursor: raw_list::CursorMut<'a, G>,
+}
+
+impl<'a, G: GetLinksWrapped> CursorMut<'a, G> {
+    fn new(cursor: raw_list::CursorMut<'a, G>) -> Self {
+        Self { cursor }
+    }
+
+    /// Returns the element the cursor is currently positioned on.
+    pub fn current(&mut self) -> Option<&mut G::EntryType> {
+        self.cursor.current()
+    }
+
+    /// Removes the element the cursor is currently positioned on.
+    ///
+    /// After removal, it advances the cursor to the next element.
+    pub fn remove_current(&mut self) -> Option<G::Wrapped> {
+        let ptr = self.cursor.remove_current()?;
+
+        // SAFETY: Elements on the list were inserted after a call to `into_pointer`.
+        Some(unsafe { G::Wrapped::from_pointer(ptr) })
+    }
+
+    /// Returns the element immediately after the one the cursor is positioned on.
+    pub fn peek_next(&mut self) -> Option<&mut G::EntryType> {
+        self.cursor.peek_next()
+    }
+
+    /// Returns the element immediately before the one the cursor is positioned on.
+    pub fn peek_prev(&mut self) -> Option<&mut G::EntryType> {
+        self.cursor.peek_prev()
+    }
+
+    /// Moves the cursor to the next element.
+    pub fn move_next(&mut self) {
+        self.cursor.move_next();
+    }
+}
diff --git a/rust/kernel/miscdev.rs b/rust/kernel/miscdev.rs
new file mode 100644
index 0000000000000..4a4676983f235
--- /dev/null
+++ b/rust/kernel/miscdev.rs
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Miscellaneous devices.
+//!
+//! C header: [`include/linux/miscdevice.h`](../../../../include/linux/miscdevice.h)
+//!
+//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
+
+use crate::bindings;
+use crate::error::{Error, Result};
+use crate::file_operations::{FileOpenAdapter, FileOpener, FileOperationsVtable};
+use crate::str::CStr;
+use alloc::boxed::Box;
+use core::marker::PhantomPinned;
+use core::pin::Pin;
+
+/// A registration of a miscellaneous device.
+pub struct Registration<T: Sync = ()> {
+    registered: bool,
+    mdev: bindings::miscdevice,
+    _pin: PhantomPinned,
+
+    /// Context initialised on construction and made available to all file instances on
+    /// [`FileOpener::open`].
+    pub context: T,
+}
+
+impl<T: Sync> Registration<T> {
+    /// Creates a new [`Registration`] but does not register it yet.
+    ///
+    /// It is allowed to move.
+    pub fn new(context: T) -> Self {
+        Self {
+            registered: false,
+            mdev: bindings::miscdevice::default(),
+            _pin: PhantomPinned,
+            context,
+        }
+    }
+
+    /// Registers a miscellaneous device.
+    ///
+    /// Returns a pinned heap-allocated representation of the registration.
+    pub fn new_pinned<F: FileOpener<T>>(
+        name: &'static CStr,
+        minor: Option<i32>,
+        context: T,
+    ) -> Result<Pin<Box<Self>>> {
+        let mut r = Pin::from(Box::try_new(Self::new(context))?);
+        r.as_mut().register::<F>(name, minor)?;
+        Ok(r)
+    }
+
+    /// Registers a miscellaneous device with the rest of the kernel.
+    ///
+    /// It must be pinned because the memory block that represents the registration is
+    /// self-referential. If a minor is not given, the kernel allocates a new one if possible.
+    pub fn register<F: FileOpener<T>>(
+        self: Pin<&mut Self>,
+        name: &'static CStr,
+        minor: Option<i32>,
+    ) -> Result {
+        // SAFETY: We must ensure that we never move out of `this`.
+        let this = unsafe { self.get_unchecked_mut() };
+        if this.registered {
+            // Already registered.
+            return Err(Error::EINVAL);
+        }
+
+        // SAFETY: The adapter is compatible with `misc_register`.
+        this.mdev.fops = unsafe { FileOperationsVtable::<Self, F>::build() };
+        this.mdev.name = name.as_char_ptr();
+        this.mdev.minor = minor.unwrap_or(bindings::MISC_DYNAMIC_MINOR as i32);
+
+        let ret = unsafe { bindings::misc_register(&mut this.mdev) };
+        if ret < 0 {
+            return Err(Error::from_kernel_errno(ret));
+        }
+        this.registered = true;
+        Ok(())
+    }
+}
+
+impl<T: Sync> FileOpenAdapter for Registration<T> {
+    type Arg = T;
+
+    unsafe fn convert(_inode: *mut bindings::inode, file: *mut bindings::file) -> *const Self::Arg {
+        // SAFETY: the caller must guarantee that `file` is valid.
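+        // `misc_open` stores a pointer to the registered `struct miscdevice` in
+        // `file->private_data` before calling the `open` from this vtable, so `container_of`
+        // recovers the `Registration` and, from it, the user-provided `context`.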
+        let reg = crate::container_of!(unsafe { (*file).private_data }, Self, mdev);
+        unsafe { &(*reg).context }
+    }
+}
+
+// SAFETY: The only method is `register()`, which requires a (pinned) mutable `Registration`, so it
+// is safe to pass `&Registration` to multiple threads because it offers no interior mutability,
+// except maybe through `Registration::context`, but it is itself `Sync`.
+unsafe impl<T: Sync> Sync for Registration<T> {}
+
+// SAFETY: All functions work from any thread. So as long as the `Registration::context` is
+// `Send`, so is `Registration<T>`. `T` needs to be `Sync` because it's a requirement of
+// `Registration<T>`.
+unsafe impl<T: Send + Sync> Send for Registration<T> {}
+
+impl<T: Sync> Drop for Registration<T> {
+    /// Removes the registration from the kernel if it has completed successfully before.
+    fn drop(&mut self) {
+        if self.registered {
+            unsafe { bindings::misc_deregister(&mut self.mdev) }
+        }
+    }
+}
diff --git a/rust/kernel/module_param.rs b/rust/kernel/module_param.rs
new file mode 100644
index 0000000000000..a588449c41fab
--- /dev/null
+++ b/rust/kernel/module_param.rs
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types for module parameters.
+//!
+//! C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h)
+
+use crate::str::CStr;
+use core::fmt::Write;
+
+/// Types that can be used for module parameters.
+///
+/// Note that displaying the type in `sysfs` will fail if
+/// [`alloc::string::ToString::to_string`] (as implemented through the
+/// [`core::fmt::Display`] trait) writes more than [`PAGE_SIZE`]
+/// bytes (including an additional null terminator).
+///
+/// [`PAGE_SIZE`]: `crate::PAGE_SIZE`
+pub trait ModuleParam: core::fmt::Display + core::marker::Sized {
+    /// The `ModuleParam` will be used by the kernel module through this type.
+    ///
+    /// This may differ from `Self` if, for example, `Self` needs to track
+    /// ownership without exposing it or allocate extra space for other possible
+    /// parameter values. See [`StringParam`] or [`ArrayParam`] for examples.
+    type Value: ?Sized;
+
+    /// Whether the parameter is allowed to be set without an argument.
+    ///
+    /// Setting this to `true` allows the parameter to be passed without an
+    /// argument (e.g. just `module.param` instead of `module.param=foo`).
+    const NOARG_ALLOWED: bool;
+
+    /// Convert a parameter argument into the parameter value.
+    ///
+    /// `None` should be returned when parsing of the argument fails.
+    /// `arg == None` indicates that the parameter was passed without an
+    /// argument. If `NOARG_ALLOWED` is set to `false` then `arg` is guaranteed
+    /// to always be `Some(_)`.
+    ///
+    /// Parameters passed at boot time will be set before [`kmalloc`] is
+    /// available (even if the module is loaded at a later time). However, in
+    /// this case, the argument buffer will be valid for the entire lifetime of
+    /// the kernel. So implementations of this method which need to allocate
+    /// should first check that the allocator is available (with
+    /// [`crate::bindings::slab_is_available`]) and when it is not available
+    /// provide an alternative implementation which doesn't allocate. In cases
+    /// where the allocator is not available it is safe to save references to
+    /// `arg` in `Self`, but in other cases a copy should be made.
+    ///
+    /// [`kmalloc`]: ../../../include/linux/slab.h
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self>;
+
+    /// Get the current value of the parameter for use in the kernel module.
+    ///
+    /// This function should not be used directly. Instead use the wrapper
+    /// `read` which will be generated by [`macros::module`].
+    fn value(&self) -> &Self::Value;
+
+    /// Set the module parameter from a string.
+    ///
+    /// Used to set the parameter value when loading the module or when set
+    /// through `sysfs`.
+    ///
+    /// # Safety
+    ///
+    /// If `val` is non-null then it must point to a valid null-terminated
+    /// string. The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn set_param(
+        val: *const crate::c_types::c_char,
+        param: *const crate::bindings::kernel_param,
+    ) -> crate::c_types::c_int {
+        let arg = if val.is_null() {
+            None
+        } else {
+            Some(unsafe { CStr::from_char_ptr(val).as_bytes() })
+        };
+        match Self::try_from_param_arg(arg) {
+            Some(new_value) => {
+                let old_value = unsafe { (*param).__bindgen_anon_1.arg as *mut Self };
+                let _ = unsafe { core::ptr::replace(old_value, new_value) };
+                0
+            }
+            None => crate::error::Error::EINVAL.to_kernel_errno(),
+        }
+    }
+
+    /// Write a string representation of the current parameter value to `buf`.
+    ///
+    /// Used for displaying the current parameter value in `sysfs`.
+    ///
+    /// # Safety
+    ///
+    /// `buf` must be a buffer of length at least `kernel::PAGE_SIZE` that is
+    /// writeable. The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn get_param(
+        buf: *mut crate::c_types::c_char,
+        param: *const crate::bindings::kernel_param,
+    ) -> crate::c_types::c_int {
+        let slice = unsafe { core::slice::from_raw_parts_mut(buf as *mut u8, crate::PAGE_SIZE) };
+        let mut buf = crate::buffer::Buffer::new(slice);
+        match unsafe { write!(buf, "{}\0", *((*param).__bindgen_anon_1.arg as *mut Self)) } {
+            Err(_) => crate::error::Error::EINVAL.to_kernel_errno(),
+            Ok(()) => buf.bytes_written() as crate::c_types::c_int,
+        }
+    }
+
+    /// Drop the parameter.
+    ///
+    /// Called when unloading a module.
+    ///
+    /// # Safety
+    ///
+    /// The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn free(arg: *mut crate::c_types::c_void) {
+        unsafe { core::ptr::drop_in_place(arg as *mut Self) };
+    }
+}
+
+/// Trait for parsing integers.
+///
+/// Strings beginning with `0x`, `0o`, or `0b` are parsed as hex, octal, or
+/// binary respectively. Strings beginning with `0` otherwise are parsed as
+/// octal. Anything else is parsed as decimal. A leading `+` or `-` is also
+/// permitted. Any string parsed by [`kstrtol()`] or [`kstrtoul()`] will be
+/// successfully parsed.
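+///
+/// A few illustrative parses under these rules (sketch; `from_str` is the trait method
+/// below, not [`core::str::FromStr`]):
+///
+/// ```ignore
+/// assert_eq!(i32::from_str("0x1f"), Some(31)); // hex
+/// assert_eq!(i32::from_str("017"), Some(15)); // a leading `0` means octal
+/// assert_eq!(i32::from_str("-0b101"), Some(-5)); // binary, then negated
+/// assert_eq!(i32::from_str("five"), None); // rejected
+/// ```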
+///
+/// [`kstrtol()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtol
+/// [`kstrtoul()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtoul
+trait ParseInt: Sized {
+    fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError>;
+    fn checked_neg(self) -> Option<Self>;
+
+    fn from_str_unsigned(src: &str) -> Result<Self, core::num::ParseIntError> {
+        let (radix, digits) = if let Some(n) = src.strip_prefix("0x") {
+            (16, n)
+        } else if let Some(n) = src.strip_prefix("0X") {
+            (16, n)
+        } else if let Some(n) = src.strip_prefix("0o") {
+            (8, n)
+        } else if let Some(n) = src.strip_prefix("0O") {
+            (8, n)
+        } else if let Some(n) = src.strip_prefix("0b") {
+            (2, n)
+        } else if let Some(n) = src.strip_prefix("0B") {
+            (2, n)
+        } else if src.starts_with('0') {
+            (8, src)
+        } else {
+            (10, src)
+        };
+        Self::from_str_radix(digits, radix)
+    }
+
+    fn from_str(src: &str) -> Option<Self> {
+        match src.bytes().next() {
+            None => None,
+            Some(b'-') => Self::from_str_unsigned(&src[1..]).ok()?.checked_neg(),
+            Some(b'+') => Some(Self::from_str_unsigned(&src[1..]).ok()?),
+            Some(_) => Some(Self::from_str_unsigned(src).ok()?),
+        }
+    }
+}
+
+macro_rules! impl_parse_int {
+    ($ty:ident) => {
+        impl ParseInt for $ty {
+            fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError> {
+                $ty::from_str_radix(src, radix)
+            }
+
+            fn checked_neg(self) -> Option<Self> {
+                self.checked_neg()
+            }
+        }
+    };
+}
+
+impl_parse_int!(i8);
+impl_parse_int!(u8);
+impl_parse_int!(i16);
+impl_parse_int!(u16);
+impl_parse_int!(i32);
+impl_parse_int!(u32);
+impl_parse_int!(i64);
+impl_parse_int!(u64);
+impl_parse_int!(isize);
+impl_parse_int!(usize);
+
+macro_rules! impl_module_param {
+    ($ty:ident) => {
+        impl ModuleParam for $ty {
+            type Value = $ty;
+
+            const NOARG_ALLOWED: bool = false;
+
+            fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+                let bytes = arg?;
+                let utf8 = core::str::from_utf8(bytes).ok()?;
+                <$ty as crate::module_param::ParseInt>::from_str(utf8)
+            }
+
+            fn value(&self) -> &Self::Value {
+                self
+            }
+        }
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+/// Generate a static [`kernel_param_ops`](../../../include/linux/moduleparam.h) struct.
+///
+/// # Example
+/// ```ignore
+/// make_param_ops!(
+///     /// Documentation for new param ops.
+///     PARAM_OPS_MYTYPE, // Name for the static.
+///     MyType // A type which implements [`ModuleParam`].
+/// );
+/// ```
+macro_rules! make_param_ops {
+    ($ops:ident, $ty:ty) => {
+        $crate::make_param_ops!(
+            #[doc=""]
+            $ops,
+            $ty
+        );
+    };
+    ($(#[$meta:meta])* $ops:ident, $ty:ty) => {
+        $(#[$meta])*
+        ///
+        /// Static [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+        /// struct generated by [`make_param_ops`].
+        pub static $ops: $crate::bindings::kernel_param_ops = $crate::bindings::kernel_param_ops {
+            flags: if <$ty as $crate::module_param::ModuleParam>::NOARG_ALLOWED {
+                $crate::bindings::KERNEL_PARAM_OPS_FL_NOARG
+            } else {
+                0
+            },
+            set: Some(<$ty as $crate::module_param::ModuleParam>::set_param),
+            get: Some(<$ty as $crate::module_param::ModuleParam>::get_param),
+            free: Some(<$ty as $crate::module_param::ModuleParam>::free),
+        };
+    };
+}
+
+impl_module_param!(i8);
+impl_module_param!(u8);
+impl_module_param!(i16);
+impl_module_param!(u16);
+impl_module_param!(i32);
+impl_module_param!(u32);
+impl_module_param!(i64);
+impl_module_param!(u64);
+impl_module_param!(isize);
+impl_module_param!(usize);
+
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`i8`].
+    PARAM_OPS_I8,
+    i8
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`u8`].
+    PARAM_OPS_U8,
+    u8
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`i16`].
+    PARAM_OPS_I16,
+    i16
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`u16`].
+    PARAM_OPS_U16,
+    u16
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`i32`].
+    PARAM_OPS_I32,
+    i32
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`u32`].
+    PARAM_OPS_U32,
+    u32
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`i64`].
+    PARAM_OPS_I64,
+    i64
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`u64`].
+    PARAM_OPS_U64,
+    u64
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`isize`].
+    PARAM_OPS_ISIZE,
+    isize
+);
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`usize`].
+    PARAM_OPS_USIZE,
+    usize
+);
+
+impl ModuleParam for bool {
+    type Value = bool;
+
+    const NOARG_ALLOWED: bool = true;
+
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        match arg {
+            None => Some(true),
+            Some(b"y") | Some(b"Y") | Some(b"1") | Some(b"true") => Some(true),
+            Some(b"n") | Some(b"N") | Some(b"0") | Some(b"false") => Some(false),
+            _ => None,
+        }
+    }
+
+    fn value(&self) -> &Self::Value {
+        self
+    }
+}
+
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`bool`].
+    PARAM_OPS_BOOL,
+    bool
+);
+
+/// An array of at __most__ `N` values.
+///
+/// # Invariant
+///
+/// The first `self.used` elements of `self.values` are initialized.
+pub struct ArrayParam<T, const N: usize> {
+    values: [core::mem::MaybeUninit<T>; N],
+    used: usize,
+}
+
+impl<T, const N: usize> ArrayParam<T, N> {
+    fn values(&self) -> &[T] {
+        // SAFETY: The invariant maintained by `ArrayParam` allows us to cast
+        // the first `self.used` elements to `T`.
+        unsafe {
+            &*(&self.values[0..self.used] as *const [core::mem::MaybeUninit<T>] as *const [T])
+        }
+    }
+}
+
+impl<T: Copy, const N: usize> ArrayParam<T, N> {
+    const fn new() -> Self {
+        // INVARIANT: The first `self.used` elements of `self.values` are
+        // initialized.
+        ArrayParam {
+            values: [core::mem::MaybeUninit::uninit(); N],
+            used: 0,
+        }
+    }
+
+    const fn push(&mut self, val: T) {
+        if self.used < N {
+            // INVARIANT: The first `self.used` elements of `self.values` are
+            // initialized.
+            self.values[self.used] = core::mem::MaybeUninit::new(val);
+            self.used += 1;
+        }
+    }
+
+    /// Create an instance of `ArrayParam` initialized with `vals`.
+    ///
+    /// This function is only meant to be used in the [`module::module`] macro.
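+    ///
+    /// For instance (sketch):
+    ///
+    /// ```ignore
+    /// // Capacity 4, three slots used; `Display` renders this as `1,2,3,`.
+    /// let param = ArrayParam::<i32, 4>::create(&[1, 2, 3]);
+    /// ```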
+    pub const fn create(vals: &[T]) -> Self {
+        let mut result = ArrayParam::new();
+        let mut i = 0;
+        while i < vals.len() {
+            result.push(vals[i]);
+            i += 1;
+        }
+        result
+    }
+}
+
+impl<T: core::fmt::Display, const N: usize> core::fmt::Display for ArrayParam<T, N> {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        for val in self.values() {
+            write!(f, "{},", val)?;
+        }
+        Ok(())
+    }
+}
+
+impl<T: Copy + core::fmt::Display + ParseInt, const N: usize> ModuleParam
+    for ArrayParam<T, N>
+{
+    type Value = [T];
+
+    const NOARG_ALLOWED: bool = false;
+
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        arg.and_then(|args| {
+            let mut result = Self::new();
+            for arg in args.split(|b| *b == b',') {
+                result.push(T::try_from_param_arg(Some(arg))?);
+            }
+            Some(result)
+        })
+    }
+
+    fn value(&self) -> &Self::Value {
+        self.values()
+    }
+}
+
+/// A C-style string parameter.
+///
+/// The Rust version of the [`charp`] parameter. This type is meant to be
+/// used by the [`macros::module`] macro, not handled directly. Instead use the
+/// `read` method generated by that macro.
+///
+/// [`charp`]: ../../../include/linux/moduleparam.h
+pub enum StringParam {
+    /// A borrowed parameter value.
+    ///
+    /// Either the default value (which is static in the module) or borrowed
+    /// from the original argument buffer used to set the value.
+    Ref(&'static [u8]),
+
+    /// A value that was allocated when the parameter was set.
+    ///
+    /// The value needs to be freed when the parameter is reset or the module is
+    /// unloaded.
+    Owned(alloc::vec::Vec<u8>),
+}
+
+impl StringParam {
+    fn bytes(&self) -> &[u8] {
+        match self {
+            StringParam::Ref(bytes) => *bytes,
+            StringParam::Owned(vec) => &vec[..],
+        }
+    }
+}
+
+impl core::fmt::Display for StringParam {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let bytes = self.bytes();
+        match core::str::from_utf8(bytes) {
+            Ok(utf8) => write!(f, "{}", utf8),
+            Err(_) => write!(f, "{:?}", bytes),
+        }
+    }
+}
+
+impl ModuleParam for StringParam {
+    type Value = [u8];
+
+    const NOARG_ALLOWED: bool = false;
+
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        // SAFETY: It is always safe to call [`slab_is_available`](../../../include/linux/slab.h).
+        let slab_available = unsafe { crate::bindings::slab_is_available() };
+        arg.and_then(|arg| {
+            if slab_available {
+                let mut vec = alloc::vec::Vec::new();
+                vec.try_extend_from_slice(arg).ok()?;
+                Some(StringParam::Owned(vec))
+            } else {
+                Some(StringParam::Ref(arg))
+            }
+        })
+    }
+
+    fn value(&self) -> &Self::Value {
+        self.bytes()
+    }
+}
+
+make_param_ops!(
+    /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+    /// for [`StringParam`].
+    PARAM_OPS_STR,
+    StringParam
+);
diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs
new file mode 100644
index 0000000000000..78aa5956f03fe
--- /dev/null
+++ b/rust/kernel/of.rs
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Devicetree and Open Firmware abstractions.
+//!
+//! C header: [`include/linux/of_*.h`](../../../../include/linux/of_*.h)
+
+use crate::{bindings, c_types, str::CStr};
+
+use core::ops::Deref;
+use core::ptr;
+
+/// A kernel Open Firmware / devicetree match table.
+///
+/// Can only exist as an `&OfMatchTable` reference (akin to `&str` or
+/// `&Path` in Rust std).
+///
+/// # Invariants
+///
+/// The inner reference points to a sentinel-terminated C array.
+#[repr(transparent)]
+pub struct OfMatchTable(bindings::of_device_id);
+
+impl OfMatchTable {
+    /// Returns the table as a reference to a static lifetime, sentinel-terminated C array.
+ /// + /// This is suitable to be coerced into the kernel's `of_match_table` field. + pub fn as_ptr(&'static self) -> &'static bindings::of_device_id { + // The inner reference points to a sentinel-terminated C array, as per + // the type invariant. + &self.0 + } +} + +/// An Open Firmware Match Table that can be constructed at build time. +/// +/// # Invariants +/// +/// `sentinel` always contains zeroes. +#[repr(C)] +pub struct ConstOfMatchTable { + table: [bindings::of_device_id; N], + sentinel: bindings::of_device_id, +} + +impl ConstOfMatchTable { + /// Creates a new Open Firmware Match Table from a list of compatible strings. + pub const fn new_const(compatibles: [&'static CStr; N]) -> Self { + let mut table = [Self::zeroed_of_device_id(); N]; + let mut i = 0; + while i < N { + table[i] = Self::new_of_device_id(compatibles[i]); + i += 1; + } + Self { + table, + // INVARIANTS: we zero the sentinel here, and never change it + // anywhere. Therefore it always contains zeroes. + sentinel: Self::zeroed_of_device_id(), + } + } + + const fn zeroed_of_device_id() -> bindings::of_device_id { + bindings::of_device_id { + name: [0; 32], + type_: [0; 32], + compatible: [0; 128], + data: ptr::null(), + } + } + + const fn new_of_device_id(compatible: &'static CStr) -> bindings::of_device_id { + let mut id = Self::zeroed_of_device_id(); + let compatible = compatible.as_bytes_with_nul(); + let mut i = 0; + while i < compatible.len() { + // If `compatible` does not fit in `id.compatible`, an + // "index out of bounds" build time error will be triggered. + id.compatible[i] = compatible[i] as c_types::c_char; + i += 1; + } + id + } +} + +impl Deref for ConstOfMatchTable { + type Target = OfMatchTable; + + fn deref(&self) -> &OfMatchTable { + // INVARIANTS: `head` points to a sentinel-terminated C array, + // as per the `ConstOfMatchTable` type invariant, therefore + // `&OfMatchTable`'s inner reference will point to a sentinel-terminated C array. + let head = &self.table[0] as *const bindings::of_device_id as *const OfMatchTable; + + // SAFETY: The returned reference must remain valid for the lifetime of `self`. + // The raw pointer `head` points to memory inside `self`. So the reference created + // from this raw pointer has the same lifetime as `self`. + // Therefore this reference remains valid for the lifetime of `self`, and + // is safe to return. + unsafe { &*head } + } +} diff --git a/rust/kernel/pages.rs b/rust/kernel/pages.rs new file mode 100644 index 0000000000000..de8358629fdd9 --- /dev/null +++ b/rust/kernel/pages.rs @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel page allocation and management. +//! +//! TODO: This module is a work in progress. + +use crate::{ + bindings, c_types, io_buffer::IoBufferReader, user_ptr::UserSlicePtrReader, Error, Result, + PAGE_SIZE, +}; +use core::{marker::PhantomData, ptr}; + +/// A set of physical pages. +/// +/// `Pages` holds a reference to a set of pages of order `ORDER`. Having the order as a generic +/// const allows the struct to have the same size as a pointer. +/// +/// # Invariants +/// +/// The pointer `Pages::pages` is valid and points to 2^ORDER pages. +pub struct Pages { + pages: *mut bindings::page, +} + +impl Pages { + /// Allocates a new set of contiguous pages. + pub fn new() -> Result { + // TODO: Consider whether we want to allow callers to specify flags. + // SAFETY: This only allocates pages. We check that it succeeds in the next statement. 
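+ // On success, `alloc_pages` returns a pointer to the first `struct page`
+ // of a physically contiguous 2^ORDER block; `__GFP_ZERO` means the pages
+ // come back zeroed.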
+ let pages = unsafe { + bindings::alloc_pages( + bindings::GFP_KERNEL | bindings::__GFP_ZERO | bindings::__GFP_HIGHMEM, + ORDER, + ) + }; + if pages.is_null() { + return Err(Error::ENOMEM); + } + // INVARIANTS: We checked that the allocation above succeeded> + Ok(Self { pages }) + } + + /// Maps a single page at the given address in the given VM area. + /// + /// This is only meant to be used by pages of order 0. + pub fn insert_page(&self, vma: &mut bindings::vm_area_struct, address: usize) -> Result { + if ORDER != 0 { + return Err(Error::EINVAL); + } + + // SAFETY: We check above that the allocation is of order 0. The range of `address` is + // already checked by `vm_insert_page`. + let ret = unsafe { bindings::vm_insert_page(vma, address as _, self.pages) }; + if ret != 0 { + Err(Error::from_kernel_errno(ret)) + } else { + Ok(()) + } + } + + /// Copies data from the given [`UserSlicePtrReader`] into the pages. + pub fn copy_into_page( + &self, + reader: &mut UserSlicePtrReader, + offset: usize, + len: usize, + ) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + + // SAFETY: We ensured that the buffer was valid with the check above. + unsafe { reader.read_raw((mapping.ptr as usize + offset) as _, len) }?; + Ok(()) + } + + /// Maps the pages and reads from them into the given buffer. + /// + /// # Safety + /// + /// Callers must ensure that the destination buffer is valid for the given length. + /// Additionally, if the raw buffer is intended to be recast, they must ensure that the data + /// can be safely cast; [`crate::io_buffer::ReadableFromBytes`] has more details about it. + pub unsafe fn read(&self, dest: *mut u8, offset: usize, len: usize) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + unsafe { ptr::copy((mapping.ptr as *mut u8).add(offset), dest, len) }; + Ok(()) + } + + /// Maps the pages and writes into them from the given bufer. + /// + /// # Safety + /// + /// Callers must ensure that the buffer is valid for the given length. Additionally, if the + /// page is (or will be) mapped by userspace, they must ensure that no kernel data is leaked + /// through padding if it was cast from another type; [`crate::io_buffer::WritableToBytes`] has + /// more details about it. + pub unsafe fn write(&self, src: *const u8, offset: usize, len: usize) -> Result { + // TODO: For now this only works on the first page. + let end = offset.checked_add(len).ok_or(Error::EINVAL)?; + if end > PAGE_SIZE { + return Err(Error::EINVAL); + } + + let mapping = self.kmap(0).ok_or(Error::EINVAL)?; + unsafe { ptr::copy(src, (mapping.ptr as *mut u8).add(offset), len) }; + Ok(()) + } + + /// Maps the page at index `index`. + fn kmap(&self, index: usize) -> Option> { + if index >= 1usize << ORDER { + return None; + } + + // SAFETY: We checked above that `index` is within range. + let page = unsafe { self.pages.add(index) }; + + // SAFETY: `page` is valid based on the checks above. 
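+ // `kmap` creates a kernel mapping for the page (needed for highmem);
+ // it is paired with the `kunmap` call in `PageMapping::drop`.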
+ let ptr = unsafe { bindings::kmap(page) }; + if ptr.is_null() { + return None; + } + + Some(PageMapping { + page, + ptr, + _phantom: PhantomData, + }) + } +} + +impl Drop for Pages { + fn drop(&mut self) { + // SAFETY: By the type invariants, we know the pages are allocated with the given order. + unsafe { bindings::__free_pages(self.pages, ORDER) }; + } +} + +struct PageMapping<'a> { + page: *mut bindings::page, + ptr: *mut c_types::c_void, + _phantom: PhantomData<&'a i32>, +} + +impl Drop for PageMapping<'_> { + fn drop(&mut self) { + // SAFETY: An instance of `PageMapping` is created only when `kmap` succeeded for the given + // page, so it is safe to unmap it here. + unsafe { bindings::kunmap(self.page) }; + } +} diff --git a/rust/kernel/platdev.rs b/rust/kernel/platdev.rs new file mode 100644 index 0000000000000..d55adb01059af --- /dev/null +++ b/rust/kernel/platdev.rs @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Platform devices. +//! +//! Also called `platdev`, `pdev`. +//! +//! C header: [`include/linux/platform_device.h`](../../../../include/linux/platform_device.h) + +use crate::{ + bindings, c_types, + error::{Error, Result}, + from_kernel_result, + of::OfMatchTable, + str::CStr, + types::PointerWrapper, +}; +use alloc::boxed::Box; +use core::{marker::PhantomPinned, pin::Pin}; + +/// A registration of a platform device. +#[derive(Default)] +pub struct Registration { + registered: bool, + pdrv: bindings::platform_driver, + _pin: PhantomPinned, +} + +// SAFETY: `Registration` does not expose any of its state across threads +// (it is fine for multiple threads to have a shared reference to it). +unsafe impl Sync for Registration {} + +extern "C" fn probe_callback( + pdev: *mut bindings::platform_device, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let device_id = unsafe { (*pdev).id }; + let drv_data = P::probe(device_id)?; + let drv_data = drv_data.into_pointer() as *mut c_types::c_void; + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + unsafe { + bindings::platform_set_drvdata(pdev, drv_data); + } + Ok(0) + } +} + +extern "C" fn remove_callback( + pdev: *mut bindings::platform_device, +) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let device_id = unsafe { (*pdev).id }; + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let ptr = unsafe { bindings::platform_get_drvdata(pdev) }; + // SAFETY: + // - we allocated this pointer using `P::DrvData::into_pointer`, + // so it is safe to turn back into a `P::DrvData`. + // - the allocation happened in `probe`, no-one freed the memory, + // `remove` is the canonical kernel location to free driver data. so OK + // to convert the pointer back to a Rust structure here. + let drv_data = unsafe { P::DrvData::from_pointer(ptr) }; + P::remove(device_id, drv_data)?; + Ok(0) + } +} + +impl Registration { + fn register( + self: Pin<&mut Self>, + name: &'static CStr, + of_match_table: Option<&'static OfMatchTable>, + module: &'static crate::ThisModule, + ) -> Result { + // SAFETY: We must ensure that we never move out of `this`. + let this = unsafe { self.get_unchecked_mut() }; + if this.registered { + // Already registered. + return Err(Error::EINVAL); + } + this.pdrv.driver.name = name.as_char_ptr(); + if let Some(tbl) = of_match_table { + this.pdrv.driver.of_match_table = tbl.as_ptr(); + } + this.pdrv.probe = Some(probe_callback::
<P>
); + this.pdrv.remove = Some(remove_callback::
<P>
); + // SAFETY: + // - `this.pdrv` lives at least until the call to `platform_driver_unregister()` returns. + // - `name` pointer has static lifetime. + // - `module.0` lives at least as long as the module. + // - `probe()` and `remove()` are static functions. + // - `of_match_table` is either a raw pointer with static lifetime, + // as guaranteed by the [`of::OfMatchTable::as_ptr()`] return type, + // or null. + let ret = unsafe { bindings::__platform_driver_register(&mut this.pdrv, module.0) }; + if ret < 0 { + return Err(Error::from_kernel_errno(ret)); + } + this.registered = true; + Ok(()) + } + + /// Registers a platform device. + /// + /// Returns a pinned heap-allocated representation of the registration. + pub fn new_pinned( + name: &'static CStr, + of_match_tbl: Option<&'static OfMatchTable>, + module: &'static crate::ThisModule, + ) -> Result>> { + let mut r = Pin::from(Box::try_new(Self::default())?); + r.as_mut().register::
<P>
(name, of_match_tbl, module)?; + Ok(r) + } +} + +impl Drop for Registration { + fn drop(&mut self) { + if self.registered { + // SAFETY: if `registered` is true, then `self.pdev` was registered + // previously, which means `platform_driver_unregister` is always + // safe to call. + unsafe { bindings::platform_driver_unregister(&mut self.pdrv) } + } + } +} + +/// Trait for implementers of platform drivers. +/// +/// Implement this trait whenever you create a platform driver. +pub trait PlatformDriver { + /// Device driver data. + /// + /// Corresponds to the data set or retrieved via the kernel's + /// `platform_{set,get}_drvdata()` functions. + /// + /// Require that `DrvData` implements `PointerWrapper`. We guarantee to + /// never move the underlying wrapped data structure. This allows + /// driver writers to use pinned or self-referential data structures. + type DrvData: PointerWrapper; + + /// Platform driver probe. + /// + /// Called when a new platform device is added or discovered. + /// Implementers should attempt to initialize the device here. + fn probe(device_id: i32) -> Result; + + /// Platform driver remove. + /// + /// Called when a platform device is removed. + /// Implementers should prepare the device for complete removal here. + fn remove(device_id: i32, drv_data: Self::DrvData) -> Result; +} diff --git a/rust/kernel/power.rs b/rust/kernel/power.rs new file mode 100644 index 0000000000000..67d653aeadbb8 --- /dev/null +++ b/rust/kernel/power.rs @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Power management interfaces. +//! +//! C header: [`include/linux/pm.h`](../../../../include/linux/pm.h) + +#![allow(dead_code)] + +use crate::{bindings, c_types, from_kernel_result, sync::Ref, types::PointerWrapper, Result}; +use core::{marker::PhantomData, ops::Deref}; + +/// Corresponds to the kernel's `struct dev_pm_ops`. +/// +/// It is meant to be implemented by drivers that support power-management operations. +pub trait Operations: Sync + Send + Sized { + /// The type of the context data stored by the driver on each device. + type Data: PointerWrapper + Sync + Send = Ref; + + /// Called before the system goes into a sleep state. + fn suspend(_data: &<::Borrowed as Deref>::Target) -> Result { + Ok(()) + } + + /// Called after the system comes back from a sleep state. + fn resume(_data: &<::Borrowed as Deref>::Target) -> Result { + Ok(()) + } + + /// Called before creating a hibernation image. + fn freeze(_data: &<::Borrowed as Deref>::Target) -> Result { + Ok(()) + } + + /// Called after the system is restored from a hibernation image. + fn restore(_data: &<::Borrowed as Deref>::Target) -> Result { + Ok(()) + } +} + +macro_rules! pm_callback { + ($callback:ident, $method:ident) => { + unsafe extern "C" fn $callback( + dev: *mut bindings::device, + ) -> c_types::c_int { + from_kernel_result! { + // SAFETY: `dev` is valid as it was passed in by the C portion. + let ptr = unsafe { bindings::dev_get_drvdata(dev) }; + // SAFETY: By the safety requirements of `OpsTable::build`, we know that `ptr` came + // from a previous call to `T::Data::into_pointer`. 
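+ // `borrow` only borrows the driver data; ownership is unchanged, so the
+ // pointer stays valid for later callbacks.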
+ let data = unsafe { T::Data::borrow(ptr) }; + T::$method(data.deref())?; + Ok(0) + } + } + }; +} + +pm_callback!(suspend_callback, suspend); +pm_callback!(resume_callback, resume); +pm_callback!(freeze_callback, freeze); +pm_callback!(restore_callback, restore); + +pub(crate) struct OpsTable(PhantomData<*const T>); + +impl OpsTable { + const VTABLE: bindings::dev_pm_ops = bindings::dev_pm_ops { + prepare: None, + complete: None, + suspend: Some(suspend_callback::), + resume: Some(resume_callback::), + freeze: Some(freeze_callback::), + thaw: None, + poweroff: None, + restore: Some(restore_callback::), + suspend_late: None, + resume_early: None, + freeze_late: None, + thaw_early: None, + poweroff_late: None, + restore_early: None, + suspend_noirq: None, + resume_noirq: None, + freeze_noirq: None, + thaw_noirq: None, + poweroff_noirq: None, + restore_noirq: None, + runtime_suspend: None, + runtime_resume: None, + runtime_idle: None, + }; + + /// Builds an instance of `struct dev_pm_ops`. + /// + /// # Safety + /// + /// The caller must ensure that `dev_get_drvdata` will result in a value returned by + /// [`T::Data::into_pointer`]. + pub(crate) const unsafe fn build() -> &'static bindings::dev_pm_ops { + &Self::VTABLE + } +} + +/// Implements the [`Operations`] trait as no-ops. +/// +/// This is useful when one doesn't want to provide the implementation of any power-manager related +/// operation. +pub struct NoOperations(PhantomData); + +impl Operations for NoOperations { + type Data = T; +} + +// SAFETY: `NoOperation` provides no functionality, it is safe to send a reference to it to +// different threads. +unsafe impl Sync for NoOperations {} + +// SAFETY: `NoOperation` provides no functionality, it is safe to send it to different threads. +unsafe impl Send for NoOperations {} diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs new file mode 100644 index 0000000000000..531f843177f4b --- /dev/null +++ b/rust/kernel/prelude.rs @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! The `kernel` prelude. +//! +//! These are the most common items used by Rust code in the kernel, +//! intended to be imported by all Rust code, for convenience. +//! +//! # Examples +//! +//! ``` +//! use kernel::prelude::*; +//! ``` + +pub use core::pin::Pin; + +pub use alloc::{boxed::Box, string::String, vec::Vec}; + +pub use macros::{module, module_misc_device}; + +pub use super::build_assert; + +pub use super::{dbg, pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; + +pub use super::static_assert; + +pub use super::{Error, KernelModule, Result}; diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs new file mode 100644 index 0000000000000..328d893f87aaf --- /dev/null +++ b/rust/kernel/print.rs @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Printing facilities. +//! +//! C header: [`include/linux/printk.h`](../../../../include/linux/printk.h) +//! +//! Reference: + +use core::cmp; +use core::fmt; + +use crate::bindings; +use crate::c_types::{c_char, c_void}; + +// Called from `vsprintf` with format specifier `%pA`. +#[no_mangle] +unsafe fn rust_fmt_argument(buf: *mut c_char, end: *mut c_char, ptr: *const c_void) -> *mut c_char { + use fmt::Write; + + // Use `usize` to use `saturating_*` functions. + struct Writer { + buf: usize, + end: usize, + } + + impl Write for Writer { + fn write_str(&mut self, s: &str) -> fmt::Result { + // `buf` value after writing `len` bytes. 
This does not have to be bounded + // by `end`, but we don't want it to wrap around to 0. + let buf_new = self.buf.saturating_add(s.len()); + + // Amount that we can copy. `saturating_sub` ensures we get 0 if + // `buf` goes past `end`. + let len_to_copy = cmp::min(buf_new, self.end).saturating_sub(self.buf); + + // SAFETY: In any case, `buf` is non-null and properly aligned. + // If `len_to_copy` is non-zero, then we know `buf` has not past + // `end` yet and so is valid. + unsafe { + core::ptr::copy_nonoverlapping( + s.as_bytes().as_ptr(), + self.buf as *mut u8, + len_to_copy, + ) + }; + + self.buf = buf_new; + Ok(()) + } + } + + let mut w = Writer { + buf: buf as _, + end: end as _, + }; + let _ = w.write_fmt(unsafe { *(ptr as *const fmt::Arguments<'_>) }); + w.buf as _ +} + +/// Format strings. +/// +/// Public but hidden since it should only be used from public macros. +#[doc(hidden)] +pub mod format_strings { + use crate::bindings; + + /// The length we copy from the `KERN_*` kernel prefixes. + const LENGTH_PREFIX: usize = 2; + + /// The length of the fixed format strings. + pub const LENGTH: usize = 10; + + /// Generates a fixed format string for the kernel's [`_printk`]. + /// + /// The format string is always the same for a given level, i.e. for a + /// given `prefix`, which are the kernel's `KERN_*` constants. + /// + /// [`_printk`]: ../../../../include/linux/printk.h + const fn generate(is_cont: bool, prefix: &[u8; 3]) -> [u8; LENGTH] { + // Ensure the `KERN_*` macros are what we expect. + assert!(prefix[0] == b'\x01'); + if is_cont { + assert!(prefix[1] == b'c'); + } else { + assert!(prefix[1] >= b'0' && prefix[1] <= b'7'); + } + assert!(prefix[2] == b'\x00'); + + let suffix: &[u8; LENGTH - LENGTH_PREFIX] = if is_cont { + b"%pA\0\0\0\0\0" + } else { + b"%s: %pA\0" + }; + + [ + prefix[0], prefix[1], suffix[0], suffix[1], suffix[2], suffix[3], suffix[4], suffix[5], + suffix[6], suffix[7], + ] + } + + // Generate the format strings at compile-time. + // + // This avoids the compiler generating the contents on the fly in the stack. + // + // Furthermore, `static` instead of `const` is used to share the strings + // for all the kernel. + pub static EMERG: [u8; LENGTH] = generate(false, bindings::KERN_EMERG); + pub static ALERT: [u8; LENGTH] = generate(false, bindings::KERN_ALERT); + pub static CRIT: [u8; LENGTH] = generate(false, bindings::KERN_CRIT); + pub static ERR: [u8; LENGTH] = generate(false, bindings::KERN_ERR); + pub static WARNING: [u8; LENGTH] = generate(false, bindings::KERN_WARNING); + pub static NOTICE: [u8; LENGTH] = generate(false, bindings::KERN_NOTICE); + pub static INFO: [u8; LENGTH] = generate(false, bindings::KERN_INFO); + pub static DEBUG: [u8; LENGTH] = generate(false, bindings::KERN_DEBUG); + pub static CONT: [u8; LENGTH] = generate(true, bindings::KERN_CONT); +} + +/// Prints a message via the kernel's [`_printk`]. +/// +/// Public but hidden since it should only be used from public macros. +/// +/// # Safety +/// +/// The format string must be one of the ones in [`format_strings`], and +/// the module name must be null-terminated. +/// +/// [`_printk`]: ../../../../include/linux/_printk.h +#[doc(hidden)] +pub unsafe fn call_printk( + format_string: &[u8; format_strings::LENGTH], + module_name: &[u8], + args: fmt::Arguments<'_>, +) { + // `_printk` does not seem to fail in any path. 
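+ //
+ // SAFETY: per this function's safety requirements, `format_string` is
+ // one of the fixed strings in [`format_strings`] and `module_name` is
+ // null-terminated, so the call below is sound.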
+ unsafe { + bindings::_printk( + format_string.as_ptr() as _, + module_name.as_ptr(), + &args as *const _ as *const c_void, + ); + } +} + +/// Prints a message via the kernel's [`_printk`] for the `CONT` level. +/// +/// Public but hidden since it should only be used from public macros. +/// +/// [`_printk`]: ../../../../include/linux/printk.h +#[doc(hidden)] +pub fn call_printk_cont(args: fmt::Arguments<'_>) { + // `_printk` does not seem to fail in any path. + // + // SAFETY: The format string is fixed. + unsafe { + bindings::_printk( + format_strings::CONT.as_ptr() as _, + &args as *const _ as *const c_void, + ); + } +} + +/// Performs formatting and forwards the string to [`call_printk`]. +/// +/// Public but hidden since it should only be used from public macros. +#[doc(hidden)] +#[cfg(not(testlib))] +#[macro_export] +macro_rules! print_macro ( + // The non-continuation cases (most of them, e.g. `INFO`). + ($format_string:path, false, $($arg:tt)+) => ( + // SAFETY: This hidden macro should only be called by the documented + // printing macros which ensure the format string is one of the fixed + // ones. All `__LOG_PREFIX`s are null-terminated as they are generated + // by the `module!` proc macro or fixed values defined in a kernel + // crate. + unsafe { + $crate::print::call_printk( + &$format_string, + crate::__LOG_PREFIX, + format_args!($($arg)+), + ); + } + ); + + // The `CONT` case. + ($format_string:path, true, $($arg:tt)+) => ( + $crate::print::call_printk_cont( + format_args!($($arg)+), + ); + ); +); + +/// Stub for doctests +#[cfg(testlib)] +#[macro_export] +macro_rules! print_macro ( + ($format_string:path, $e:expr, $($arg:tt)+) => ( + () + ); +); + +// We could use a macro to generate these macros. However, doing so ends +// up being a bit ugly: it requires the dollar token trick to escape `$` as +// well as playing with the `doc` attribute. Furthermore, they cannot be easily +// imported in the prelude due to [1]. So, for the moment, we just write them +// manually, like in the C side; while keeping most of the logic in another +// macro, i.e. [`print_macro`]. +// +// [1]: https://github.com/rust-lang/rust/issues/52234 + +/// Prints an emergency-level message (level 0). +/// +/// Use this level if the system is unusable. +/// +/// Equivalent to the kernel's [`pr_emerg`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_emerg`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_emerg +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_emerg!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_emerg ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::EMERG, false, $($arg)*) + ) +); + +/// Prints an alert-level message (level 1). +/// +/// Use this level if action must be taken immediately. +/// +/// Equivalent to the kernel's [`pr_alert`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_alert`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_alert +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_alert!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! 
pr_alert ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::ALERT, false, $($arg)*) + ) +); + +/// Prints a critical-level message (level 2). +/// +/// Use this level for critical conditions. +/// +/// Equivalent to the kernel's [`pr_crit`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_crit`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_crit +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_crit!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_crit ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::CRIT, false, $($arg)*) + ) +); + +/// Prints an error-level message (level 3). +/// +/// Use this level for error conditions. +/// +/// Equivalent to the kernel's [`pr_err`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_err`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_err +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_err!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_err ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::ERR, false, $($arg)*) + ) +); + +/// Prints a warning-level message (level 4). +/// +/// Use this level for warning conditions. +/// +/// Equivalent to the kernel's [`pr_warn`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_warn`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_warn +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_warn!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_warn ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::WARNING, false, $($arg)*) + ) +); + +/// Prints a notice-level message (level 5). +/// +/// Use this level for normal but significant conditions. +/// +/// Equivalent to the kernel's [`pr_notice`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_notice`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_notice +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_notice!("hello {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_notice ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::NOTICE, false, $($arg)*) + ) +); + +/// Prints an info-level message (level 6). +/// +/// Use this level for informational messages. +/// +/// Equivalent to the kernel's [`pr_info`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. 
+/// +/// [`pr_info`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_info +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_info!("hello {}\n", "there"); +/// ``` +#[macro_export] +#[doc(alias = "print")] +macro_rules! pr_info ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::INFO, false, $($arg)*) + ) +); + +/// Prints a debug-level message (level 7). +/// +/// Use this level for debug messages. +/// +/// Equivalent to the kernel's [`pr_debug`] macro, except that it doesn't support dynamic debug +/// yet. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_debug`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_debug +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// pr_debug!("hello {}\n", "there"); +/// ``` +#[macro_export] +#[doc(alias = "print")] +macro_rules! pr_debug ( + ($($arg:tt)*) => ( + if cfg!(debug_assertions) { + $crate::print_macro!($crate::print::format_strings::DEBUG, false, $($arg)*) + } + ) +); + +/// Continues a previous log message in the same line. +/// +/// Use only when continuing a previous `pr_*!` macro (e.g. [`pr_info!`]). +/// +/// Equivalent to the kernel's [`pr_cont`] macro. +/// +/// Mimics the interface of [`std::print!`]. See [`core::fmt`] and +/// [`alloc::format!`] for information about the formatting syntax. +/// +/// [`pr_cont`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_cont +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::pr_cont; +/// pr_info!("hello"); +/// pr_cont!(" {}\n", "there"); +/// ``` +#[macro_export] +macro_rules! pr_cont ( + ($($arg:tt)*) => ( + $crate::print_macro!($crate::print::format_strings::CONT, true, $($arg)*) + ) +); diff --git a/rust/kernel/random.rs b/rust/kernel/random.rs new file mode 100644 index 0000000000000..723a89829f661 --- /dev/null +++ b/rust/kernel/random.rs @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Random numbers. +//! +//! C header: [`include/linux/random.h`](../../../../include/linux/random.h) + +use core::convert::TryInto; + +use crate::{bindings, c_types, error}; + +/// Fills a byte slice with random bytes generated from the kernel's CSPRNG. +/// +/// Ensures that the CSPRNG has been seeded before generating any random bytes, +/// and will block until it is ready. +pub fn getrandom(dest: &mut [u8]) -> error::Result { + let res = unsafe { bindings::wait_for_random_bytes() }; + if res != 0 { + return Err(error::Error::from_kernel_errno(res)); + } + + unsafe { + bindings::get_random_bytes( + dest.as_mut_ptr() as *mut c_types::c_void, + dest.len().try_into()?, + ); + } + Ok(()) +} + +/// Fills a byte slice with random bytes generated from the kernel's CSPRNG. +/// +/// If the CSPRNG is not yet seeded, returns an `Err(EAGAIN)` immediately. +pub fn getrandom_nonblock(dest: &mut [u8]) -> error::Result { + if !unsafe { bindings::rng_is_initialized() } { + return Err(error::Error::EAGAIN); + } + getrandom(dest) +} + +/// Contributes the contents of a byte slice to the kernel's entropy pool. +/// +/// Does *not* credit the kernel entropy counter though. 
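+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch (the seed bytes are made up for illustration):
+ ///
+ /// ```
+ /// let device_serial = [0x12u8, 0x34, 0x56, 0x78];
+ /// kernel::random::add_randomness(&device_serial);
+ /// ```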
+pub fn add_randomness(data: &[u8]) { + unsafe { + bindings::add_device_randomness( + data.as_ptr() as *const c_types::c_void, + data.len().try_into().unwrap(), + ); + } +} diff --git a/rust/kernel/raw_list.rs b/rust/kernel/raw_list.rs new file mode 100644 index 0000000000000..4bc4f4a24ad5e --- /dev/null +++ b/rust/kernel/raw_list.rs @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Raw lists. +//! +//! TODO: This module is a work in progress. + +use core::{ + cell::UnsafeCell, + ptr, + ptr::NonNull, + sync::atomic::{AtomicBool, Ordering}, +}; + +/// A descriptor of list elements. +/// +/// It describes the type of list elements and provides a function to determine how to get the +/// links to be used on a list. +/// +/// A type that may be in multiple lists simultaneously neneds to implement one of these for each +/// simultaneous list. +pub trait GetLinks { + /// The type of the entries in the list. + type EntryType: ?Sized; + + /// Returns the links to be used when linking an entry within a list. + fn get_links(data: &Self::EntryType) -> &Links; +} + +/// The links used to link an object on a linked list. +/// +/// Instances of this type are usually embedded in structures and returned in calls to +/// [`GetLinks::get_links`]. +pub struct Links { + inserted: AtomicBool, + entry: UnsafeCell>, +} + +impl Links { + /// Constructs a new [`Links`] instance that isn't inserted on any lists yet. + pub fn new() -> Self { + Self { + inserted: AtomicBool::new(false), + entry: UnsafeCell::new(ListEntry::new()), + } + } + + fn acquire_for_insertion(&self) -> bool { + self.inserted + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } + + fn release_after_removal(&self) { + self.inserted.store(false, Ordering::Release); + } +} + +impl Default for Links { + fn default() -> Self { + Self::new() + } +} + +struct ListEntry { + next: Option>, + prev: Option>, +} + +impl ListEntry { + fn new() -> Self { + Self { + next: None, + prev: None, + } + } +} + +/// A linked list. +/// +/// # Invariants +/// +/// The links of objects added to a list are owned by the list. +pub(crate) struct RawList { + head: Option>, +} + +impl RawList { + pub(crate) fn new() -> Self { + Self { head: None } + } + + pub(crate) fn is_empty(&self) -> bool { + self.head.is_none() + } + + fn insert_after_priv( + &mut self, + existing: &G::EntryType, + new_entry: &mut ListEntry, + new_ptr: Option>, + ) { + { + // SAFETY: It's safe to get the previous entry of `existing` because the list cannot + // change. + let existing_links = unsafe { &mut *G::get_links(existing).entry.get() }; + new_entry.next = existing_links.next; + existing_links.next = new_ptr; + } + + new_entry.prev = Some(NonNull::from(existing)); + + // SAFETY: It's safe to get the next entry of `existing` because the list cannot change. + let next_links = + unsafe { &mut *G::get_links(new_entry.next.unwrap().as_ref()).entry.get() }; + next_links.prev = new_ptr; + } + + /// Inserts the given object after `existing`. + /// + /// # Safety + /// + /// Callers must ensure that `existing` points to a valid entry that is on the list. + pub(crate) unsafe fn insert_after( + &mut self, + existing: &G::EntryType, + new: &G::EntryType, + ) -> bool { + let links = G::get_links(new); + if !links.acquire_for_insertion() { + // Nothing to do if already inserted. + return false; + } + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. 
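+ // (`acquire_for_insertion` above flipped the `inserted` flag with a
+ // compare-exchange, so no other thread can touch these links until
+ // `release_after_removal` runs.)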
+ let new_entry = unsafe { &mut *links.entry.get() }; + self.insert_after_priv(existing, new_entry, Some(NonNull::from(new))); + true + } + + fn push_back_internal(&mut self, new: &G::EntryType) -> bool { + let links = G::get_links(new); + if !links.acquire_for_insertion() { + // Nothing to do if already inserted. + return false; + } + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. + let new_entry = unsafe { &mut *links.entry.get() }; + let new_ptr = Some(NonNull::from(new)); + match self.back() { + // SAFETY: `back` is valid as the list cannot change. + Some(back) => self.insert_after_priv(unsafe { back.as_ref() }, new_entry, new_ptr), + None => { + self.head = new_ptr; + new_entry.next = new_ptr; + new_entry.prev = new_ptr; + } + } + true + } + + pub(crate) unsafe fn push_back(&mut self, new: &G::EntryType) -> bool { + self.push_back_internal(new) + } + + fn remove_internal(&mut self, data: &G::EntryType) -> bool { + let links = G::get_links(data); + + // SAFETY: The links are now owned by the list, so it is safe to get a mutable reference. + let entry = unsafe { &mut *links.entry.get() }; + let next = if let Some(next) = entry.next { + next + } else { + // Nothing to do if the entry is not on the list. + return false; + }; + + if ptr::eq(data, next.as_ptr()) { + // We're removing the only element. + self.head = None + } else { + // Update the head if we're removing it. + if let Some(raw_head) = self.head { + if ptr::eq(data, raw_head.as_ptr()) { + self.head = Some(next); + } + } + + // SAFETY: It's safe to get the previous entry because the list cannot change. + unsafe { &mut *G::get_links(entry.prev.unwrap().as_ref()).entry.get() }.next = + entry.next; + + // SAFETY: It's safe to get the next entry because the list cannot change. + unsafe { &mut *G::get_links(next.as_ref()).entry.get() }.prev = entry.prev; + } + + // Reset the links of the element we're removing so that we know it's not on any list. + entry.next = None; + entry.prev = None; + links.release_after_removal(); + true + } + + /// Removes the given entry. + /// + /// # Safety + /// + /// Callers must ensure that `data` is either on this list or in no list. It being on another + /// list leads to memory unsafety. + pub(crate) unsafe fn remove(&mut self, data: &G::EntryType) -> bool { + self.remove_internal(data) + } + + fn pop_front_internal(&mut self) -> Option> { + let head = self.head?; + // SAFETY: The head is on the list as we just got it from there and it cannot change. + unsafe { self.remove(head.as_ref()) }; + Some(head) + } + + pub(crate) fn pop_front(&mut self) -> Option> { + self.pop_front_internal() + } + + pub(crate) fn front(&self) -> Option> { + self.head + } + + pub(crate) fn back(&self) -> Option> { + // SAFETY: The links of head are owned by the list, so it is safe to get a reference. + unsafe { &*G::get_links(self.head?.as_ref()).entry.get() }.prev + } + + pub(crate) fn cursor_front(&self) -> Cursor<'_, G> { + Cursor::new(self, self.front()) + } + + pub(crate) fn cursor_front_mut(&mut self) -> CursorMut<'_, G> { + CursorMut::new(self, self.front()) + } +} + +struct CommonCursor { + cur: Option>, +} + +impl CommonCursor { + fn new(cur: Option>) -> Self { + Self { cur } + } + + fn move_next(&mut self, list: &RawList) { + match self.cur.take() { + None => self.cur = list.head, + Some(cur) => { + if let Some(head) = list.head { + // SAFETY: We have a shared ref to the linked list, so the links can't change. 
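+ // `cur` was obtained from this list, so its links are valid entries.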
+ let links = unsafe { &*G::get_links(cur.as_ref()).entry.get() }; + if links.next.unwrap() != head { + self.cur = links.next; + } + } + } + } + } + + fn move_prev(&mut self, list: &RawList) { + match list.head { + None => self.cur = None, + Some(head) => { + let next = match self.cur.take() { + None => head, + Some(cur) => { + if cur == head { + return; + } + cur + } + }; + // SAFETY: There's a shared ref to the list, so the links can't change. + let links = unsafe { &*G::get_links(next.as_ref()).entry.get() }; + self.cur = links.prev; + } + } + } +} + +/// A list cursor that allows traversing a linked list and inspecting elements. +pub struct Cursor<'a, G: GetLinks> { + cursor: CommonCursor, + list: &'a RawList, +} + +impl<'a, G: GetLinks> Cursor<'a, G> { + fn new(list: &'a RawList, cur: Option>) -> Self { + Self { + list, + cursor: CommonCursor::new(cur), + } + } + + /// Returns the element the cursor is currently positioned on. + pub fn current(&self) -> Option<&'a G::EntryType> { + let cur = self.cursor.cur?; + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &*cur.as_ptr() }) + } + + /// Moves the cursor to the next element. + pub fn move_next(&mut self) { + self.cursor.move_next(self.list); + } +} + +pub(crate) struct CursorMut<'a, G: GetLinks> { + cursor: CommonCursor, + list: &'a mut RawList, +} + +impl<'a, G: GetLinks> CursorMut<'a, G> { + fn new(list: &'a mut RawList, cur: Option>) -> Self { + Self { + list, + cursor: CommonCursor::new(cur), + } + } + + pub(crate) fn current(&mut self) -> Option<&mut G::EntryType> { + let cur = self.cursor.cur?; + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &mut *cur.as_ptr() }) + } + + /// Removes the entry the cursor is pointing to and advances the cursor to the next entry. It + /// returns a raw pointer to the removed element (if one is removed). + pub(crate) fn remove_current(&mut self) -> Option> { + let entry = self.cursor.cur?; + self.cursor.move_next(self.list); + // SAFETY: The entry is on the list as we just got it from there and it cannot change. + unsafe { self.list.remove(entry.as_ref()) }; + Some(entry) + } + + pub(crate) fn peek_next(&mut self) -> Option<&mut G::EntryType> { + let mut new = CommonCursor::new(self.cursor.cur); + new.move_next(self.list); + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &mut *new.cur?.as_ptr() }) + } + + pub(crate) fn peek_prev(&mut self) -> Option<&mut G::EntryType> { + let mut new = CommonCursor::new(self.cursor.cur); + new.move_prev(self.list); + // SAFETY: Objects must be kept alive while on the list. + Some(unsafe { &mut *new.cur?.as_ptr() }) + } + + pub(crate) fn move_next(&mut self) { + self.cursor.move_next(self.list); + } +} diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs new file mode 100644 index 0000000000000..880252e9cde71 --- /dev/null +++ b/rust/kernel/rbtree.rs @@ -0,0 +1,562 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Red-black trees. +//! +//! C header: [`include/linux/rbtree.h`](../../../../include/linux/rbtree.h) +//! +//! Reference: + +use crate::{bindings, Result}; +use alloc::boxed::Box; +use core::{ + cmp::{Ord, Ordering}, + iter::{IntoIterator, Iterator}, + marker::PhantomData, + mem::MaybeUninit, + ptr::{addr_of_mut, NonNull}, +}; + +struct Node { + links: bindings::rb_node, + key: K, + value: V, +} + +/// A red-black tree with owned nodes. +/// +/// It is backed by the kernel C red-black trees. 
+/// +/// # Invariants +/// +/// Non-null parent/children pointers stored in instances of the `rb_node` C struct are always +/// valid, and pointing to a field of our internal representation of a node. +/// +/// # Examples +/// +/// In the example below we do several operations on a tree. We note that insertions may fail if +/// the system is out of memory. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::rbtree::RBTree; +/// +/// fn rbtest() -> Result { +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_insert(20, 200)?; +/// tree.try_insert(10, 100)?; +/// tree.try_insert(30, 300)?; +/// +/// // Check the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Print all elements. +/// for (key, value) in &tree { +/// pr_info!("{} = {}\n", key, value); +/// } +/// +/// // Replace one of the elements. +/// tree.try_insert(10, 1000)?; +/// +/// // Check that the tree reflects the replacement. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Change the value of one of the elements. +/// *tree.get_mut(&30).unwrap() = 3000; +/// +/// // Check that the tree reflects the update. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove an element. +/// tree.remove(&10); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Update all values. +/// for value in tree.values_mut() { +/// *value *= 10; +/// } +/// +/// // Check that the tree reflects the changes to values. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&20, &2000)); +/// assert_eq!(iter.next().unwrap(), (&30, &30000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// Ok(()) +/// } +/// ``` +/// +/// In the example below, we first allocate a node, acquire a spinlock, then insert the node into +/// the tree. This is useful when the insertion context does not allow sleeping, for example, when +/// holding a spinlock. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::{rbtree::RBTree, sync::SpinLock}; +/// +/// fn insert_test(tree: &SpinLock>) -> Result { +/// // Pre-allocate node. This may fail (as it allocates memory). +/// let node = RBTree::try_allocate_node(10, 100)?; +/// +/// // Insert node while holding the lock. It is guaranteed to succeed with no allocation +/// // attempts. +/// let mut guard = tree.lock(); +/// guard.insert(node); +/// Ok(()) +/// } +/// ``` +/// +/// In the example below, we reuse an existing node allocation from an element we removed. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::rbtree::RBTree; +/// +/// fn reuse_test() -> Result { +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. 
+/// tree.try_insert(20, 200)?; +/// tree.try_insert(10, 100)?; +/// tree.try_insert(30, 300)?; +/// +/// // Check the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove a node, getting back ownership of it. +/// let existing = tree.remove_node(&30).unwrap(); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Turn the node into a reservation so that we can reuse it with a different key/value. +/// let reservation = existing.into_reservation(); +/// +/// // Insert a new node into the tree, reusing the previous allocation. This is guaranteed to +/// // succeed (no memory allocations). +/// tree.insert(reservation.into_node(15, 150)); +/// +/// // Check that the tree reflect the new insertion. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&15, &150)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// Ok(()) +/// } +/// ``` +pub struct RBTree { + root: bindings::rb_root, + _p: PhantomData>, +} + +impl RBTree { + /// Creates a new and empty tree. + pub fn new() -> Self { + Self { + // INVARIANT: There are no nodes in the tree, so the invariant holds vacuously. + root: bindings::rb_root::default(), + _p: PhantomData, + } + } + + /// Tries to insert a new value into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// Returns an error if it cannot allocate memory for the new node. + pub fn try_insert(&mut self, key: K, value: V) -> Result>> + where + K: Ord, + { + Ok(self.insert(Self::try_allocate_node(key, value)?)) + } + + /// Allocates memory for a node to be eventually initialised and inserted into the tree via a + /// call to [`RBTree::insert`]. + pub fn try_reserve_node() -> Result> { + Ok(RBTreeNodeReservation { + node: Box::try_new(MaybeUninit::uninit())?, + }) + } + + /// Allocates and initialiases a node that can be inserted into the tree via + /// [`RBTree::insert`]. + pub fn try_allocate_node(key: K, value: V) -> Result> { + Ok(Self::try_reserve_node()?.into_node(key, value)) + } + + /// Inserts a new node into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// This function always succeeds. + pub fn insert(&mut self, node: RBTreeNode) -> Option> + where + K: Ord, + { + let RBTreeNode { node } = node; + let node = Box::into_raw(node); + // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when + // the node is removed or replaced. 
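+ // (`addr_of_mut!` takes the field's address without creating an
+ // intermediate reference to the whole node.)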
+ let node_links = unsafe { addr_of_mut!((*node).links) }; + let mut new_link: &mut *mut bindings::rb_node = &mut self.root.rb_node; + let mut parent = core::ptr::null_mut(); + while !new_link.is_null() { + let this = crate::container_of!(*new_link, Node, links); + + parent = *new_link; + + // SAFETY: `this` is a non-null node so it is valid by the type invariants. `node` is + // valid until the node is removed. + match unsafe { (*node).key.cmp(&(*this).key) } { + // SAFETY: `parent` is a non-null node so it is valid by the type invariants. + Ordering::Less => new_link = unsafe { &mut (*parent).rb_left }, + // SAFETY: `parent` is a non-null node so it is valid by the type invariants. + Ordering::Greater => new_link = unsafe { &mut (*parent).rb_right }, + Ordering::Equal => { + // INVARIANT: We are replacing an existing node with a new one, which is valid. + // It remains valid because we "forgot" it with `Box::into_raw`. + // SAFETY: All pointers are non-null and valid (parent, despite the name, really + // is the node we're replacing). + unsafe { bindings::rb_replace_node(parent, node_links, &mut self.root) }; + + // INVARIANT: The node is being returned and the caller may free it, however, + // it was removed from the tree. So the invariants still hold. + return Some(RBTreeNode { + // SAFETY: `this` was a node in the tree, so it is valid. + node: unsafe { Box::from_raw(this as _) }, + }); + } + } + } + + // INVARIANT: We are linking in a new node, which is valid. It remains valid because we + // "forgot" it with `Box::into_raw`. + // SAFETY: All pointers are non-null and valid (`*new_link` is null, but `new_link` is a + // mutable reference). + unsafe { bindings::rb_link_node(node_links, parent, new_link) }; + + // SAFETY: All pointers are valid. `node` has just been inserted into the tree. + unsafe { bindings::rb_insert_color(node_links, &mut self.root) }; + None + } + + /// Returns a node with the given key, if one exists. + fn find(&self, key: &K) -> Option>> + where + K: Ord, + { + let mut node = self.root.rb_node; + while !node.is_null() { + let this = crate::container_of!(node, Node, links); + // SAFETY: `this` is a non-null node so it is valid by the type invariants. + node = match key.cmp(unsafe { &(*this).key }) { + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Less => unsafe { (*node).rb_left }, + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Greater => unsafe { (*node).rb_right }, + Ordering::Equal => return NonNull::new(this as _), + } + } + None + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&self, key: &K) -> Option<&V> + where + K: Ord, + { + // SAFETY: The `find` return value is a node in the tree, so it is valid. + self.find(key).map(|node| unsafe { &node.as_ref().value }) + } + + /// Returns a mutable reference to the value corresponding to the key. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> + where + K: Ord, + { + // SAFETY: the `find` return value is a node in the tree, so it is valid. + self.find(key) + .map(|mut node| unsafe { &mut node.as_mut().value }) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the node that was removed if one exists, or [`None`] otherwise. + pub fn remove_node(&mut self, key: &K) -> Option> + where + K: Ord, + { + let mut node = self.find(key)?; + + // SAFETY: the `find` return value is a node in the tree, so it is valid. 
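+ // `rb_erase` only unlinks the node from the tree; the memory itself is
+ // reclaimed by the `Box::from_raw` below.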
+ unsafe { bindings::rb_erase(&mut node.as_mut().links, &mut self.root) }; + + // INVARIANT: The node is being returned and the caller may free it, however, it was + // removed from the tree. So the invariants still hold. + Some(RBTreeNode { + // SAFETY: the `find` return value was a node in the tree, so it is valid. + node: unsafe { Box::from_raw(node.as_ptr()) }, + }) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the value that was removed if one exists, or [`None`] otherwise. + pub fn remove(&mut self, key: &K) -> Option + where + K: Ord, + { + let node = self.remove_node(key)?; + let RBTreeNode { node } = node; + let Node { + links: _, + key: _, + value, + } = *node; + Some(value) + } + + /// Returns an iterator over the tree nodes, sorted by key. + pub fn iter(&self) -> RBTreeIterator<'_, K, V> { + RBTreeIterator { + _tree: PhantomData, + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + next: unsafe { bindings::rb_first(&self.root) }, + } + } + + /// Returns a mutable iterator over the tree nodes, sorted by key. + pub fn iter_mut(&mut self) -> RBTreeIteratorMut<'_, K, V> { + RBTreeIteratorMut { + _tree: PhantomData, + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + next: unsafe { bindings::rb_first(&self.root) }, + } + } + + /// Returns an iterator over the keys of the nodes in the tree, in sorted order. + pub fn keys(&self) -> impl Iterator { + self.iter().map(|(k, _)| k) + } + + /// Returns an iterator over the values of the nodes in the tree, sorted by key. + pub fn values(&self) -> impl Iterator { + self.iter().map(|(_, v)| v) + } + + /// Returns a mutable iterator over the values of the nodes in the tree, sorted by key. + pub fn values_mut(&mut self) -> impl Iterator { + self.iter_mut().map(|(_, v)| v) + } +} + +impl Default for RBTree { + fn default() -> Self { + Self::new() + } +} + +impl Drop for RBTree { + fn drop(&mut self) { + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + let mut next = unsafe { bindings::rb_first_postorder(&self.root) }; + + // INVARIANT: The loop invariant is that all tree nodes from `next` in postorder are valid. + while !next.is_null() { + let this = crate::container_of!(next, Node, links); + + // Find out what the next node is before disposing of the current one. + // SAFETY: `next` and all nodes in postorder are still valid. + next = unsafe { bindings::rb_next_postorder(next) }; + + // INVARIANT: This is the destructor, so we break the type invariant during clean-up, + // but it is not observable. The loop invariant is still maintained. + // SAFETY: `this` is valid per the loop invariant. + unsafe { Box::from_raw(this as *mut Node) }; + } + } +} + +impl<'a, K, V> IntoIterator for &'a RBTree { + type Item = (&'a K, &'a V); + type IntoIter = RBTreeIterator<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter`]. +pub struct RBTreeIterator<'a, K, V> { + _tree: PhantomData<&'a RBTree>, + next: *mut bindings::rb_node, +} + +impl<'a, K, V> Iterator for RBTreeIterator<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + if self.next.is_null() { + return None; + } + + let cur = crate::container_of!(self.next, Node, links); + + // SAFETY: The reference to the tree used to create the iterator outlives the iterator, so + // the tree cannot change. 
By the tree invariant, all nodes are valid. + self.next = unsafe { bindings::rb_next(self.next) }; + + // SAFETY: By the same reasoning above, it is safe to dereference the node. Additionally, + // it is ok to return a reference to members because the iterator must outlive it. + Some(unsafe { (&(*cur).key, &(*cur).value) }) + } +} + +impl<'a, K, V> IntoIterator for &'a mut RBTree { + type Item = (&'a K, &'a mut V); + type IntoIter = RBTreeIteratorMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// A mutable iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter_mut`]. +pub struct RBTreeIteratorMut<'a, K, V> { + _tree: PhantomData<&'a RBTree>, + next: *mut bindings::rb_node, +} + +impl<'a, K, V> Iterator for RBTreeIteratorMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option { + if self.next.is_null() { + return None; + } + + let cur = crate::container_of!(self.next, Node, links) as *mut Node; + + // SAFETY: The reference to the tree used to create the iterator outlives the iterator, so + // the tree cannot change (except for the value of previous nodes, but those don't affect + // the iteration process). By the tree invariant, all nodes are valid. + self.next = unsafe { bindings::rb_next(self.next) }; + + // SAFETY: By the same reasoning above, it is safe to dereference the node. Additionally, + // it is ok to return a reference to members because the iterator must outlive it. + Some(unsafe { (&(*cur).key, &mut (*cur).value) }) + } +} + +/// A memory reservation for a red-black tree node. +/// +/// It contains the memory needed to hold a node that can be inserted into a red-black tree. One +/// can be obtained by directly allocating it ([`RBTree::try_reserve_node`]) or by "uninitialising" +/// ([`RBTreeNode::into_reservation`]) an actual node (usually returned by some operation like +/// removal from a tree). +pub struct RBTreeNodeReservation { + node: Box>>, +} + +impl RBTreeNodeReservation { + /// Initialises a node reservation. + /// + /// It then becomes an [`RBTreeNode`] that can be inserted into a tree. + pub fn into_node(mut self, key: K, value: V) -> RBTreeNode { + let node_ptr = self.node.as_mut_ptr(); + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).links).write(bindings::rb_node::default()) }; + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).key).write(key) }; + // SAFETY: `node_ptr` is valid, and so are its fields. + unsafe { addr_of_mut!((*node_ptr).value).write(value) }; + let raw = Box::into_raw(self.node); + RBTreeNode { + // SAFETY: The pointer came from a `MaybeUninit` whose fields have all been + // initialised. Additionally, it has the same layout as `Node`. + node: unsafe { Box::from_raw(raw as _) }, + } + } +} + +/// A red-black tree node. +/// +/// The node is fully initialised (with key and value) and can be inserted into a tree without any +/// extra allocations or failure paths. +pub struct RBTreeNode { + node: Box>, +} + +impl RBTreeNode { + /// "Uninitialises" a node. + /// + /// It then becomes a reservation that can be re-initialised into a different node (i.e., with + /// a different key and/or value). + /// + /// The existing key and value are dropped in-place as part of this operation, that is, memory + /// may be freed (but only for the key/value; memory for the node itself is kept for reuse). 
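+ ///
+ /// # Examples
+ ///
+ /// A minimal sketch:
+ ///
+ /// ```
+ /// # use kernel::prelude::*;
+ /// use kernel::rbtree::RBTree;
+ ///
+ /// fn reuse_sketch() -> Result {
+ ///     let node = RBTree::try_allocate_node(10, 100)?;
+ ///     let reservation = node.into_reservation();
+ ///     // The same allocation now backs a node with a different key/value.
+ ///     let _node = reservation.into_node(20, 200);
+ ///     Ok(())
+ /// }
+ /// ```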
+    pub fn into_reservation(self) -> RBTreeNodeReservation<K, V> {
+        let raw = Box::into_raw(self.node);
+        let mut ret = RBTreeNodeReservation {
+            // SAFETY: The pointer came from a valid `Node`, which has the same layout as
+            // `MaybeUninit<Node>`.
+            node: unsafe { Box::from_raw(raw as _) },
+        };
+        // SAFETY: Although the type is `MaybeUninit<Node>`, we know it has been initialised
+        // because it came from a `Node`. So it is safe to drop it.
+        unsafe { core::ptr::drop_in_place(ret.node.as_mut_ptr()) };
+        ret
+    }
+}
diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs
new file mode 100644
index 0000000000000..7161ec072cbaa
--- /dev/null
+++ b/rust/kernel/security.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Linux Security Modules (LSM).
+//!
+//! C header: [`include/linux/security.h`](../../../../include/linux/security.h).
+
+use crate::{bindings, error::Error, file::File, task::Task, Result};
+
+/// Calls the security modules to determine if the given task can become the manager of a binder
+/// context.
+pub fn binder_set_context_mgr(mgr: &Task) -> Result {
+    // SAFETY: By the `Task` invariants, `mgr.ptr` is valid.
+    let ret = unsafe { bindings::security_binder_set_context_mgr(mgr.ptr) };
+    if ret != 0 {
+        Err(Error::from_kernel_errno(ret))
+    } else {
+        Ok(())
+    }
+}
+
+/// Calls the security modules to determine if binder transactions are allowed from task `from` to
+/// task `to`.
+pub fn binder_transaction(from: &Task, to: &Task) -> Result {
+    // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid.
+    let ret = unsafe { bindings::security_binder_transaction(from.ptr, to.ptr) };
+    if ret != 0 {
+        Err(Error::from_kernel_errno(ret))
+    } else {
+        Ok(())
+    }
+}
+
+/// Calls the security modules to determine if task `from` is allowed to send binder objects
+/// (owned by itself or other processes) to task `to` through a binder transaction.
+pub fn binder_transfer_binder(from: &Task, to: &Task) -> Result {
+    // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid.
+    let ret = unsafe { bindings::security_binder_transfer_binder(from.ptr, to.ptr) };
+    if ret != 0 {
+        Err(Error::from_kernel_errno(ret))
+    } else {
+        Ok(())
+    }
+}
+
+/// Calls the security modules to determine if task `from` is allowed to send the given file to
+/// task `to` (which would get its own file descriptor) through a binder transaction.
+pub fn binder_transfer_file(from: &Task, to: &Task, file: &File) -> Result {
+    // SAFETY: By the `Task` invariants, `from.ptr` and `to.ptr` are valid. Similarly, by the
+    // `File` invariants, `file.ptr` is also valid.
+    let ret = unsafe { bindings::security_binder_transfer_file(from.ptr, to.ptr, file.ptr) };
+    if ret != 0 {
+        Err(Error::from_kernel_errno(ret))
+    } else {
+        Ok(())
+    }
+}
diff --git a/rust/kernel/static_assert.rs b/rust/kernel/static_assert.rs
new file mode 100644
index 0000000000000..a80d8ab57564f
--- /dev/null
+++ b/rust/kernel/static_assert.rs
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Static assert.
+
+/// Static assert (i.e. compile-time assert).
+///
+/// Similar to C11 [`_Static_assert`] and C++11 [`static_assert`].
+///
+/// The feature may be added to Rust in the future: see [RFC 2790].
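+///
+/// The check is performed by indexing a one-element array with the negated condition
+/// (see the macro body below): if the condition is `false`, the index is `1`, which is
+/// out of bounds in a constant expression, so compilation fails.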
+/// +/// [`_Static_assert`]: https://en.cppreference.com/w/c/language/_Static_assert +/// [`static_assert`]: https://en.cppreference.com/w/cpp/language/static_assert +/// [RFC 2790]: https://github.com/rust-lang/rfcs/issues/2790 +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// static_assert!(42 > 24); +/// static_assert!(core::mem::size_of::() == 1); +/// +/// const X: &[u8] = b"bar"; +/// static_assert!(X[1] == 'a' as u8); +/// +/// const fn f(x: i32) -> i32 { +/// x + 2 +/// } +/// static_assert!(f(40) == 42); +/// ``` +#[macro_export] +macro_rules! static_assert { + ($condition:expr) => { + // Based on the latest one in `rustc`'s one before it was [removed]. + // + // [removed]: https://github.com/rust-lang/rust/commit/c2dad1c6b9f9636198d7c561b47a2974f5103f6d + #[allow(dead_code)] + const _: () = [()][!($condition) as usize]; + }; +} diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs new file mode 100644 index 0000000000000..492388d7ce10d --- /dev/null +++ b/rust/kernel/std_vendor.rs @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! The contents of this file come from the Rust standard library, hosted in the +//! repository. For copyright details, see +//! . + +/// [`std::dbg`], but using [`pr_info`] instead of [`eprintln`]. +/// +/// Prints and returns the value of a given expression for quick and dirty +/// debugging. +/// +/// An example: +/// +/// ```rust +/// let a = 2; +/// let b = dbg!(a * 2) + 1; +/// // ^-- prints: [src/main.rs:2] a * 2 = 4 +/// assert_eq!(b, 5); +/// ``` +/// +/// The macro works by using the `Debug` implementation of the type of +/// the given expression to print the value with [`printk`] along with the +/// source location of the macro invocation as well as the source code +/// of the expression. +/// +/// Invoking the macro on an expression moves and takes ownership of it +/// before returning the evaluated expression unchanged. If the type +/// of the expression does not implement `Copy` and you don't want +/// to give up ownership, you can instead borrow with `dbg!(&expr)` +/// for some expression `expr`. +/// +/// The `dbg!` macro works exactly the same in release builds. +/// This is useful when debugging issues that only occur in release +/// builds or when debugging in release mode is significantly faster. +/// +/// Note that the macro is intended as a debugging tool and therefore you +/// should avoid having uses of it in version control for long periods +/// (other than in tests and similar). +/// +/// # Stability +/// +/// The exact output printed by this macro should not be relied upon +/// and is subject to future changes. +/// +/// # Further examples +/// +/// With a method call: +/// +/// ```rust +/// fn foo(n: usize) { +/// if let Some(_) = dbg!(n.checked_sub(4)) { +/// // ... 
+/// } +/// } +/// +/// foo(3) +/// ``` +/// +/// This prints to the kernel log: +/// +/// ```text,ignore +/// [src/main.rs:4] n.checked_sub(4) = None +/// ``` +/// +/// Naive factorial implementation: +/// +/// ```rust +/// fn factorial(n: u32) -> u32 { +/// if dbg!(n <= 1) { +/// dbg!(1) +/// } else { +/// dbg!(n * factorial(n - 1)) +/// } +/// } +/// +/// dbg!(factorial(4)); +/// ``` +/// +/// This prints to the kernel log: +/// +/// ```text,ignore +/// [src/main.rs:3] n <= 1 = false +/// [src/main.rs:3] n <= 1 = false +/// [src/main.rs:3] n <= 1 = false +/// [src/main.rs:3] n <= 1 = true +/// [src/main.rs:4] 1 = 1 +/// [src/main.rs:5] n * factorial(n - 1) = 2 +/// [src/main.rs:5] n * factorial(n - 1) = 6 +/// [src/main.rs:5] n * factorial(n - 1) = 24 +/// [src/main.rs:11] factorial(4) = 24 +/// ``` +/// +/// The `dbg!(..)` macro moves the input: +/// +/// ```compile_fail +/// /// A wrapper around `usize` which importantly is not Copyable. +/// #[derive(Debug)] +/// struct NoCopy(usize); +/// +/// let a = NoCopy(42); +/// let _ = dbg!(a); // <-- `a` is moved here. +/// let _ = dbg!(a); // <-- `a` is moved again; error! +/// ``` +/// +/// You can also use `dbg!()` without a value to just print the +/// file and line whenever it's reached. +/// +/// Finally, if you want to `dbg!(..)` multiple values, it will treat them as +/// a tuple (and return it, too): +/// +/// ``` +/// assert_eq!(dbg!(1usize, 2u32), (1, 2)); +/// ``` +/// +/// However, a single argument with a trailing comma will still not be treated +/// as a tuple, following the convention of ignoring trailing commas in macro +/// invocations. You can use a 1-tuple directly if you need one: +/// +/// ``` +/// assert_eq!(1, dbg!(1u32,)); // trailing comma ignored +/// assert_eq!((1,), dbg!((1u32,))); // 1-tuple +/// ``` +/// +/// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html +/// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html +/// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html +#[macro_export] +macro_rules! dbg { + // NOTE: We cannot use `concat!` to make a static string as a format argument + // of `pr_info!` because `file!` could contain a `{` or + // `$val` expression could be a block (`{ .. }`), in which case the `pr_info!` + // will be malformed. + () => { + $crate::pr_info!("[{}:{}]\n", ::core::file!(), ::core::line!()) + }; + ($val:expr $(,)?) => { + // Use of `match` here is intentional because it affects the lifetimes + // of temporaries - https://stackoverflow.com/a/48732525/1063961 + match $val { + tmp => { + $crate::pr_info!("[{}:{}] {} = {:#?}\n", + ::core::file!(), ::core::line!(), ::core::stringify!($val), &tmp); + tmp + } + } + }; + ($($val:expr),+ $(,)?) => { + ($($crate::dbg!($val)),+,) + }; +} diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs new file mode 100644 index 0000000000000..ff5bc12f95680 --- /dev/null +++ b/rust/kernel/str.rs @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! String representations. + +use core::ops::{self, Deref, Index}; + +use crate::bindings; +use crate::c_types; + +/// Byte string without UTF-8 validity guarantee. +/// +/// `BStr` is simply an alias to `[u8]`, but has a more evident semantical meaning. +pub type BStr = [u8]; + +/// Creates a new [`BStr`] from a string literal. +/// +/// `b_str!` converts the supplied string literal to byte string, so non-ASCII +/// characters can be included. 
+/// +/// # Examples +/// +/// ``` +/// # use kernel::b_str; +/// # use kernel::str::BStr; +/// const MY_BSTR: &'static BStr = b_str!("My awesome BStr!"); +/// ``` +#[macro_export] +macro_rules! b_str { + ($str:literal) => {{ + const S: &'static str = $str; + const C: &'static $crate::str::BStr = S.as_bytes(); + C + }}; +} + +/// Possible errors when using conversion functions in [`CStr`]. +#[derive(Debug, Clone, Copy)] +pub enum CStrConvertError { + /// Supplied bytes contain an interior `NUL`. + InteriorNul, + + /// Supplied bytes are not terminated by `NUL`. + NotNulTerminated, +} + +impl From for crate::Error { + #[inline] + fn from(_: CStrConvertError) -> crate::Error { + crate::Error::EINVAL + } +} + +/// A string that is guaranteed to have exactly one `NUL` byte, which is at the +/// end. +/// +/// Used for interoperability with kernel APIs that take C strings. +#[repr(transparent)] +pub struct CStr([u8]); + +impl CStr { + /// Returns the length of this string excluding `NUL`. + #[inline] + pub const fn len(&self) -> usize { + self.len_with_nul() - 1 + } + + /// Returns the length of this string with `NUL`. + #[inline] + pub const fn len_with_nul(&self) -> usize { + // SAFETY: This is one of the invariant of `CStr`. + // We add a `unreachable_unchecked` here to hint the optimizer that + // the value returned from this function is non-zero. + if self.0.is_empty() { + unsafe { core::hint::unreachable_unchecked() }; + } + self.0.len() + } + + /// Returns `true` if the string only includes `NUL`. + #[inline] + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Wraps a raw C string pointer. + /// + /// # Safety + /// + /// `ptr` must be a valid pointer to a `NUL`-terminated C string, and it must + /// last at least `'a`. When `CStr` is alive, the memory pointed by `ptr` + /// must not be mutated. + #[inline] + pub unsafe fn from_char_ptr<'a>(ptr: *const c_types::c_char) -> &'a Self { + // SAFETY: The safety precondition guarantees `ptr` is a valid pointer + // to a `NUL`-terminated C string. + let len = unsafe { bindings::strlen(ptr) } + 1; + // SAFETY: Lifetime guaranteed by the safety precondition. + let bytes = unsafe { core::slice::from_raw_parts(ptr as _, len as _) }; + // SAFETY: As `len` is returned by `strlen`, `bytes` does not contain interior `NUL`. + // As we have added 1 to `len`, the last byte is known to be `NUL`. + unsafe { Self::from_bytes_with_nul_unchecked(bytes) } + } + + /// Creates a [`CStr`] from a `[u8]`. + /// + /// The provided slice must be `NUL`-terminated, does not contain any + /// interior `NUL` bytes. + pub const fn from_bytes_with_nul(bytes: &[u8]) -> Result<&Self, CStrConvertError> { + if bytes.is_empty() { + return Err(CStrConvertError::NotNulTerminated); + } + if bytes[bytes.len() - 1] != 0 { + return Err(CStrConvertError::NotNulTerminated); + } + let mut i = 0; + // `i + 1 < bytes.len()` allows LLVM to optimize away bounds checking, + // while it couldn't optimize away bounds checks for `i < bytes.len() - 1`. + while i + 1 < bytes.len() { + if bytes[i] == 0 { + return Err(CStrConvertError::InteriorNul); + } + i += 1; + } + // SAFETY: We just checked that all properties hold. + Ok(unsafe { Self::from_bytes_with_nul_unchecked(bytes) }) + } + + /// Creates a [`CStr`] from a `[u8]`, panic if input is not valid. + /// + /// This function is only meant to be used by `c_str!` macro, so + /// crates using `c_str!` macro don't have to enable `const_panic` feature. 
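+    ///
+    /// A sketch of (normally unnecessary) direct usage; real callers go through `c_str!`:
+    ///
+    /// ```
+    /// # use kernel::str::CStr;
+    /// const OK: &CStr = CStr::from_bytes_with_nul_unwrap(b"foo\0");
+    /// assert_eq!(OK.len(), 3);
+    /// ```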
+ #[doc(hidden)] + pub const fn from_bytes_with_nul_unwrap(bytes: &[u8]) -> &Self { + match Self::from_bytes_with_nul(bytes) { + Ok(v) => v, + Err(_) => panic!("string contains interior NUL"), + } + } + + /// Creates a [`CStr`] from a `[u8]` without performing any additional + /// checks. + /// + /// # Safety + /// + /// `bytes` *must* end with a `NUL` byte, and should only have a single + /// `NUL` byte (or the string will be truncated). + #[inline] + pub const unsafe fn from_bytes_with_nul_unchecked(bytes: &[u8]) -> &CStr { + // SAFETY: Properties of `bytes` guaranteed by the safety precondition. + unsafe { core::mem::transmute(bytes) } + } + + /// Returns a C pointer to the string. + #[inline] + pub const fn as_char_ptr(&self) -> *const c_types::c_char { + self.0.as_ptr() as _ + } + + /// Convert the string to a byte slice without the trailing 0 byte. + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.0[..self.len()] + } + + /// Convert the string to a byte slice containing the trailing 0 byte. + #[inline] + pub const fn as_bytes_with_nul(&self) -> &[u8] { + &self.0 + } +} + +impl AsRef for CStr { + #[inline] + fn as_ref(&self) -> &BStr { + self.as_bytes() + } +} + +impl Deref for CStr { + type Target = BStr; + + #[inline] + fn deref(&self) -> &Self::Target { + self.as_bytes() + } +} + +impl Index> for CStr { + type Output = CStr; + + #[inline] + fn index(&self, index: ops::RangeFrom) -> &Self::Output { + // Delegate bounds checking to slice. + // Assign to _ to mute clippy's unnecessary operation warning. + let _ = &self.as_bytes()[index.start..]; + // SAFETY: We just checked the bounds. + unsafe { Self::from_bytes_with_nul_unchecked(&self.0[index.start..]) } + } +} + +impl Index for CStr { + type Output = CStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &Self::Output { + self + } +} + +mod private { + use core::ops; + + // Marker trait for index types that can be forward to `BStr`. + pub trait CStrIndex {} + + impl CStrIndex for usize {} + impl CStrIndex for ops::Range {} + impl CStrIndex for ops::RangeInclusive {} + impl CStrIndex for ops::RangeToInclusive {} +} + +impl Index for CStr +where + Idx: private::CStrIndex, + BStr: Index, +{ + type Output = >::Output; + + #[inline] + fn index(&self, index: Idx) -> &Self::Output { + &self.as_bytes()[index] + } +} + +/// Creates a new [`CStr`] from a string literal. +/// +/// The string literal should not contain any `NUL` bytes. +/// +/// # Examples +/// +/// ``` +/// # use kernel::c_str; +/// # use kernel::str::CStr; +/// const MY_CSTR: &'static CStr = c_str!("My awesome CStr!"); +/// ``` +#[macro_export] +macro_rules! c_str { + ($str:literal) => {{ + const S: &str = concat!($str, "\0"); + const C: &$crate::str::CStr = $crate::str::CStr::from_bytes_with_nul_unwrap(S.as_bytes()); + C + }}; +} diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs new file mode 100644 index 0000000000000..10c45680a4f35 --- /dev/null +++ b/rust/kernel/sync/arc.rs @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A reference-counted pointer. +//! +//! This module implements a way for users to create reference-counted objects and pointers to +//! them. Such a pointer automatically increments and decrements the count, and drops the +//! underlying object when it reaches zero. It is also safe to use concurrently from multiple +//! threads. +//! +//! It is different from the standard library's [`Arc`] in a few ways: +//! 1. It is backed by the kernel's `refcount_t` type. +//! 2. 
It does not support weak references, which allows it to be half the size. +//! 3. It saturates the reference count instead of aborting when it goes over a threshold. +//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned. +//! +//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html + +use crate::{bindings, Error, Result}; +use alloc::{ + alloc::{alloc, dealloc}, + vec::Vec, +}; +use core::{ + alloc::Layout, + cell::UnsafeCell, + convert::{AsRef, TryFrom}, + marker::{PhantomData, Unsize}, + mem::{ManuallyDrop, MaybeUninit}, + ops::{Deref, DerefMut}, + pin::Pin, + ptr::{self, NonNull}, +}; + +/// A reference-counted pointer to an instance of `T`. +/// +/// The reference count is incremented when new instances of [`Ref`] are created, and decremented +/// when they are dropped. When the count reaches zero, the underlying `T` is also dropped. +/// +/// # Invariants +/// +/// The reference count on an instance of [`Ref`] is always non-zero. +/// The object pointed to by [`Ref`] is always pinned. +pub struct Ref { + ptr: NonNull>, + _p: PhantomData>, +} + +#[repr(C)] +struct RefInner { + refcount: UnsafeCell, + data: T, +} + +// This is to allow [`Ref`] (and variants) to be used as the type of `self`. +impl core::ops::Receiver for Ref {} + +// This is to allow coercion from `Ref` to `Ref` if `T` can be converted to the +// dynamically-sized type (DST) `U`. +impl, U: ?Sized> core::ops::CoerceUnsized> for Ref {} + +// This is to allow `Ref` to be dispatched on when `Ref` can be coerced into `Ref`. +impl, U: ?Sized> core::ops::DispatchFromDyn> for Ref {} + +// SAFETY: It is safe to send `Ref` to another thread when the underlying `T` is `Sync` because +// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs +// `T` to be `Send` because any thread that has a `Ref` may ultimately access `T` directly, for +// example, when the reference count reaches zero and `T` is dropped. +unsafe impl Send for Ref {} + +// SAFETY: It is safe to send `&Ref` to another thread when the underlying `T` is `Sync` for +// the same reason as above. `T` needs to be `Send` as well because a thread can clone a `&Ref` +// into a `Ref`, which may lead to `T` being accessed by the same reasoning as above. +unsafe impl Sync for Ref {} + +impl Ref { + /// Constructs a new reference counted instance of `T`. + pub fn try_new(contents: T) -> Result { + let layout = Layout::new::>(); + // SAFETY: The layout size is guaranteed to be non-zero because `RefInner` contains the + // reference count. + let inner = NonNull::new(unsafe { alloc(layout) }) + .ok_or(Error::ENOMEM)? + .cast::>(); + + // INVARIANT: The refcount is initialised to a non-zero value. + let value = RefInner { + // SAFETY: Just an FFI call that returns a `refcount_t` initialised to 1. + refcount: UnsafeCell::new(unsafe { bindings::REFCOUNT_INIT(1) }), + data: contents, + }; + // SAFETY: `inner` is writable and properly aligned. + unsafe { inner.as_ptr().write(value) }; + + // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new + // `Ref` object. + Ok(unsafe { Self::from_inner(inner) }) + } + + /// Constructs a new reference counted instance of `T` and calls the initialisation function. + /// + /// This is useful because it provides a mutable reference to `T` at its final location. + pub fn try_new_and_init)>(contents: T, init: U) -> Result { + Ok(UniqueRef::try_new(contents)?.pin_init_and_share(init)) + } + + /// Deconstructs a [`Ref`] object into a `usize`. 
+    ///
+    /// It can be reconstructed once via [`Ref::from_usize`].
+    pub fn into_usize(obj: Self) -> usize {
+        ManuallyDrop::new(obj).ptr.as_ptr() as _
+    }
+
+    /// Borrows a [`Ref`] instance previously deconstructed via [`Ref::into_usize`].
+    ///
+    /// # Safety
+    ///
+    /// `encoded` must have been returned by a previous call to [`Ref::into_usize`]. Additionally,
+    /// [`Ref::from_usize`] can only be called after *all* instances of [`RefBorrow`] have been
+    /// dropped.
+    pub unsafe fn borrow_usize(encoded: usize) -> RefBorrow<T> {
+        // SAFETY: By the safety requirement of this function, we know that `encoded` came from
+        // a previous call to `Ref::into_usize`.
+        let obj = ManuallyDrop::new(unsafe { Ref::from_usize(encoded) });
+
+        // SAFETY: The safety requirements ensure that the object remains alive for the lifetime of
+        // the returned value. There is no way to create mutable references to the object.
+        unsafe { RefBorrow::new(obj) }
+    }
+
+    /// Recreates a [`Ref`] instance previously deconstructed via [`Ref::into_usize`].
+    ///
+    /// # Safety
+    ///
+    /// `encoded` must have been returned by a previous call to [`Ref::into_usize`]. Additionally,
+    /// it can only be called once for each previous call to [`Ref::into_usize`].
+    pub unsafe fn from_usize(encoded: usize) -> Self {
+        // SAFETY: By the safety invariants we know that `encoded` came from `Ref::into_usize`, so
+        // the reference count held then will be owned by the new `Ref` object.
+        unsafe { Self::from_inner(NonNull::new(encoded as _).unwrap()) }
+    }
+}
+
+impl<T: ?Sized> Ref<T> {
+    /// Constructs a new [`Ref`] from an existing [`RefInner`].
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure that `inner` points to a valid location and has a non-zero reference
+    /// count, one of which will be owned by the new [`Ref`] instance.
+    unsafe fn from_inner(inner: NonNull<RefInner<T>>) -> Self {
+        // INVARIANT: By the safety requirements, the invariants hold.
+        Ref {
+            ptr: inner,
+            _p: PhantomData,
+        }
+    }
+
+    /// Determines if two reference-counted pointers point to the same underlying instance of `T`.
+    pub fn ptr_eq(a: &Self, b: &Self) -> bool {
+        ptr::eq(a.ptr.as_ptr(), b.ptr.as_ptr())
+    }
+
+    /// Returns a pinned version of a given `Ref` instance.
+    pub fn pinned(obj: Self) -> Pin<Self> {
+        // SAFETY: The type invariants guarantee that the value is pinned.
+        unsafe { Pin::new_unchecked(obj) }
+    }
+
+    /// Deconstructs a [`Ref`] object into a raw pointer.
+    ///
+    /// It can be reconstructed once via [`Ref::from_raw`].
+    pub fn into_raw(obj: Self) -> *const T {
+        let ret = &*obj as *const T;
+        core::mem::forget(obj);
+        ret
+    }
+
+    /// Recreates a [`Ref`] instance previously deconstructed via [`Ref::into_raw`].
+    ///
+    /// This code relies on the `repr(C)` layout of structs as described in
+    /// <https://doc.rust-lang.org/reference/type-layout.html#reprc-structs>.
+    ///
+    /// # Safety
+    ///
+    /// `ptr` must have been returned by a previous call to [`Ref::into_raw`]. Additionally, it
+    /// can only be called once for each previous call to [`Ref::into_raw`].
+    pub unsafe fn from_raw(ptr: *const T) -> Self {
+        // SAFETY: The safety requirement ensures that the pointer is valid.
+        let align = core::mem::align_of_val(unsafe { &*ptr });
+        let offset = Layout::new::<RefInner<()>>()
+            .align_to(align)
+            .unwrap()
+            .pad_to_align()
+            .size();
+        // SAFETY: The pointer is in bounds because by the safety requirements `ptr` came from
+        // `Ref::into_raw`, so it is a pointer `offset` bytes from the beginning of the allocation.
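+        // Note: `RefInner<()>` is used above because the offset of `data` depends only on the
+        // header fields and on `T`'s alignment (supplied via `align_to`), not on `T` itself.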
+ let data = unsafe { (ptr as *const u8).sub(offset) }; + let metadata = ptr::metadata(ptr as *const RefInner); + let ptr = ptr::from_raw_parts_mut(data as _, metadata); + // SAFETY: By the safety requirements we know that `ptr` came from `Ref::into_raw`, so the + // reference count held then will be owned by the new `Ref` object. + unsafe { Self::from_inner(NonNull::new(ptr).unwrap()) } + } +} + +impl Deref for Ref { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to dereference it. + unsafe { &self.ptr.as_ref().data } + } +} + +impl Clone for Ref { + fn clone(&self) -> Self { + // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero. + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to increment the refcount. + unsafe { bindings::refcount_inc(self.ptr.as_ref().refcount.get()) }; + Self { + ptr: self.ptr, + _p: PhantomData, + } + } +} + +impl AsRef for Ref { + fn as_ref(&self) -> &T { + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to dereference it. + unsafe { &self.ptr.as_ref().data } + } +} + +impl Drop for Ref { + fn drop(&mut self) { + // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot + // touch `refcount` after it's decremented to a non-zero value because another thread/CPU + // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to + // freed/invalid memory as long as it is never dereferenced. + let refcount = unsafe { self.ptr.as_ref() }.refcount.get(); + + // INVARIANT: If the refcount reaches zero, there are no other instances of `Ref`, and + // this instance is being dropped, so the broken invariant is not observable. + // SAFETY: Also by the type invariant, we are allowed to decrement the refcount. + let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) }; + if is_zero { + // The count reached zero, we must free the memory. + + // SAFETY: This thread holds the only remaining reference to `self`, so it is safe to + // get a mutable reference to it. + let inner = unsafe { self.ptr.as_mut() }; + let layout = Layout::for_value(inner); + // SAFETY: The value stored in inner is valid. + unsafe { core::ptr::drop_in_place(inner) }; + // SAFETY: The pointer was initialised from the result of a call to `alloc`. + unsafe { dealloc(self.ptr.cast().as_ptr(), layout) }; + } + } +} + +impl TryFrom> for Ref<[T]> { + type Error = Error; + + fn try_from(mut v: Vec) -> Result { + let value_layout = Layout::array::(v.len())?; + let layout = Layout::new::>() + .extend(value_layout)? + .0 + .pad_to_align(); + // SAFETY: The layout size is guaranteed to be non-zero because `RefInner` contains the + // reference count. + let ptr = NonNull::new(unsafe { alloc(layout) }).ok_or(Error::ENOMEM)?; + let inner = + core::ptr::slice_from_raw_parts_mut(ptr.as_ptr() as _, v.len()) as *mut RefInner<[T]>; + + // SAFETY: Just an FFI call that returns a `refcount_t` initialised to 1. + let count = UnsafeCell::new(unsafe { bindings::REFCOUNT_INIT(1) }); + // SAFETY: `inner.refcount` is writable and properly aligned. + unsafe { core::ptr::addr_of_mut!((*inner).refcount).write(count) }; + // SAFETY: The contents of `v` as readable and properly aligned; `inner.data` is writable + // and properly aligned. There is no overlap between the two because `inner` is a new + // allocation. 
+ unsafe { + core::ptr::copy_nonoverlapping( + v.as_ptr(), + core::ptr::addr_of_mut!((*inner).data) as *mut [T] as *mut T, + v.len(), + ) + }; + // SAFETY: We're setting the new length to zero, so it is <= to capacity, and old_len..0 is + // an empty range (so satisfies vacuously the requirement of being initialised). + unsafe { v.set_len(0) }; + // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new + // `Ref` object. + Ok(unsafe { Self::from_inner(NonNull::new(inner).unwrap()) }) + } +} + +impl From> for Ref { + fn from(item: UniqueRef) -> Self { + item.inner + } +} + +/// A borrowed [`Ref`] with manually-managed lifetime. +/// +/// # Invariants +/// +/// There are no mutable references to the underlying [`Ref`], and it remains valid for the lifetime +/// of the [`RefBorrow`] instance. +pub struct RefBorrow { + inner_ref: ManuallyDrop>, +} + +impl RefBorrow { + /// Creates a new [`RefBorrow`] instance. + /// + /// # Safety + /// + /// Callers must ensure the following for the lifetime of the returned [`RefBorrow`] instance: + /// 1. That `obj` remains valid; + /// 2. That no mutable references to `obj` are created. + unsafe fn new(obj: ManuallyDrop>) -> Self { + // INVARIANT: The safety requirements guarantee the invariants. + Self { inner_ref: obj } + } +} + +impl Deref for RefBorrow { + type Target = Ref; + + fn deref(&self) -> &Self::Target { + self.inner_ref.deref() + } +} + +/// A refcounted object that is known to have a refcount of 1. +/// +/// It is mutable and can be converted to a [`Ref`] so that it can be shared. +/// +/// # Invariants +/// +/// `inner` always has a reference count of 1. +/// +/// # Examples +/// +/// In the following example, we make changes to the inner object before turning it into a +/// `Ref` object (after which point, it cannot be mutated directly). Note that `x.into()` +/// cannot fail. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::sync::{Ref, UniqueRef}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test() -> Result> { +/// let mut x = UniqueRef::try_new(Example { a: 10, b: 20 })?; +/// x.a += 1; +/// x.b += 1; +/// Ok(x.into()) +/// } +/// ``` +/// +/// In the following example we first allocate memory for a ref-counted `Example` but we don't +/// initialise it on allocation. We do initialise it later with a call to [`UniqueRef::write`], +/// followed by a conversion to `Ref`. This is particularly useful when allocation happens +/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic): +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::sync::{Ref, UniqueRef}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test2() -> Result> { +/// let x = UniqueRef::try_new_uninit()?; +/// Ok(x.write(Example { a: 10, b: 20 }).into()) +/// } +/// ``` +/// +/// In the last example below, the caller gets a pinned instance of `Example` while converting to +/// `Ref`; this is useful in scenarios where one needs a pinned reference during +/// initialisation, for example, when initialising fields that are wrapped in locks. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::sync::{Ref, UniqueRef}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test2() -> Result> { +/// let mut x = UniqueRef::try_new(Example { a: 10, b: 20 })?; +/// // We can modify `pinned` because it is `Unpin`. 
+/// Ok(x.pin_init_and_share(|mut pinned| pinned.a += 1)) +/// } +/// ``` +pub struct UniqueRef { + inner: Ref, +} + +impl UniqueRef { + /// Tries to allocate a new [`UniqueRef`] instance. + pub fn try_new(value: T) -> Result { + Ok(Self { + // INVARIANT: The newly-created object has a ref-count of 1. + inner: Ref::try_new(value)?, + }) + } + + /// Tries to allocate a new [`UniqueRef`] instance whose contents are not initialised yet. + pub fn try_new_uninit() -> Result>> { + Ok(UniqueRef::> { + // INVARIANT: The newly-created object has a ref-count of 1. + inner: Ref::try_new(MaybeUninit::uninit())?, + }) + } +} + +impl UniqueRef { + /// Converts a [`UniqueRef`] into a [`Ref`]. + /// + /// It allows callers to get a `Pin<&mut T>` that they can use to initialise the inner object + /// just before it becomes shareable. + pub fn pin_init_and_share)>(mut self, init: U) -> Ref { + let inner = self.deref_mut(); + + // SAFETY: By the `Ref` invariant, `RefInner` is pinned and `T` is also pinned. + let pinned = unsafe { Pin::new_unchecked(inner) }; + + // INVARIANT: The only places where `&mut T` is available are here (where it is explicitly + // pinned, i.e. implementations of `init` will see a Pin<&mut T>), and in `Ref::drop`. Both + // are compatible with the pin requirements of the invariants of `Ref`. + init(pinned); + + self.into() + } +} + +impl UniqueRef> { + /// Converts a `UniqueRef>` into a `UniqueRef` by writing a value into it. + pub fn write(mut self, value: T) -> UniqueRef { + self.deref_mut().write(value); + let inner = ManuallyDrop::new(self).inner.ptr; + UniqueRef { + // SAFETY: The new `Ref` is taking over `ptr` from `self.inner` (which won't be + // dropped). The types are compatible because `MaybeUninit` is compatible with `T`. + inner: unsafe { Ref::from_inner(inner.cast()) }, + } + } +} + +impl Deref for UniqueRef { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl DerefMut for UniqueRef { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: By the `Ref` type invariant, there is necessarily a reference to the object, so + // it is safe to dereference it. Additionally, we know there is only one reference when + // it's inside a `UniqueRef`, so it is safe to get a mutable reference. + unsafe { &mut self.inner.ptr.as_mut().data } + } +} diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs new file mode 100644 index 0000000000000..8add1b5be39ce --- /dev/null +++ b/rust/kernel/sync/condvar.rs @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A condition variable. +//! +//! This module allows Rust code to use the kernel's [`struct wait_queue_head`] as a condition +//! variable. + +use super::{Guard, Lock, NeedsLockClass}; +use crate::{bindings, str::CStr, task::Task}; +use core::{cell::UnsafeCell, marker::PhantomPinned, mem::MaybeUninit, pin::Pin}; + +/// Safely initialises a [`CondVar`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! condvar_init { + ($condvar:expr, $name:literal) => { + $crate::init_with_lockdep!($condvar, $name) + }; +} + +// TODO: `bindgen` is not generating this constant. Figure out why. +const POLLFREE: u32 = 0x4000; + +/// Exposes the kernel's [`struct wait_queue_head`] as a condition variable. It allows the caller to +/// atomically release the given lock and go to sleep. It reacquires the lock when it wakes up. 
And +/// it wakes up when notified by another thread (via [`CondVar::notify_one`] or +/// [`CondVar::notify_all`]) or because the thread received a signal. +/// +/// [`struct wait_queue_head`]: ../../../include/linux/wait.h +pub struct CondVar { + pub(crate) wait_list: UnsafeCell, + + /// A condvar needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, +} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on any thread. +unsafe impl Send for CondVar {} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on multiple threads +// concurrently. +unsafe impl Sync for CondVar {} + +impl CondVar { + /// Constructs a new conditional variable. + /// + /// # Safety + /// + /// The caller must call `CondVar::init` before using the conditional variable. + pub unsafe fn new() -> Self { + Self { + wait_list: UnsafeCell::new(bindings::wait_queue_head::default()), + _pin: PhantomPinned, + } + } + + /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the + /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or + /// [`CondVar::notify_all`], or when the thread receives a signal. + /// + /// Returns whether there is a signal pending. + #[must_use = "wait returns if a signal is pending, so the caller must check the return value"] + pub fn wait(&self, guard: &mut Guard<'_, L>) -> bool { + let lock = guard.lock; + let mut wait = MaybeUninit::::uninit(); + + // SAFETY: `wait` points to valid memory. + unsafe { bindings::init_wait(wait.as_mut_ptr()) }; + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { + bindings::prepare_to_wait_exclusive( + self.wait_list.get(), + wait.as_mut_ptr(), + bindings::TASK_INTERRUPTIBLE as _, + ); + } + + // SAFETY: The guard is evidence that the caller owns the lock. + unsafe { lock.unlock(&mut guard.context) }; + + // SAFETY: No arguments, switches to another thread. + unsafe { bindings::schedule() }; + + guard.context = lock.lock_noguard(); + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { bindings::finish_wait(self.wait_list.get(), wait.as_mut_ptr()) }; + + Task::current().signal_pending() + } + + /// Calls the kernel function to notify the appropriate number of threads with the given flags. + fn notify(&self, count: i32, flags: u32) { + // SAFETY: `wait_list` points to valid memory. + unsafe { + bindings::__wake_up( + self.wait_list.get(), + bindings::TASK_NORMAL, + count, + flags as _, + ) + }; + } + + /// Wakes a single waiter up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_one(&self) { + self.notify(1, 0); + } + + /// Wakes all waiters up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_all(&self) { + self.notify(0, 0); + } + + /// Wakes all waiters up. If they were added by `epoll`, they are also removed from the list of + /// waiters. This is useful when cleaning up a condition variable that may be waited on by + /// threads that use `epoll`. 
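+    ///
+    /// A sketch of the intended clean-up sequence, assuming `cv` is an initialised condition
+    /// variable that `epoll` users may still be waiting on:
+    ///
+    /// ```no_run
+    /// # use kernel::sync::CondVar;
+    /// # fn teardown(cv: &CondVar) {
+    /// // Wake up all waiters and detach any `epoll` ones before `cv` goes away.
+    /// cv.free_waiters();
+    /// # }
+    /// ```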
+ pub fn free_waiters(&self) { + self.notify(1, bindings::POLLHUP | POLLFREE); + } +} + +impl NeedsLockClass for CondVar { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { bindings::__init_waitqueue_head(self.wait_list.get(), name.as_char_ptr(), key) }; + } +} diff --git a/rust/kernel/sync/guard.rs b/rust/kernel/sync/guard.rs new file mode 100644 index 0000000000000..3af735d363571 --- /dev/null +++ b/rust/kernel/sync/guard.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A generic lock guard and trait. +//! +//! This module contains a lock guard that can be used with any locking primitive that implements +//! the ([`Lock`]) trait. It also contains the definition of the trait, which can be leveraged by +//! other constructs to work on generic locking primitives. + +/// Allows mutual exclusion primitives that implement the [`Lock`] trait to automatically unlock +/// when a guard goes out of scope. It also provides a safe and convenient way to access the data +/// protected by the lock. +#[must_use = "the lock unlocks immediately when the guard is unused"] +pub struct Guard<'a, L: Lock + ?Sized> { + pub(crate) lock: &'a L, + pub(crate) context: L::GuardContext, +} + +// SAFETY: `Guard` is sync when the data protected by the lock is also sync. This is more +// conservative than the default compiler implementation; more details can be found on +// https://github.com/rust-lang/rust/issues/41622 -- it refers to `MutexGuard` from the standard +// library. +unsafe impl Sync for Guard<'_, L> +where + L: Lock + ?Sized, + L::Inner: Sync, +{ +} + +impl core::ops::Deref for Guard<'_, L> { + type Target = L::Inner; + + fn deref(&self) -> &Self::Target { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. + unsafe { &*self.lock.locked_data().get() } + } +} + +impl core::ops::DerefMut for Guard<'_, L> { + fn deref_mut(&mut self) -> &mut L::Inner { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. + unsafe { &mut *self.lock.locked_data().get() } + } +} + +impl Drop for Guard<'_, L> { + fn drop(&mut self) { + // SAFETY: The caller owns the lock, so it is safe to unlock it. + unsafe { self.lock.unlock(&mut self.context) }; + } +} + +impl<'a, L: Lock + ?Sized> Guard<'a, L> { + /// Constructs a new lock guard. + /// + /// # Safety + /// + /// The caller must ensure that it owns the lock. + pub(crate) unsafe fn new(lock: &'a L, context: L::GuardContext) -> Self { + Self { lock, context } + } +} + +/// A generic mutual exclusion primitive. +/// +/// [`Guard`] is written such that any mutual exclusion primitive that can implement this trait can +/// also benefit from having an automatic way to unlock itself. +/// +/// # Safety +/// +/// Implementers of this trait must ensure that only one thread/CPU may access the protected data +/// once the lock is held, that is, between calls to `lock_noguard` and `unlock`. +pub unsafe trait Lock { + /// The type of the data protected by the lock. + type Inner: ?Sized; + + /// The type of context, if any, that needs to be stored in the guard. + type GuardContext; + + /// Acquires the lock, making the caller its owner. + fn lock_noguard(&self) -> Self::GuardContext; + + /// Releases the lock, giving up ownership of the lock. + /// + /// # Safety + /// + /// It must only be called by the current owner of the lock. + unsafe fn unlock(&self, context: &mut Self::GuardContext); + + /// Returns the data protected by the lock. 
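+    ///
+    /// Note that this returns the `UnsafeCell` itself, not a reference to its contents; callers
+    /// such as [`Guard`] are responsible for only dereferencing it while the lock is held.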
+ fn locked_data(&self) -> &core::cell::UnsafeCell; +} diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs new file mode 100644 index 0000000000000..d3e0b0d5e9b4e --- /dev/null +++ b/rust/kernel/sync/locked_by.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A wrapper for data protected by a lock that does not wrap it. + +use super::{Guard, Lock}; +use core::{cell::UnsafeCell, ops::Deref, ptr}; + +/// Allows access to some data to be serialised by a lock that does not wrap it. +/// +/// In most cases, data protected by a lock is wrapped by the appropriate lock type, e.g., +/// [`super::Mutex`] or [`super::SpinLock`]. [`LockedBy`] is meant for cases when this is not +/// possible. For example, if a container has a lock and some data in the contained elements needs +/// to be protected by the same lock. +/// +/// [`LockedBy`] wraps the data in lieu of another locking primitive, and only allows access to it +/// when the caller shows evidence that 'external' lock is locked. +/// +/// # Example +/// +/// The following is an example for illustrative purposes: `InnerDirectory::bytes_used` is an +/// aggregate of all `InnerFile::bytes_used` and must be kept consistent; so we wrap `InnerFile` in +/// a `LockedBy` so that it shares a lock with `InnerDirectory`. This allows us to enforce at +/// compile-time that access to `InnerFile` is only granted when an `InnerDirectory` is also +/// locked; we enforce at run time that the right `InnerDirectory` is locked. +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::sync::{LockedBy, Mutex}; +/// +/// struct InnerFile { +/// bytes_used: u64, +/// } +/// +/// struct File { +/// name: String, +/// inner: LockedBy>, +/// } +/// +/// struct InnerDirectory { +/// /// The sum of the bytes used by all files. +/// bytes_used: u64, +/// files: Vec, +/// } +/// +/// struct Directory { +/// name: String, +/// inner: Mutex, +/// } +/// ``` +pub struct LockedBy { + owner: *const L::Inner, + data: UnsafeCell, +} + +// SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for LockedBy {} + +// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for LockedBy {} + +impl LockedBy { + /// Constructs a new instance of [`LockedBy`]. + /// + /// It stores a raw pointer to the owner that is never dereferenced. It is only used to ensure + /// that the right owner is being used to access the protected data. If the owner is freed, the + /// data becomes inaccessible; if another instance of the owner is allocated *on the same + /// memory location*, the data becomes accessible again: none of this affects memory safety + /// because in any case at most one thread (or CPU) can access the protected data at a time. + pub fn new(owner: &L, data: T) -> Self { + Self { + owner: owner.locked_data().get(), + data: UnsafeCell::new(data), + } + } +} + +impl LockedBy { + /// Returns a reference to the protected data when the caller provides evidence (via a + /// [`Guard`]) that the owner is locked. + pub fn access<'a>(&'a self, guard: &'a Guard<'_, L>) -> &'a T { + if !ptr::eq(guard.deref(), self.owner) { + panic!("guard does not match owner"); + } + + // SAFETY: `guard` is evidence that the owner is locked. 
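+        // Note: only a shared reference is created here; `access_mut` below requires a mutable
+        // guard before handing out `&mut T`.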
+        unsafe { &*self.data.get() }
+    }
+
+    /// Returns a mutable reference to the protected data when the caller provides evidence (via a
+    /// mutable [`Guard`]) that the owner is locked mutably.
+    pub fn access_mut<'a>(&'a self, guard: &'a mut Guard<'_, L>) -> &'a mut T {
+        if !ptr::eq(guard.deref().deref(), self.owner) {
+            panic!("guard does not match owner");
+        }
+
+        // SAFETY: `guard` is evidence that the owner is locked.
+        unsafe { &mut *self.data.get() }
+    }
+
+    /// Returns a mutable reference to the protected data when the caller provides evidence (via a
+    /// mutable owner) that the owner is locked mutably. Showing a mutable reference to the owner
+    /// is sufficient because we know no other references can exist to it.
+    pub fn access_from_mut<'a>(&'a self, owner: &'a mut L::Inner) -> &'a mut T {
+        if !ptr::eq(owner, self.owner) {
+            panic!("mismatched owners");
+        }
+
+        // SAFETY: `owner` is evidence that there is only one reference to the owner.
+        unsafe { &mut *self.data.get() }
+    }
+}
diff --git a/rust/kernel/sync/mod.rs b/rust/kernel/sync/mod.rs
new file mode 100644
index 0000000000000..fa691fb938062
--- /dev/null
+++ b/rust/kernel/sync/mod.rs
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Synchronisation primitives.
+//!
+//! This module contains the kernel APIs related to synchronisation that have been ported or
+//! wrapped for usage by Rust code in the kernel and is shared by all of them.
+//!
+//! # Example
+//!
+//! ```no_run
+//! # use kernel::prelude::*;
+//! # use kernel::mutex_init;
+//! # use kernel::sync::Mutex;
+//! # use alloc::boxed::Box;
+//! # use core::pin::Pin;
+//! // SAFETY: `init` is called below.
+//! let mut data = Pin::from(Box::new(unsafe { Mutex::new(0) }));
+//! mutex_init!(data.as_mut(), "test::data");
+//! *data.lock() = 10;
+//! pr_info!("{}\n", *data.lock());
+//! ```

+use crate::bindings;
+use crate::str::CStr;
+use core::pin::Pin;
+
+mod arc;
+mod condvar;
+mod guard;
+mod locked_by;
+mod mutex;
+mod spinlock;
+
+pub use arc::{Ref, RefBorrow, UniqueRef};
+pub use condvar::CondVar;
+pub use guard::{Guard, Lock};
+pub use locked_by::LockedBy;
+pub use mutex::Mutex;
+pub use spinlock::SpinLock;
+
+/// Safely initialises an object that has an `init` function that takes a name and a lock class as
+/// arguments; examples of these are [`Mutex`] and [`SpinLock`]. Each of them also provides a more
+/// specialised name that uses this macro.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! init_with_lockdep {
+    ($obj:expr, $name:literal) => {{
+        static mut CLASS: core::mem::MaybeUninit<$crate::bindings::lock_class_key> =
+            core::mem::MaybeUninit::uninit();
+        let obj = $obj;
+        let name = $crate::c_str!($name);
+        // SAFETY: `CLASS` is never used by Rust code directly; the kernel may change it though.
+        #[allow(unused_unsafe)]
+        unsafe {
+            $crate::sync::NeedsLockClass::init(obj, name, CLASS.as_mut_ptr())
+        };
+    }};
+}
+
+/// A trait for types that need a lock class during initialisation.
+///
+/// Implementers of this trait benefit from the [`init_with_lockdep`] macro that generates a new
+/// class for each initialisation call site.
+pub trait NeedsLockClass {
+    /// Initialises the type instance so that it can be safely used.
+    ///
+    /// Callers are encouraged to use the [`init_with_lockdep`] macro as it automatically creates a
+    /// new lock class on each usage.
+    ///
+    /// # Safety
+    ///
+    /// `key` must point to a valid memory location as it will be used by the kernel.
+ unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key); +} + +/// Reschedules the caller's task if needed. +pub fn cond_resched() -> bool { + // SAFETY: No arguments, reschedules `current` if needed. + unsafe { bindings::cond_resched() != 0 } +} diff --git a/rust/kernel/sync/mutex.rs b/rust/kernel/sync/mutex.rs new file mode 100644 index 0000000000000..7053a5136abb9 --- /dev/null +++ b/rust/kernel/sync/mutex.rs @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A kernel mutex. +//! +//! This module allows Rust code to use the kernel's [`struct mutex`]. + +use super::{Guard, Lock, NeedsLockClass}; +use crate::bindings; +use crate::str::CStr; +use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin}; + +/// Safely initialises a [`Mutex`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! mutex_init { + ($mutex:expr, $name:literal) => { + $crate::init_with_lockdep!($mutex, $name) + }; +} + +/// Exposes the kernel's [`struct mutex`]. When multiple threads attempt to lock the same mutex, +/// only one at a time is allowed to progress, the others will block (sleep) until the mutex is +/// unlocked, at which point another thread will be allowed to wake up and make progress. +/// +/// A [`Mutex`] must first be initialised with a call to [`Mutex::init`] before it can be used. The +/// [`mutex_init`] macro is provided to automatically assign a new lock class to a mutex instance. +/// +/// Since it may block, [`Mutex`] needs to be used with care in atomic contexts. +/// +/// [`struct mutex`]: ../../../include/linux/mutex.h +pub struct Mutex { + /// The kernel `struct mutex` object. + mutex: UnsafeCell, + + /// A mutex needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, + + /// The data protected by the mutex. + data: UnsafeCell, +} + +// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for Mutex {} + +// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for Mutex {} + +impl Mutex { + /// Constructs a new mutex. + /// + /// # Safety + /// + /// The caller must call [`Mutex::init`] before using the mutex. + pub unsafe fn new(t: T) -> Self { + Self { + mutex: UnsafeCell::new(bindings::mutex::default()), + data: UnsafeCell::new(t), + _pin: PhantomPinned, + } + } +} + +impl Mutex { + /// Locks the mutex and gives the caller access to the data protected by it. Only one thread at + /// a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + self.lock_noguard(); + // SAFETY: The mutex was just acquired. + unsafe { Guard::new(self, ()) } + } +} + +impl NeedsLockClass for Mutex { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { bindings::__mutex_init(self.mutex.get(), name.as_char_ptr(), key) }; + } +} + +// SAFETY: The underlying kernel `struct mutex` object ensures mutual exclusion. +unsafe impl Lock for Mutex { + type Inner = T; + type GuardContext = (); + + fn lock_noguard(&self) { + // SAFETY: `mutex` points to valid memory. + unsafe { + bindings::mutex_lock(self.mutex.get()); + } + } + + unsafe fn unlock(&self, _: &mut ()) { + // SAFETY: The safety requirements of the function ensure that the mutex is owned by the + // caller. 
+ unsafe { bindings::mutex_unlock(self.mutex.get()) }; + } + + fn locked_data(&self) -> &UnsafeCell { + &self.data + } +} diff --git a/rust/kernel/sync/spinlock.rs b/rust/kernel/sync/spinlock.rs new file mode 100644 index 0000000000000..6fa00a514aac9 --- /dev/null +++ b/rust/kernel/sync/spinlock.rs @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A kernel spinlock. +//! +//! This module allows Rust code to use the kernel's [`struct spinlock`]. +//! +//! See . + +use super::{Guard, Lock, NeedsLockClass}; +use crate::bindings; +use crate::str::CStr; +use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin}; + +/// Safely initialises a [`SpinLock`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! spinlock_init { + ($spinlock:expr, $name:literal) => { + $crate::init_with_lockdep!($spinlock, $name) + }; +} + +/// Exposes the kernel's [`spinlock_t`]. When multiple CPUs attempt to lock the same spinlock, only +/// one at a time is allowed to progress, the others will block (spinning) until the spinlock is +/// unlocked, at which point another CPU will be allowed to make progress. +/// +/// A [`SpinLock`] must first be initialised with a call to [`SpinLock::init`] before it can be +/// used. The [`spinlock_init`] macro is provided to automatically assign a new lock class to a +/// spinlock instance. +/// +/// [`SpinLock`] does not manage the interrupt state, so it can be used in only two cases: (a) when +/// the caller knows that interrupts are disabled, or (b) when callers never use it in interrupt +/// handlers (in which case it is ok for interrupts to be enabled). +/// +/// [`spinlock_t`]: ../../../include/linux/spinlock.h +pub struct SpinLock { + spin_lock: UnsafeCell, + + /// Spinlocks are architecture-defined. So we conservatively require them to be pinned in case + /// some architecture uses self-references now or in the future. + _pin: PhantomPinned, + + data: UnsafeCell, +} + +// SAFETY: `SpinLock` can be transferred across thread boundaries iff the data it protects can. +unsafe impl Send for SpinLock {} + +// SAFETY: `SpinLock` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl Sync for SpinLock {} + +impl SpinLock { + /// Constructs a new spinlock. + /// + /// # Safety + /// + /// The caller must call [`SpinLock::init`] before using the spinlock. + pub unsafe fn new(t: T) -> Self { + Self { + spin_lock: UnsafeCell::new(bindings::spinlock::default()), + data: UnsafeCell::new(t), + _pin: PhantomPinned, + } + } +} + +impl SpinLock { + /// Locks the spinlock and gives the caller access to the data protected by it. Only one thread + /// at a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + self.lock_noguard(); + // SAFETY: The spinlock was just acquired. + unsafe { Guard::new(self, ()) } + } +} + +impl NeedsLockClass for SpinLock { + unsafe fn init(self: Pin<&mut Self>, name: &'static CStr, key: *mut bindings::lock_class_key) { + unsafe { bindings::__spin_lock_init(self.spin_lock.get(), name.as_char_ptr(), key) }; + } +} + +// SAFETY: The underlying kernel `spinlock_t` object ensures mutual exclusion. +unsafe impl Lock for SpinLock { + type Inner = T; + type GuardContext = (); + + fn lock_noguard(&self) { + // SAFETY: `spin_lock` points to valid memory. 
+ unsafe { bindings::spin_lock(self.spin_lock.get()) }; + } + + unsafe fn unlock(&self, _: &mut ()) { + // SAFETY: The safety requirements of the function ensure that the spinlock is owned by the + // caller. + unsafe { bindings::spin_unlock(self.spin_lock.get()) }; + } + + fn locked_data(&self) -> &UnsafeCell { + &self.data + } +} diff --git a/rust/kernel/sysctl.rs b/rust/kernel/sysctl.rs new file mode 100644 index 0000000000000..42a2c7c818240 --- /dev/null +++ b/rust/kernel/sysctl.rs @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! System control. +//! +//! C header: [`include/linux/sysctl.h`](../../../../include/linux/sysctl.h) +//! +//! Reference: + +use alloc::boxed::Box; +use alloc::vec; +use core::mem; +use core::ptr; +use core::sync::atomic; + +use crate::{ + bindings, c_types, error, + io_buffer::IoBufferWriter, + str::CStr, + types, + user_ptr::{UserSlicePtr, UserSlicePtrWriter}, +}; + +/// Sysctl storage. +pub trait SysctlStorage: Sync { + /// Writes a byte slice. + fn store_value(&self, data: &[u8]) -> (usize, error::Result); + + /// Reads via a [`UserSlicePtrWriter`]. + fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result); +} + +fn trim_whitespace(mut data: &[u8]) -> &[u8] { + while !data.is_empty() && (data[0] == b' ' || data[0] == b'\t' || data[0] == b'\n') { + data = &data[1..]; + } + while !data.is_empty() + && (data[data.len() - 1] == b' ' + || data[data.len() - 1] == b'\t' + || data[data.len() - 1] == b'\n') + { + data = &data[..data.len() - 1]; + } + data +} + +impl SysctlStorage for &T +where + T: SysctlStorage, +{ + fn store_value(&self, data: &[u8]) -> (usize, error::Result) { + (*self).store_value(data) + } + + fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) { + (*self).read_value(data) + } +} + +impl SysctlStorage for atomic::AtomicBool { + fn store_value(&self, data: &[u8]) -> (usize, error::Result) { + let result = match trim_whitespace(data) { + b"0" => { + self.store(false, atomic::Ordering::Relaxed); + Ok(()) + } + b"1" => { + self.store(true, atomic::Ordering::Relaxed); + Ok(()) + } + _ => Err(error::Error::EINVAL), + }; + (data.len(), result) + } + + fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) { + let value = if self.load(atomic::Ordering::Relaxed) { + b"1\n" + } else { + b"0\n" + }; + (value.len(), data.write_slice(value)) + } +} + +/// Holds a single `sysctl` entry (and its table). +pub struct Sysctl { + inner: Box, + // Responsible for keeping the `ctl_table` alive. + _table: Box<[bindings::ctl_table]>, + header: *mut bindings::ctl_table_header, +} + +// SAFETY: The only public method we have is `get()`, which returns `&T`, and +// `T: Sync`. Any new methods must adhere to this requirement. +unsafe impl Sync for Sysctl {} + +unsafe extern "C" fn proc_handler( + ctl: *mut bindings::ctl_table, + write: c_types::c_int, + buffer: *mut c_types::c_void, + len: *mut usize, + ppos: *mut bindings::loff_t, +) -> c_types::c_int { + // If we are reading from some offset other than the beginning of the file, + // return an empty read to signal EOF. 
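+    // Note: the handler produces the whole value at offset zero; any later read (non-zero
+    // `*ppos`) therefore reports EOF rather than re-running the storage read.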
diff --git a/rust/kernel/sysctl.rs b/rust/kernel/sysctl.rs
new file mode 100644
index 0000000000000..42a2c7c818240
--- /dev/null
+++ b/rust/kernel/sysctl.rs
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! System control.
+//!
+//! C header: [`include/linux/sysctl.h`](../../../../include/linux/sysctl.h)
+//!
+//! Reference: <https://www.kernel.org/doc/Documentation/sysctl/README>
+
+use alloc::boxed::Box;
+use alloc::vec;
+use core::mem;
+use core::ptr;
+use core::sync::atomic;
+
+use crate::{
+    bindings, c_types, error,
+    io_buffer::IoBufferWriter,
+    str::CStr,
+    types,
+    user_ptr::{UserSlicePtr, UserSlicePtrWriter},
+};
+
+/// Sysctl storage.
+pub trait SysctlStorage: Sync {
+    /// Writes a byte slice.
+    fn store_value(&self, data: &[u8]) -> (usize, error::Result);
+
+    /// Reads via a [`UserSlicePtrWriter`].
+    fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result);
+}
+
+fn trim_whitespace(mut data: &[u8]) -> &[u8] {
+    while !data.is_empty() && (data[0] == b' ' || data[0] == b'\t' || data[0] == b'\n') {
+        data = &data[1..];
+    }
+    while !data.is_empty()
+        && (data[data.len() - 1] == b' '
+            || data[data.len() - 1] == b'\t'
+            || data[data.len() - 1] == b'\n')
+    {
+        data = &data[..data.len() - 1];
+    }
+    data
+}
+
+impl<T> SysctlStorage for &T
+where
+    T: SysctlStorage,
+{
+    fn store_value(&self, data: &[u8]) -> (usize, error::Result) {
+        (*self).store_value(data)
+    }
+
+    fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) {
+        (*self).read_value(data)
+    }
+}
+
+impl SysctlStorage for atomic::AtomicBool {
+    fn store_value(&self, data: &[u8]) -> (usize, error::Result) {
+        let result = match trim_whitespace(data) {
+            b"0" => {
+                self.store(false, atomic::Ordering::Relaxed);
+                Ok(())
+            }
+            b"1" => {
+                self.store(true, atomic::Ordering::Relaxed);
+                Ok(())
+            }
+            _ => Err(error::Error::EINVAL),
+        };
+        (data.len(), result)
+    }
+
+    fn read_value(&self, data: &mut UserSlicePtrWriter) -> (usize, error::Result) {
+        let value = if self.load(atomic::Ordering::Relaxed) {
+            b"1\n"
+        } else {
+            b"0\n"
+        };
+        (value.len(), data.write_slice(value))
+    }
+}
+
+/// Holds a single `sysctl` entry (and its table).
+pub struct Sysctl<T: SysctlStorage> {
+    inner: Box<T>,
+    // Responsible for keeping the `ctl_table` alive.
+    _table: Box<[bindings::ctl_table]>,
+    header: *mut bindings::ctl_table_header,
+}
+
+// SAFETY: The only public method we have is `get()`, which returns `&T`, and
+// `T: Sync`. Any new methods must adhere to this requirement.
+unsafe impl<T: SysctlStorage> Sync for Sysctl<T> {}
+
+unsafe extern "C" fn proc_handler<T: SysctlStorage>(
+    ctl: *mut bindings::ctl_table,
+    write: c_types::c_int,
+    buffer: *mut c_types::c_void,
+    len: *mut usize,
+    ppos: *mut bindings::loff_t,
+) -> c_types::c_int {
+    // If we are reading from some offset other than the beginning of the file,
+    // return an empty read to signal EOF.
+    if unsafe { *ppos } != 0 && write == 0 {
+        unsafe { *len = 0 };
+        return 0;
+    }
+
+    let data = unsafe { UserSlicePtr::new(buffer, *len) };
+    let storage = unsafe { &*((*ctl).data as *const T) };
+    let (bytes_processed, result) = if write != 0 {
+        let data = match data.read_all() {
+            Ok(r) => r,
+            Err(e) => return e.to_kernel_errno(),
+        };
+        storage.store_value(&data)
+    } else {
+        let mut writer = data.writer();
+        storage.read_value(&mut writer)
+    };
+    unsafe { *len = bytes_processed };
+    unsafe { *ppos += *len as bindings::loff_t };
+    match result {
+        Ok(()) => 0,
+        Err(e) => e.to_kernel_errno(),
+    }
+}
+
+impl<T: SysctlStorage> Sysctl<T> {
+    /// Registers a single entry in `sysctl`.
+    pub fn register(
+        path: &'static CStr,
+        name: &'static CStr,
+        storage: T,
+        mode: types::Mode,
+    ) -> error::Result<Sysctl<T>> {
+        if name.contains(&b'/') {
+            return Err(error::Error::EINVAL);
+        }
+
+        let storage = Box::try_new(storage)?;
+        let mut table = vec![
+            bindings::ctl_table {
+                procname: name.as_char_ptr(),
+                mode: mode.as_int(),
+                data: &*storage as *const T as *mut c_types::c_void,
+                proc_handler: Some(proc_handler::<T>),
+
+                maxlen: 0,
+                child: ptr::null_mut(),
+                poll: ptr::null_mut(),
+                extra1: ptr::null_mut(),
+                extra2: ptr::null_mut(),
+            },
+            unsafe { mem::zeroed() },
+        ]
+        .try_into_boxed_slice()?;
+
+        let result = unsafe { bindings::register_sysctl(path.as_char_ptr(), table.as_mut_ptr()) };
+        if result.is_null() {
+            return Err(error::Error::ENOMEM);
+        }
+
+        Ok(Sysctl {
+            inner: storage,
+            _table: table,
+            header: result,
+        })
+    }
+
+    /// Gets the storage.
+    pub fn get(&self) -> &T {
+        &self.inner
+    }
+}
+
+impl<T: SysctlStorage> Drop for Sysctl<T> {
+    fn drop(&mut self) {
+        unsafe {
+            bindings::unregister_sysctl_table(self.header);
+        }
+        self.header = ptr::null_mut();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_trim_whitespace() {
+        assert_eq!(trim_whitespace(b"foo "), b"foo");
+        assert_eq!(trim_whitespace(b" foo"), b"foo");
+        assert_eq!(trim_whitespace(b" foo "), b"foo");
+    }
+}
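For illustration, registering one of these entries from module code would look roughly like the sketch below. It is not part of the patch and presumes a helper (spelled `c_str!` here, hypothetical) that yields `&'static CStr` values:

    // Sketch: expose a boolean at /proc/sys/example/enabled.
    use core::sync::atomic::AtomicBool;
    use kernel::{sysctl::Sysctl, types::Mode};

    fn register_flag() -> kernel::error::Result<Sysctl<AtomicBool>> {
        // 0o644: world-readable, root-writable. The entry is unregistered
        // when the returned `Sysctl` is dropped.
        Sysctl::register(
            c_str!("example"), // hypothetical &'static CStr helper
            c_str!("enabled"),
            AtomicBool::new(false),
            Mode::from_int(0o644),
        )
    }

Writing `1` or `0` to the file then flips the `AtomicBool` via `SysctlStorage::store_value`; reads go through `read_value`.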
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
new file mode 100644
index 0000000000000..7a2aff0e92191
--- /dev/null
+++ b/rust/kernel/task.rs
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Tasks (threads and processes).
+//!
+//! C header: [`include/linux/sched.h`](../../../../include/linux/sched.h).
+
+use crate::bindings;
+use core::{marker::PhantomData, mem::ManuallyDrop, ops::Deref};
+
+/// Wraps the kernel's `struct task_struct`.
+///
+/// # Invariants
+///
+/// The pointer `Task::ptr` is non-null and valid. Its reference count is also non-zero.
+///
+/// # Examples
+///
+/// The following is an example of getting the PID of the current thread with zero additional cost
+/// when compared to the C version:
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::task::Task;
+///
+/// # fn test() {
+/// Task::current().pid();
+/// # }
+/// ```
+///
+/// Getting the PID of the current process, also zero additional cost:
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::task::Task;
+///
+/// # fn test() {
+/// Task::current().group_leader().pid();
+/// # }
+/// ```
+///
+/// Getting the current task and storing it in some struct. The reference count is automatically
+/// incremented when creating `State` and decremented when it is dropped:
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::task::Task;
+///
+/// struct State {
+///     creator: Task,
+///     index: u32,
+/// }
+///
+/// impl State {
+///     fn new() -> Self {
+///         Self {
+///             creator: Task::current().clone(),
+///             index: 0,
+///         }
+///     }
+/// }
+/// ```
+pub struct Task {
+    pub(crate) ptr: *mut bindings::task_struct,
+}
+
+// SAFETY: Given that the task is referenced, it is OK to send it to another thread.
+unsafe impl Send for Task {}
+
+// SAFETY: It's OK to access `Task` through references from other threads because we're either
+// accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
+// synchronised by C code (e.g., `signal_pending`).
+unsafe impl Sync for Task {}
+
+/// The type of process identifiers (PIDs).
+type Pid = bindings::pid_t;
+
+impl Task {
+    /// Returns a task reference for the currently executing task/thread.
+    pub fn current<'a>() -> TaskRef<'a> {
+        // SAFETY: Just an FFI call.
+        let ptr = unsafe { bindings::get_current() };
+
+        // SAFETY: If the current thread is still running, the current task is valid. Given
+        // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread (where
+        // it could potentially outlive the caller).
+        unsafe { TaskRef::from_ptr(ptr) }
+    }
+
+    /// Returns the group leader of the given task.
+    pub fn group_leader(&self) -> TaskRef<'_> {
+        // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid.
+        let ptr = unsafe { (*self.ptr).group_leader };
+
+        // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
+        // and given that a task has a reference to its group leader, we know it must be valid for
+        // the lifetime of the returned task reference.
+        unsafe { TaskRef::from_ptr(ptr) }
+    }
+
+    /// Returns the PID of the given task.
+    pub fn pid(&self) -> Pid {
+        // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid.
+        unsafe { (*self.ptr).pid }
+    }
+
+    /// Determines whether the given task has pending signals.
+    pub fn signal_pending(&self) -> bool {
+        // SAFETY: By the type invariant, we know that `self.ptr` is non-null and valid.
+        unsafe { bindings::signal_pending(self.ptr) != 0 }
+    }
+}
+
+impl PartialEq for Task {
+    fn eq(&self, other: &Self) -> bool {
+        self.ptr == other.ptr
+    }
+}
+
+impl Eq for Task {}
+
+impl Clone for Task {
+    fn clone(&self) -> Self {
+        // SAFETY: The type invariants guarantee that `self.ptr` has a non-zero reference count.
+        unsafe { bindings::get_task_struct(self.ptr) };
+
+        // INVARIANT: We incremented the reference count to account for the new `Task` being
+        // created.
+        Self { ptr: self.ptr }
+    }
+}
+
+impl Drop for Task {
+    fn drop(&mut self) {
+        // INVARIANT: We may decrement the refcount to zero, but the `Task` is being dropped, so
+        // this is not observable.
+        // SAFETY: The type invariants guarantee that `Task::ptr` has a non-zero reference count.
+        unsafe { bindings::put_task_struct(self.ptr) };
+    }
+}
+
+/// A wrapper for [`Task`] that doesn't automatically decrement the refcount when dropped.
+///
+/// We need the wrapper because [`ManuallyDrop`] alone would allow callers to call
+/// [`ManuallyDrop::into_inner`]. This would allow an unsafe sequence to be triggered without
+/// `unsafe` blocks because it would trigger an unbalanced call to `put_task_struct`.
+///
+/// We make this explicitly not [`Send`] so that we can use it to represent the current thread
+/// without having to increment/decrement its reference count.
+///
+/// # Invariants
+///
+/// The wrapped [`Task`] remains valid for the lifetime of the object.
+pub struct TaskRef<'a> {
+    task: ManuallyDrop<Task>,
+    _not_send: PhantomData<(&'a (), *mut ())>,
+}
+
+impl TaskRef<'_> {
+    /// Constructs a new `struct task_struct` wrapper that doesn't change its reference count.
+    ///
+    /// # Safety
+    ///
+    /// The pointer `ptr` must be non-null and valid for the lifetime of the object.
+    pub(crate) unsafe fn from_ptr(ptr: *mut bindings::task_struct) -> Self {
+        Self {
+            task: ManuallyDrop::new(Task { ptr }),
+            _not_send: PhantomData,
+        }
+    }
+}
+
+// SAFETY: It is OK to share a reference to the current thread with another thread because we know
+// the owner cannot go away while the shared reference exists (and `Task` itself is `Sync`).
+unsafe impl Sync for TaskRef<'_> {}
+
+impl Deref for TaskRef<'_> {
+    type Target = Task;
+
+    fn deref(&self) -> &Self::Target {
+        self.task.deref()
+    }
+}
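One idiom this enables is cooperative cancellation: long-running kernel loops can poll the current task for pending signals and bail out early. A sketch, not from the patch, assuming an `Error::EINTR` constant exists alongside the `EINVAL`/`ENOMEM`/`EFAULT` ones used elsewhere in this series:

    // Sketch: abort a long-running operation when the current task is signalled.
    use kernel::task::Task;

    fn process_items(items: &[u8]) -> kernel::error::Result {
        for _item in items {
            if Task::current().signal_pending() {
                return Err(kernel::error::Error::EINTR); // assumed constant
            }
            // ... process one item ...
        }
        Ok(())
    }

Because `Task::current()` returns a non-`Send` `TaskRef`, this fast path never touches the reference count.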
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
new file mode 100644
index 0000000000000..d3630f828befa
--- /dev/null
+++ b/rust/kernel/types.rs
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Kernel types.
+//!
+//! C header: [`include/linux/types.h`](../../../../include/linux/types.h)
+
+use crate::{
+    bindings, c_types,
+    sync::{Ref, RefBorrow},
+};
+use alloc::boxed::Box;
+use core::{ops::Deref, pin::Pin, ptr::NonNull};
+
+/// Permissions.
+///
+/// C header: [`include/uapi/linux/stat.h`](../../../../include/uapi/linux/stat.h)
+///
+/// C header: [`include/linux/stat.h`](../../../../include/linux/stat.h)
+pub struct Mode(bindings::umode_t);
+
+impl Mode {
+    /// Creates a [`Mode`] from an integer.
+    pub fn from_int(m: u16) -> Mode {
+        Mode(m)
+    }
+
+    /// Returns the mode as an integer.
+    pub fn as_int(&self) -> u16 {
+        self.0
+    }
+}
+
+/// Used to convert an object into a raw pointer that represents it.
+///
+/// It can eventually be converted back into the object. This is used to store objects as pointers
+/// in kernel data structures, for example, an implementation of [`FileOperations`] in `struct
+/// file::private_data`.
+pub trait PointerWrapper {
+    /// Type of values borrowed between calls to [`PointerWrapper::into_pointer`] and
+    /// [`PointerWrapper::from_pointer`].
+    type Borrowed: Deref;
+
+    /// Returns the raw pointer.
+    fn into_pointer(self) -> *const c_types::c_void;
+
+    /// Returns a borrowed value.
+    ///
+    /// # Safety
+    ///
+    /// `ptr` must have been returned by a previous call to [`PointerWrapper::into_pointer`].
+    /// Additionally, [`PointerWrapper::from_pointer`] can only be called after *all* values
+    /// returned by [`PointerWrapper::borrow`] have been dropped.
+    unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed;
+
+    /// Returns the instance back from the raw pointer.
+    ///
+    /// # Safety
+    ///
+    /// The passed pointer must come from a previous call to [`PointerWrapper::into_pointer()`].
+    unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self;
+}
+
+impl<T> PointerWrapper for Box<T> {
+    type Borrowed = UnsafeReference<T>;
+
+    fn into_pointer(self) -> *const c_types::c_void {
+        Box::into_raw(self) as _
+    }
+
+    unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed {
+        // SAFETY: The safety requirements for this function ensure that the object is still alive,
+        // so it is safe to dereference the raw pointer.
+        // The safety requirements also ensure that the object remains alive for the lifetime of
+        // the returned value.
+        unsafe { UnsafeReference::new(&*ptr.cast()) }
+    }
+
+    unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self {
+        // SAFETY: The passed pointer comes from a previous call to [`Self::into_pointer()`].
+        unsafe { Box::from_raw(ptr as _) }
+    }
+}
+
+impl<T: ?Sized> PointerWrapper for Ref<T> {
+    type Borrowed = RefBorrow<T>;
+
+    fn into_pointer(self) -> *const c_types::c_void {
+        Ref::into_usize(self) as _
+    }
+
+    unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed {
+        // SAFETY: The safety requirements for this function ensure that the underlying object
+        // remains valid for the lifetime of the returned value.
+        unsafe { Ref::borrow_usize(ptr as _) }
+    }
+
+    unsafe fn from_pointer(ptr: *const c_types::c_void) -> Self {
+        // SAFETY: The passed pointer comes from a previous call to [`Self::into_pointer()`].
+        unsafe { Ref::from_usize(ptr as _) }
+    }
+}
+
+/// A reference with manually-managed lifetime.
+///
+/// # Invariants
+///
+/// There are no mutable references to the underlying object, and it remains valid for the lifetime
+/// of the [`UnsafeReference`] instance.
+pub struct UnsafeReference<T: ?Sized> {
+    ptr: NonNull<T>,
+}
+
+impl<T: ?Sized> UnsafeReference<T> {
+    /// Creates a new [`UnsafeReference`] instance.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure the following for the lifetime of the returned [`UnsafeReference`]
+    /// instance:
+    /// 1. That `obj` remains valid;
+    /// 2. That no mutable references to `obj` are created.
+    unsafe fn new(obj: &T) -> Self {
+        // INVARIANT: The safety requirements of this function ensure that the invariants hold.
+        Self {
+            ptr: NonNull::from(obj),
+        }
+    }
+}
+
+impl<T: ?Sized> Deref for UnsafeReference<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: By the type invariant, the object is still valid and alive, and there are no
+        // mutable references to it.
+        unsafe { self.ptr.as_ref() }
+    }
+}
+
+impl<T: PointerWrapper + Deref> PointerWrapper for Pin<T> {
+    type Borrowed = T::Borrowed;
+
+    fn into_pointer(self) -> *const c_types::c_void {
+        // SAFETY: We continue to treat the pointer as pinned by returning just a pointer to it to
+        // the caller.
+        let inner = unsafe { Pin::into_inner_unchecked(self) };
+        inner.into_pointer()
+    }
+
+    unsafe fn borrow(ptr: *const c_types::c_void) -> Self::Borrowed {
+        // SAFETY: The safety requirements for this function are the same as the ones for
+        // `T::borrow`.
+        unsafe { T::borrow(ptr) }
+    }
+
+    unsafe fn from_pointer(p: *const c_types::c_void) -> Self {
+        // SAFETY: The object was originally pinned.
+        // The passed pointer comes from a previous call to `inner::into_pointer()`.
+        unsafe { Pin::new_unchecked(T::from_pointer(p)) }
+    }
+}
+
+/// Runs a cleanup function/closure when dropped.
+///
+/// The [`ScopeGuard::dismiss`] function prevents the cleanup function from running.
+///
+/// # Examples
+///
+/// In the example below, we have multiple exit paths and we want to log regardless of which one is
+/// taken:
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::ScopeGuard;
+/// fn example1(arg: bool) {
+///     let _log = ScopeGuard::new(|| pr_info!("example1 completed\n"));
+///
+///     if arg {
+///         return;
+///     }
+///
+///     // Do something...
+/// }
+/// ```
+///
+/// In the example below, we want to log the same message on all early exits but a different one on
+/// the main exit path:
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::ScopeGuard;
+/// fn example2(arg: bool) {
+///     let log = ScopeGuard::new(|| pr_info!("example2 returned early\n"));
+///
+///     if arg {
+///         return;
+///     }
+///
+///     // (Other early returns...)
+///
+///     log.dismiss();
+///     pr_info!("example2 no early return\n");
+/// }
+/// ```
+pub struct ScopeGuard<T: FnOnce()> {
+    cleanup_func: Option<T>,
+}
+
+impl<T: FnOnce()> ScopeGuard<T> {
+    /// Creates a new cleanup object with the given cleanup function.
+    pub fn new(cleanup_func: T) -> Self {
+        Self {
+            cleanup_func: Some(cleanup_func),
+        }
+    }
+
+    /// Prevents the cleanup function from running.
+    pub fn dismiss(mut self) {
+        self.cleanup_func.take();
+    }
+}
+
+impl<T: FnOnce()> Drop for ScopeGuard<T> {
+    fn drop(&mut self) {
+        // Run the cleanup function if one is still present.
+        if let Some(cleanup) = self.cleanup_func.take() {
+            cleanup();
+        }
+    }
+}
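To make the ownership contract of `PointerWrapper` concrete, here is a round-trip sketch (hypothetical code, not part of the patch, assuming the trait and the `Box` implementation above): a boxed object is stashed in a C-side `void *` slot and recovered exactly once.

    // Sketch: storing a Rust object behind a raw pointer and taking it back.
    use alloc::boxed::Box;
    use kernel::{c_types, types::PointerWrapper};

    struct State {
        counter: u32,
    }

    fn stash(state: Box<State>) -> *const c_types::c_void {
        // Ownership moves into the raw pointer; it must be balanced by a
        // later `from_pointer` call, or the object leaks.
        state.into_pointer()
    }

    unsafe fn unstash(ptr: *const c_types::c_void) -> Box<State> {
        // SAFETY: `ptr` came from `stash` and is consumed exactly once, with
        // no outstanding `borrow` results, per the trait's safety contract.
        unsafe { Box::<State>::from_pointer(ptr) }
    }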
diff --git a/rust/kernel/user_ptr.rs b/rust/kernel/user_ptr.rs
new file mode 100644
index 0000000000000..467ead639071c
--- /dev/null
+++ b/rust/kernel/user_ptr.rs
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! User pointers.
+//!
+//! C header: [`include/linux/uaccess.h`](../../../../include/linux/uaccess.h)
+
+use crate::{
+    bindings, c_types,
+    error::Error,
+    io_buffer::{IoBufferReader, IoBufferWriter},
+    Result,
+};
+use alloc::vec::Vec;
+
+/// A reference to an area in userspace memory, which can be either
+/// read-only or read-write.
+///
+/// All methods on this struct are safe: invalid pointers return
+/// `EFAULT`. Concurrent access, *including data races to/from userspace
+/// memory*, is permitted, because fundamentally another userspace
+/// thread/process could always be modifying memory at the same time
+/// (in the same way that userspace Rust's [`std::io`] permits data races
+/// with the contents of files on disk). In the presence of a race, the
+/// exact byte values read/written are unspecified but the operation is
+/// well-defined. Kernelspace code should validate its copy of data
+/// after completing a read, and not expect that multiple reads of the
+/// same address will return the same value.
+///
+/// All APIs enforce the invariant that a given byte of memory from userspace
+/// may only be read once. By preventing double-fetches we avoid TOCTOU
+/// vulnerabilities. This is accomplished by taking `self` by value to prevent
+/// obtaining multiple readers on a given [`UserSlicePtr`], and the readers
+/// only permitting forward reads.
+///
+/// Constructing a [`UserSlicePtr`] performs no checks on the provided
+/// address and length, it can safely be constructed inside a kernel thread
+/// with no current userspace process. Reads and writes wrap the kernel APIs
+/// `copy_from_user` and `copy_to_user`, which check the memory map of the
+/// current process and enforce that the address range is within the user
+/// range (no additional calls to `access_ok` are needed).
+///
+/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html
+pub struct UserSlicePtr(*mut c_types::c_void, usize);
+
+impl UserSlicePtr {
+    /// Constructs a user slice from a raw pointer and a length in bytes.
+    ///
+    /// # Safety
+    ///
+    /// Callers must be careful to avoid time-of-check-time-of-use
+    /// (TOCTOU) issues. The simplest way is to create a single instance of
+    /// [`UserSlicePtr`] per user memory block as it reads each byte at
+    /// most once.
+    pub unsafe fn new(ptr: *mut c_types::c_void, length: usize) -> Self {
+        UserSlicePtr(ptr, length)
+    }
+
+    /// Reads the entirety of the user slice.
+    ///
+    /// Returns `EFAULT` if the address does not currently point to
+    /// mapped, readable memory.
+    pub fn read_all(self) -> Result<Vec<u8>> {
+        self.reader().read_all()
+    }
+
+    /// Constructs a [`UserSlicePtrReader`].
+    pub fn reader(self) -> UserSlicePtrReader {
+        UserSlicePtrReader(self.0, self.1)
+    }
+
+    /// Writes the provided slice into the user slice.
+    ///
+    /// Returns `EFAULT` if the address does not currently point to
+    /// mapped, writable memory (in which case some data from before the
+    /// fault may be written), or `data` is larger than the user slice
+    /// (in which case no data is written).
+    pub fn write_all(self, data: &[u8]) -> Result {
+        self.writer().write_slice(data)
+    }
+
+    /// Constructs a [`UserSlicePtrWriter`].
+    pub fn writer(self) -> UserSlicePtrWriter {
+        UserSlicePtrWriter(self.0, self.1)
+    }
+
+    /// Constructs both a [`UserSlicePtrReader`] and a [`UserSlicePtrWriter`].
+    pub fn reader_writer(self) -> (UserSlicePtrReader, UserSlicePtrWriter) {
+        (
+            UserSlicePtrReader(self.0, self.1),
+            UserSlicePtrWriter(self.0, self.1),
+        )
+    }
+}
+
+/// A reader for [`UserSlicePtr`].
+///
+/// Used to incrementally read from the user slice.
+pub struct UserSlicePtrReader(*mut c_types::c_void, usize);
+
+impl IoBufferReader for UserSlicePtrReader {
+    /// Returns the number of bytes left to be read from this.
+    ///
+    /// Note that even reading less than this number of bytes may fail.
+    fn len(&self) -> usize {
+        self.1
+    }
+
+    /// Reads raw data from the user slice into a raw kernel buffer.
+    ///
+    /// # Safety
+    ///
+    /// The output buffer must be valid.
+    unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result {
+        if len > self.1 || len > u32::MAX as usize {
+            return Err(Error::EFAULT);
+        }
+        let res = unsafe { bindings::copy_from_user(out as _, self.0, len as _) };
+        if res != 0 {
+            return Err(Error::EFAULT);
+        }
+        // Since this is not a pointer to a valid object in our program,
+        // we cannot use `add`, which has C-style rules for defined
+        // behavior.
+        self.0 = self.0.wrapping_add(len);
+        self.1 -= len;
+        Ok(())
+    }
+}
+
+/// A writer for [`UserSlicePtr`].
+///
+/// Used to incrementally write into the user slice.
+pub struct UserSlicePtrWriter(*mut c_types::c_void, usize);
+
+impl IoBufferWriter for UserSlicePtrWriter {
+    fn len(&self) -> usize {
+        self.1
+    }
+
+    fn clear(&mut self, mut len: usize) -> Result {
+        let mut ret = Ok(());
+        if len > self.1 {
+            ret = Err(Error::EFAULT);
+            len = self.1;
+        }
+
+        // SAFETY: The buffer will be validated by `clear_user`. We ensure that `len` is within
+        // bounds in the check above.
+        let left = unsafe { bindings::clear_user(self.0, len as _) } as usize;
+        if left != 0 {
+            ret = Err(Error::EFAULT);
+            len -= left;
+        }
+
+        self.0 = self.0.wrapping_add(len);
+        self.1 -= len;
+        ret
+    }
+
+    unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result {
+        if len > self.1 || len > u32::MAX as usize {
+            return Err(Error::EFAULT);
+        }
+        let res = unsafe { bindings::copy_to_user(self.0, data as _, len as _) };
+        if res != 0 {
+            return Err(Error::EFAULT);
+        }
+        // Since this is not a pointer to a valid object in our program,
+        // we cannot use `add`, which has C-style rules for defined
+        // behavior.
+        self.0 = self.0.wrapping_add(len);
+        self.1 -= len;
+        Ok(())
+    }
+}
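The single-fetch rule is what makes handlers like the following sketch (hypothetical code, not part of the patch, assuming the API above) immune to double-fetch (TOCTOU) bugs by construction:

    // Sketch: a write handler that can only read the user buffer once.
    fn handle_write(user: kernel::user_ptr::UserSlicePtr) -> kernel::error::Result {
        // `read_all` consumes `user`, so the bytes cannot be fetched a second
        // time and re-checked against a possibly different value.
        let data = user.read_all()?;
        if data.first() == Some(&b'1') {
            // ... enable the feature ...
        }
        Ok(())
    }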
From 6e28ec5867e951c72e0d8c66edff2fde91131bd5 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda
Date: Sat, 3 Jul 2021 17:18:23 +0200
Subject: [PATCH 0388/1202] rust: export generated symbols

All symbols are reexported reusing the `EXPORT_SYMBOL_GPL` macro from C.
The lists of symbols are generated on the fly.

There are three main sets of symbols to distinguish:

  - The ones from the `core` and `alloc` crates (from the Rust
    standard library). The code is licensed as Apache/MIT.

  - The ones from our abstractions in the `kernel` crate.

  - The helpers (already exported since they are not generated).

We export everything as GPL. This ensures we do not mistakenly expose
GPL kernel symbols/features as non-GPL, even indirectly.

Co-developed-by: Alex Gaynor
Signed-off-by: Alex Gaynor
Co-developed-by: Geoffrey Thomas
Signed-off-by: Geoffrey Thomas
Co-developed-by: Finn Behrens
Signed-off-by: Finn Behrens
Co-developed-by: Adam Bratschi-Kaye
Signed-off-by: Adam Bratschi-Kaye
Co-developed-by: Wedson Almeida Filho
Signed-off-by: Wedson Almeida Filho
Co-developed-by: Boqun Feng
Signed-off-by: Boqun Feng
Co-developed-by: Sumera Priyadarsini
Signed-off-by: Sumera Priyadarsini
Co-developed-by: Michael Ellerman
Signed-off-by: Michael Ellerman
Co-developed-by: Sven Van Asbroeck
Signed-off-by: Sven Van Asbroeck
Co-developed-by: Gary Guo
Signed-off-by: Gary Guo
Co-developed-by: Boris-Chengbiao Zhou
Signed-off-by: Boris-Chengbiao Zhou
Co-developed-by: Fox Chen
Signed-off-by: Fox Chen
Co-developed-by: Ayaan Zaidi
Signed-off-by: Ayaan Zaidi
Co-developed-by: Douglas Su
Signed-off-by: Douglas Su
Co-developed-by: Yuki Okushi
Signed-off-by: Yuki Okushi
Signed-off-by: Miguel Ojeda
---
 rust/exports.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 rust/exports.c

diff --git a/rust/exports.c b/rust/exports.c
new file mode 100644
index 0000000000000..d7dff1b3b9199
--- /dev/null
+++ b/rust/exports.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// A hack to export Rust symbols for loadable modules without having to redo
+// the entire `include/linux/export.h` logic in Rust.
+//
+// This requires Rust's new/future `v0` mangling scheme because the default
+// one ("legacy") uses invalid characters for C identifiers (thus we cannot use
+// the `EXPORT_SYMBOL_*` macros).
+
+#include <linux/export.h>
+
+#define EXPORT_SYMBOL_RUST_GPL(sym) extern int sym; EXPORT_SYMBOL_GPL(sym);
+
+#include "exports_core_generated.h"
+#include "exports_alloc_generated.h"
+#include "exports_kernel_generated.h"
From b1cdddf4d1bdf68f05bf40b8c933cd645a1c6d05 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda
Date: Sat, 3 Jul 2021 16:42:57 +0200
Subject: [PATCH 0389/1202] Kbuild: add Rust support

Having all the new files in place, we now enable Rust support in the
build system, including `Kconfig` entries related to Rust, the Rust
configuration printer, the target definition files, the version
detection script and a few other bits.

In the future, we will likely want to generate the target files
on the fly via a script.
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- .gitignore | 5 + .rustfmt.toml | 12 ++ Makefile | 162 +++++++++++++++- arch/arm/rust/target.json | 27 +++ arch/arm64/rust/target.json | 34 ++++ arch/powerpc/rust/target.json | 29 +++ arch/riscv/Makefile | 1 + arch/riscv/rust/rv32ima.json | 36 ++++ arch/riscv/rust/rv32imac.json | 36 ++++ arch/riscv/rust/rv64ima.json | 36 ++++ arch/riscv/rust/rv64imac.json | 36 ++++ arch/x86/rust/target.json | 36 ++++ init/Kconfig | 31 ++- lib/Kconfig.debug | 144 ++++++++++++++ rust/.gitignore | 7 + rust/Makefile | 355 ++++++++++++++++++++++++++++++++++ rust/bindgen_parameters | 13 ++ scripts/Makefile.build | 22 +++ scripts/Makefile.lib | 12 ++ scripts/Makefile.modfinal | 8 +- scripts/is_rust_module.sh | 19 ++ scripts/kconfig/confdata.c | 67 ++++++- scripts/rust-version.sh | 31 +++ 23 files changed, 1147 insertions(+), 12 deletions(-) create mode 100644 .rustfmt.toml create mode 100644 arch/arm/rust/target.json create mode 100644 arch/arm64/rust/target.json create mode 100644 arch/powerpc/rust/target.json create mode 100644 arch/riscv/rust/rv32ima.json create mode 100644 arch/riscv/rust/rv32imac.json create mode 100644 arch/riscv/rust/rv64ima.json create mode 100644 arch/riscv/rust/rv64imac.json create mode 100644 arch/x86/rust/target.json create mode 100644 rust/.gitignore create mode 100644 rust/Makefile create mode 100644 rust/bindgen_parameters create mode 100755 scripts/is_rust_module.sh create mode 100755 scripts/rust-version.sh diff --git a/.gitignore b/.gitignore index 7afd412dadd2c..48c68948f476d 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ *.o *.o.* *.patch +*.rmeta *.s *.so *.so.dbg @@ -96,6 +97,7 @@ modules.order !.gitattributes !.gitignore !.mailmap +!.rustfmt.toml # # Generated include files @@ -161,3 +163,6 @@ x509.genkey # Documentation toolchain sphinx_*/ + +# Rust analyzer configuration +/rust-project.json diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000000000..4fea7c464f0d3 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,12 @@ +edition = "2018" +newline_style = "Unix" + +# Unstable options that help catching some mistakes in formatting and that we may want to enable +# when they become stable. +# +# They are kept here since they are useful to run from time to time. +#format_code_in_doc_comments = true +#reorder_impl_items = true +#comment_width = 100 +#wrap_comments = true +#normalize_comments = true diff --git a/Makefile b/Makefile index 4d0c0ed9236e4..1b9b6714b199c 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,13 @@ endif export KBUILD_CHECKSRC +# Enable "clippy" (a linter) as part of the Rust compilation. 
+# +# Use 'make CLIPPY=1' to enable it. +ifeq ("$(origin CLIPPY)", "command line") + KBUILD_CLIPPY := $(CLIPPY) +endif + # Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the # directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") @@ -267,7 +274,7 @@ no-dot-config-targets := $(clean-targets) \ cscope gtags TAGS tags help% %docs check% coccicheck \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ - outputmakefile + outputmakefile rustfmt rustfmtcheck # Installation targets should not require compiler. Unfortunately, vdso_install # is an exception where build artifacts may be updated. This must be fixed. no-compiler-targets := $(no-dot-config-targets) install dtbs_install \ @@ -461,6 +468,12 @@ OBJDUMP = $(CROSS_COMPILE)objdump READELF = $(CROSS_COMPILE)readelf STRIP = $(CROSS_COMPILE)strip endif +RUSTC = rustc +RUSTDOC = rustdoc +RUSTFMT = rustfmt +CLIPPY_DRIVER = clippy-driver +BINDGEN = bindgen +CARGO = cargo PAHOLE = pahole RESOLVE_BTFIDS = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids LEX = flex @@ -484,9 +497,11 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := CFLAGS_MODULE = +RUSTFLAGS_MODULE = AFLAGS_MODULE = LDFLAGS_MODULE = CFLAGS_KERNEL = +RUSTFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = @@ -515,15 +530,41 @@ KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \ -Werror=return-type -Wno-format-security \ -std=gnu89 KBUILD_CPPFLAGS := -D__KERNEL__ +KBUILD_RUST_TARGET := $(srctree)/arch/$(SRCARCH)/rust/target.json +KBUILD_RUSTFLAGS := --emit=dep-info,obj,metadata --edition=2018 \ + -Cpanic=abort -Cembed-bitcode=n -Clto=n -Crpath=n \ + -Cforce-unwind-tables=n -Ccodegen-units=1 \ + -Zbinary_dep_depinfo=y -Zsymbol-mangling-version=v0 \ + -Dunsafe_op_in_unsafe_fn -Drust_2018_idioms \ + -Wmissing_docs +KBUILD_CLIPPYFLAGS := -Dclippy::correctness -Dclippy::style \ + -Dclippy::complexity -Dclippy::perf -Dclippy::float_arithmetic KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := +KBUILD_RUSTFLAGS_KERNEL := KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE +KBUILD_RUSTFLAGS_MODULE := --cfg MODULE KBUILD_LDFLAGS_MODULE := KBUILD_LDFLAGS := CLANG_FLAGS := +ifeq ($(KBUILD_CLIPPY),1) + RUSTC_OR_CLIPPY_QUIET := CLIPPY + RUSTC_OR_CLIPPY = $(CLIPPY_DRIVER) $(KBUILD_CLIPPYFLAGS) +else + RUSTC_OR_CLIPPY_QUIET := RUSTC + RUSTC_OR_CLIPPY = $(RUSTC) +endif + +ifdef RUST_LIB_SRC + export RUST_LIB_SRC +endif + +export RUSTC_BOOTSTRAP := 1 + export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC +export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD @@ -531,9 +572,10 @@ export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +export KBUILD_RUST_TARGET KBUILD_RUSTFLAGS RUSTFLAGS_KERNEL RUSTFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE -export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE -export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE 
KBUILD_RUSTFLAGS_MODULE KBUILD_LDFLAGS_MODULE +export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL KBUILD_RUSTFLAGS_KERNEL # Files to ignore in find ... statements @@ -724,7 +766,7 @@ $(KCONFIG_CONFIG): # # Do not use $(call cmd,...) here. That would suppress prompts from syncconfig, # so you cannot notice that Kconfig is waiting for the user input. -%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h: $(KCONFIG_CONFIG) +%/config/auto.conf %/config/auto.conf.cmd %/generated/autoconf.h %/generated/rustc_cfg: $(KCONFIG_CONFIG) $(Q)$(kecho) " SYNC $@" $(Q)$(MAKE) -f $(srctree)/Makefile syncconfig else # !may-sync-config @@ -751,12 +793,43 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +ifdef CONFIG_RUST_DEBUG_ASSERTIONS +KBUILD_RUSTFLAGS += -Cdebug-assertions=y +else +KBUILD_RUSTFLAGS += -Cdebug-assertions=n +endif + +ifdef CONFIG_RUST_OVERFLOW_CHECKS +KBUILD_RUSTFLAGS += -Coverflow-checks=y +else +KBUILD_RUSTFLAGS += -Coverflow-checks=n +endif + ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := 2 else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := 3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os +KBUILD_RUSTFLAGS_OPT_LEVEL_MAP := z +endif + +ifdef CONFIG_RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C +KBUILD_RUSTFLAGS += -Copt-level=$(KBUILD_RUSTFLAGS_OPT_LEVEL_MAP) +else ifdef CONFIG_RUST_OPT_LEVEL_0 +KBUILD_RUSTFLAGS += -Copt-level=0 +else ifdef CONFIG_RUST_OPT_LEVEL_1 +KBUILD_RUSTFLAGS += -Copt-level=1 +else ifdef CONFIG_RUST_OPT_LEVEL_2 +KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_RUST_OPT_LEVEL_3 +KBUILD_RUSTFLAGS += -Copt-level=3 +else ifdef CONFIG_RUST_OPT_LEVEL_S +KBUILD_RUSTFLAGS += -Copt-level=s +else ifdef CONFIG_RUST_OPT_LEVEL_Z +KBUILD_RUSTFLAGS += -Copt-level=z endif # Tell gcc to never replace conditional load with a non-conditional one @@ -788,6 +861,9 @@ KBUILD_CFLAGS += $(stackp-flags-y) KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) +KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings +KBUILD_RUSTFLAGS += $(KBUILD_RUSTFLAGS-y) + ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments # The kernel builds with '-std=gnu89' so use of GNU extensions is acceptable. @@ -813,6 +889,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y else # Some targets (ARM with Thumb2, for example), can't be built with frame # pointers. For those, we don't have FUNCTION_TRACER automatically @@ -848,6 +925,7 @@ KBUILD_CFLAGS += -fzero-call-used-regs=used-gpr endif DEBUG_CFLAGS := +DEBUG_RUSTFLAGS := ifdef CONFIG_DEBUG_INFO @@ -859,6 +937,11 @@ endif ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-gdwarf-2 +ifdef CONFIG_DEBUG_INFO_REDUCED +DEBUG_RUSTFLAGS += -Cdebuginfo=1 +else +DEBUG_RUSTFLAGS += -Cdebuginfo=2 +endif endif ifndef CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT @@ -885,6 +968,9 @@ endif # CONFIG_DEBUG_INFO KBUILD_CFLAGS += $(DEBUG_CFLAGS) export DEBUG_CFLAGS +KBUILD_RUSTFLAGS += $(DEBUG_RUSTFLAGS) +export DEBUG_RUSTFLAGS + ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FTRACE_MCOUNT_USE_CC CC_FLAGS_FTRACE += -mrecord-mcount @@ -1045,10 +1131,11 @@ include $(addprefix $(srctree)/, $(include-y)) # Do not add $(call cc-option,...) 
below this line. When you build the kernel # from the clean source tree, the GCC plugins do not exist at this point. -# Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments +# Add user supplied CPPFLAGS, AFLAGS, CFLAGS and RUSTFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) KBUILD_CFLAGS += $(KCFLAGS) +KBUILD_RUSTFLAGS += $(KRUSTFLAGS) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 @@ -1117,6 +1204,10 @@ export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ +ifdef CONFIG_RUST +core-y += rust/ +endif + vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ $(libs-y) $(libs-m))) @@ -1217,6 +1308,9 @@ archprepare: outputmakefile archheaders archscripts scripts include/config/kerne prepare0: archprepare $(Q)$(MAKE) $(build)=scripts/mod $(Q)$(MAKE) $(build)=. +ifdef CONFIG_RUST + $(Q)$(MAKE) $(build)=rust +endif # All the preparing.. prepare: prepare0 @@ -1517,7 +1611,7 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ - compile_commands.json .thinlto-cache + compile_commands.json .thinlto-cache rust/test rust/doc # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -1528,7 +1622,8 @@ MRPROPER_FILES += include/config include/generated \ certs/signing_key.pem certs/signing_key.x509 \ certs/x509.genkey \ vmlinux-gdb.py \ - *.spec + *.spec \ + rust/*_generated.h rust/*_generated.rs rust/libmacros.so # clean - Delete most, but leave enough to build external modules # @@ -1640,6 +1735,17 @@ help: @echo ' kselftest-merge - Merge all the config dependencies of' @echo ' kselftest to existing .config.' @echo '' + @echo 'Rust targets:' + @echo ' rustfmt - Reformat all the Rust code in the kernel' + @echo ' rustfmtcheck - Checks if all the Rust code in the kernel' + @echo ' is formatted, printing a diff otherwise.' + @echo ' rustdoc - Generate Rust documentation' + @echo ' (requires kernel .config)' + @echo ' rusttest - Runs the Rust tests' + @echo ' (requires kernel .config; downloads external repos)' + @echo ' rust-analyzer - Generate rust-project.json rust-analyzer support file' + @echo ' (requires kernel .config)' + @echo '' @$(if $(dtstree), \ echo 'Devicetree:'; \ echo '* dtbs - Build device tree blobs for enabled boards'; \ @@ -1711,6 +1817,47 @@ PHONY += $(DOC_TARGETS) $(DOC_TARGETS): $(Q)$(MAKE) $(build)=Documentation $@ + +# Rust targets +# --------------------------------------------------------------------------- + +# Documentation target +# +# Using the singular to avoid running afoul of `no-dot-config-targets`. +PHONY += rustdoc +rustdoc: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + +# Testing target +PHONY += rusttest +rusttest: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + +# Formatting targets +PHONY += rustfmt rustfmtcheck + +# We skip `rust/alloc` since we want to minimize the diff w.r.t. upstream. +# +# We match using absolute paths since `find` does not resolve them +# when matching, which is a problem when e.g. `srctree` is `..`. +# We `grep` afterwards in order to remove the directory entry itself. 
+rustfmt: + $(Q)find $(abs_srctree) -type f -name '*.rs' \ + -o -path $(abs_srctree)/rust/alloc -prune \ + -o -path $(abs_objtree)/rust/test -prune \ + | grep -Fv $(abs_srctree)/rust/alloc \ + | grep -Fv $(abs_objtree)/rust/test \ + | grep -Fv generated \ + | xargs $(RUSTFMT) $(rustfmt_flags) + +rustfmtcheck: rustfmt_flags = --check +rustfmtcheck: rustfmt + +# IDE support targets +PHONY += rust-analyzer +rust-analyzer: prepare0 + $(Q)$(MAKE) $(build)=rust $@ + # Misc # --------------------------------------------------------------------------- @@ -1878,6 +2025,7 @@ clean: $(clean-dirs) $(call cmd,rmfiles) @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ \( -name '*.[aios]' -o -name '*.ko' -o -name '.*.cmd' \ + -o -name '*.rmeta' \ -o -name '*.ko.*' \ -o -name '*.dtb' -o -name '*.dtbo' -o -name '*.dtb.S' -o -name '*.dt.yaml' \ -o -name '*.dwo' -o -name '*.lst' \ diff --git a/arch/arm/rust/target.json b/arch/arm/rust/target.json new file mode 100644 index 0000000000000..3f845b8221dcd --- /dev/null +++ b/arch/arm/rust/target.json @@ -0,0 +1,27 @@ +{ + "arch": "arm", + "crt-static-respected": true, + "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64", + "dynamic-linking": true, + "env": "gnu", + "executables": true, + "features": "+strict-align,+v6", + "function-sections": false, + "has-elf-tls": true, + "has-rpath": true, + "linker-is-gnu": true, + "llvm-target": "arm-unknown-linux-gnueabi", + "max-atomic-width": 64, + "os": "linux", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack" + ] + }, + "relocation-model": "static", + "target-family": "unix", + "target-mcount": "\u0001__gnu_mcount_nc", + "target-pointer-width": "32" +} diff --git a/arch/arm64/rust/target.json b/arch/arm64/rust/target.json new file mode 100644 index 0000000000000..09a264df26c7a --- /dev/null +++ b/arch/arm64/rust/target.json @@ -0,0 +1,34 @@ +{ + "arch": "aarch64", + "data-layout": "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+strict-align,+neon,+fp-armv8", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "aarch64-unknown-none", + "max-atomic-width": 128, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/powerpc/rust/target.json b/arch/powerpc/rust/target.json new file mode 100644 index 0000000000000..b7c5ec469222d --- /dev/null +++ b/arch/powerpc/rust/target.json @@ -0,0 +1,29 @@ +{ + "arch": "powerpc64", + "code-mode": "kernel", + "cpu": "ppc64le", + "data-layout": "e-m:e-i64:64-n32:64", + "env": "gnu", + "features": "-altivec,-vsx,-hard-float", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "powerpc64le-elf", + "max-atomic-width": 64, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "target-family": "unix", + 
"target-mcount": "_mcount", + "target-endian": "little", + "target-pointer-width": "64" +} diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 0eb4568fbd290..7b5b3ff0e5981 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -54,6 +54,7 @@ riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) +KBUILD_RUST_TARGET := $(srctree)/arch/riscv/rust/$(subst fd,,$(riscv-march-y)).json KBUILD_CFLAGS += -mno-save-restore KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) diff --git a/arch/riscv/rust/rv32ima.json b/arch/riscv/rust/rv32ima.json new file mode 100644 index 0000000000000..bcdda88c16041 --- /dev/null +++ b/arch/riscv/rust/rv32ima.json @@ -0,0 +1,36 @@ +{ + "arch": "riscv32", + "code-model": "medium", + "cpu": "generic-rv32", + "data-layout": "e-m:e-p:32:32-i64:64-n32-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv32", + "max-atomic-width": 32, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m32" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "32", + "vendor": "" +} diff --git a/arch/riscv/rust/rv32imac.json b/arch/riscv/rust/rv32imac.json new file mode 100644 index 0000000000000..45873c10a5c33 --- /dev/null +++ b/arch/riscv/rust/rv32imac.json @@ -0,0 +1,36 @@ +{ + "arch": "riscv32", + "code-model": "medium", + "cpu": "generic-rv32", + "data-layout": "e-m:e-p:32:32-i64:64-n32-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a,+c", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv32", + "max-atomic-width": 32, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m32" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "32", + "vendor": "" +} diff --git a/arch/riscv/rust/rv64ima.json b/arch/riscv/rust/rv64ima.json new file mode 100644 index 0000000000000..853d758c54610 --- /dev/null +++ b/arch/riscv/rust/rv64ima.json @@ -0,0 +1,36 @@ +{ + "arch": "riscv64", + "code-model": "medium", + "cpu": "generic-rv64", + "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n64-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv64", + "max-atomic-width": 64, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + 
"target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/riscv/rust/rv64imac.json b/arch/riscv/rust/rv64imac.json new file mode 100644 index 0000000000000..ce50ee8e8c930 --- /dev/null +++ b/arch/riscv/rust/rv64imac.json @@ -0,0 +1,36 @@ +{ + "arch": "riscv64", + "code-model": "medium", + "cpu": "generic-rv64", + "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n64-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "+m,+a,+c", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "riscv64", + "max-atomic-width": 64, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "" +} diff --git a/arch/x86/rust/target.json b/arch/x86/rust/target.json new file mode 100644 index 0000000000000..379cf39e8941b --- /dev/null +++ b/arch/x86/rust/target.json @@ -0,0 +1,36 @@ +{ + "arch": "x86_64", + "code-model": "kernel", + "cpu": "x86-64", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", + "disable-redzone": true, + "emit-debug-gdb-scripts": false, + "env": "gnu", + "features": "-mmx,-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-3dnow,-3dnowa,-avx,-avx2,+soft-float", + "frame-pointer": "always", + "function-sections": false, + "linker-flavor": "gcc", + "linker-is-gnu": true, + "llvm-target": "x86_64-elf", + "max-atomic-width": 64, + "needs-plt": true, + "os": "none", + "panic-strategy": "abort", + "position-independent-executables": true, + "pre-link-args": { + "gcc": [ + "-Wl,--as-needed", + "-Wl,-z,noexecstack", + "-m64" + ] + }, + "relocation-model": "static", + "relro-level": "full", + "stack-probes": { + "kind": "none" + }, + "target-c-int-width": "32", + "target-endian": "little", + "target-pointer-width": "64", + "vendor": "unknown" +} diff --git a/init/Kconfig b/init/Kconfig index 11f8a845f259d..7e1b85f85c3d4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -60,6 +60,15 @@ config LLD_VERSION default $(ld-version) if LD_IS_LLD default 0 +config HAS_RUST + depends on ARM64 || CPU_32v6 || CPU_32v6K || (PPC64 && CPU_LITTLE_ENDIAN) || X86_64 || RISCV + def_bool $(success,$(RUSTC) --version) + +config RUSTC_VERSION + depends on HAS_RUST + int + default $(shell,$(srctree)/scripts/rust-version.sh $(RUSTC)) + config CC_CAN_LINK bool default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT @@ -142,7 +151,8 @@ config WERROR default COMPILE_TEST help A kernel build should not cause any compiler warnings, and this - enables the '-Werror' flag to enforce that rule by default. + enables the '-Werror' (for C) and '-Dwarnings' (for Rust) flags + to enforce that rule by default. However, if you have a new (or very old) compiler with odd and unusual warnings, or you have some architecture with problems, @@ -2037,6 +2047,25 @@ config PROFILING Say Y here to enable the extended profiling support mechanisms used by profilers. +config RUST + bool "Rust support" + depends on HAS_RUST + depends on !COMPILE_TEST + depends on !MODVERSIONS + default n + help + Enables Rust support in the kernel. 
+ + This allows other Rust-related options, like drivers written in Rust, + to be selected. + + It is also required to be able to load external kernel modules + written in Rust. + + See Documentation/rust/ for more information. + + If unsure, say N. + # # Place an empty function call at each tracepoint site. Can be # dynamically changed for a probe function. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2a9b6dcdac4ff..2ff6d84198d83 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2637,6 +2637,150 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +menu "Rust hacking" + +config RUST_DEBUG_ASSERTIONS + bool "Debug assertions" + default n + depends on RUST + help + Enables rustc's `-Cdebug-assertions` codegen option. + + This flag lets you turn `cfg(debug_assertions)` conditional + compilation on or off. This can be used to enable extra debugging + code in development but not in production. For example, it controls + the behavior of the standard library's `debug_assert!` macro. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say N. + +config RUST_OVERFLOW_CHECKS + bool "Overflow checks" + default y + depends on RUST + help + Enables rustc's `-Coverflow-checks` codegen option. + + This flag allows you to control the behavior of runtime integer + overflow. When overflow-checks are enabled, a panic will occur + on overflow. + + Note that this will apply to all Rust code, including `core`. + + If unsure, say Y. + +choice + prompt "Optimization level" + default RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C + depends on RUST + help + Controls rustc's `-Copt-level` codegen option. + + This flag controls the optimization level. + + If unsure, say "Similar as chosen for C". + +config RUST_OPT_LEVEL_SIMILAR_AS_CHOSEN_FOR_C + bool "Similar as chosen for C" + help + This choice will pick a similar optimization level as chosen in + the "Compiler optimization level" for C: + + -O2 is currently mapped to -Copt-level=2 + -O3 is currently mapped to -Copt-level=3 + -Os is currently mapped to -Copt-level=z + + The mapping may change over time to follow the intended semantics + of the choice for C as sensibly as possible. + + This is the default. + +config RUST_OPT_LEVEL_0 + bool "No optimizations (-Copt-level=0)" + help + Not recommended for most purposes. It may come in handy for debugging + suspected optimizer bugs, unexpected undefined behavior, etc. + + Note that this level will *not* enable debug assertions nor overflow + checks on its own (like it happens when interacting with rustc + directly). Use the corresponding configuration options to control + that instead, orthogonally. + + Note this level may cause excessive stack usage, which can lead to stack + overflow and subsequent crashes. + +config RUST_OPT_LEVEL_1 + bool "Basic optimizations (-Copt-level=1)" + help + Useful for debugging without getting too lost, but without + the overhead and boilerplate of no optimizations at all. + + Note this level may cause excessive stack usage, which can lead to stack + overflow and subsequent crashes. + +config RUST_OPT_LEVEL_2 + bool "Some optimizations (-Copt-level=2)" + help + The sensible choice in most cases. + +config RUST_OPT_LEVEL_3 + bool "All optimizations (-Copt-level=3)" + help + Yet more performance (hopefully). + +config RUST_OPT_LEVEL_S + bool "Optimize for size (-Copt-level=s)" + help + Smaller kernel, ideally without too much performance loss. 
+config RUST_OPT_LEVEL_Z
+	bool "Optimize for size, no loop vectorization (-Copt-level=z)"
+	help
+	  Like the previous level, but also turns off loop vectorization.
+
+endchoice
+
+choice
+	prompt "Build-time assertions"
+	default RUST_BUILD_ASSERT_ALLOW if RUST_OPT_LEVEL_0
+	default RUST_BUILD_ASSERT_DENY if !RUST_OPT_LEVEL_0
+	depends on RUST
+	help
+	  Controls how `build_error!` and `build_assert!` are handled during build.
+
+	  If calls to them exist in the binary, it may indicate a violated invariant
+	  or that the optimizer failed to verify the invariant during compilation.
+	  You can choose to abort compilation or ignore them during build and let the
+	  check be carried to runtime.
+
+	  If optimizations are turned off, you cannot select "Deny".
+
+	  If unsure, say "Deny".
+
+config RUST_BUILD_ASSERT_ALLOW
+	bool "Allow"
+	help
+	  Unoptimized calls to `build_error!` will be converted to `panic!`
+	  and checked at runtime.
+
+config RUST_BUILD_ASSERT_WARN
+	bool "Warn"
+	help
+	  Unoptimized calls to `build_error!` will be converted to `panic!`
+	  and checked at runtime, but warnings will be generated when building.
+
+config RUST_BUILD_ASSERT_DENY
+	bool "Deny"
+	depends on !RUST_OPT_LEVEL_0
+	help
+	  Unoptimized calls to `build_error!` will abort compilation.
+
+endchoice
+
+
+endmenu # "Rust"
+
 source "Documentation/Kconfig"
 
 endmenu # Kernel hacking
diff --git a/rust/.gitignore b/rust/.gitignore
new file mode 100644
index 0000000000000..168cb26a31b99
--- /dev/null
+++ b/rust/.gitignore
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+bindings_generated.rs
+bindings_helpers_generated.rs
+exports_*_generated.h
+doc/
+test/
diff --git a/rust/Makefile b/rust/Makefile
new file mode 100644
index 0000000000000..086e1ef914cc5
--- /dev/null
+++ b/rust/Makefile
@@ -0,0 +1,355 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_RUST) += core.o compiler_builtins.o helpers.o
+extra-$(CONFIG_RUST) += exports_core_generated.h
+
+extra-$(CONFIG_RUST) += libmacros.so
+
+extra-$(CONFIG_RUST) += bindings_generated.rs bindings_helpers_generated.rs
+obj-$(CONFIG_RUST) += alloc.o kernel.o
+extra-$(CONFIG_RUST) += exports_alloc_generated.h exports_kernel_generated.h
+
+ifdef CONFIG_RUST_BUILD_ASSERT_DENY
+extra-$(CONFIG_RUST) += build_error.o
+else
+obj-$(CONFIG_RUST) += build_error.o
+endif
+
+obj-$(CONFIG_RUST) += exports.o
+
+ifeq ($(quiet),silent_)
+cargo_quiet=-q
+rust_test_quiet=-q
+rustdoc_test_quiet=--test-args -q
+else ifeq ($(quiet),quiet_)
+rust_test_quiet=-q
+rustdoc_test_quiet=--test-args -q
+else
+cargo_quiet=--verbose
+endif
+
+core-cfgs = \
+    --cfg no_fp_fmt_parse
+
+alloc-cfgs = \
+    --cfg no_global_oom_handling \
+    --cfg no_rc \
+    --cfg no_sync
+
+quiet_cmd_rustdoc = RUSTDOC $(if $(rustdoc_host),H, ) $<
+      cmd_rustdoc = \
+	OBJTREE=$(abspath $(objtree)) \
+	$(RUSTDOC) $(if $(rustdoc_host),,$(rust_cross_flags)) \
+		$(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags))) \
+		$(rustc_target_flags) -L $(objtree)/rust \
+		--output $(objtree)/rust/doc --crate-name $(subst rustdoc-,,$@) \
+		@$(objtree)/include/generated/rustc_cfg $<
+
+# This is a temporary fix for the CSS, visible on `type`s (`Result`).
+# It is already fixed in nightly.
+RUSTDOC_FIX_BEFORE := .impl,.method,.type:not(.container-rustdoc),.associatedconstant,.associatedtype +RUSTDOC_FIX_AFTER := .impl,.impl-items .method,.methods .method,.impl-items \ + .type,.methods .type,.impl-items .associatedconstant,.methods \ + .associatedconstant,.impl-items .associatedtype,.methods .associatedtype + +rustdoc: rustdoc-core rustdoc-macros rustdoc-compiler_builtins rustdoc-alloc rustdoc-kernel + $(Q)cp $(srctree)/Documentation/rust/assets/* $(objtree)/rust/doc + $(Q)sed -i "s/$(RUSTDOC_FIX_BEFORE)/$(RUSTDOC_FIX_AFTER)/" \ + $(objtree)/rust/doc/rustdoc.css + +rustdoc-macros: private rustdoc_host = yes +rustdoc-macros: private rustc_target_flags = --crate-type proc-macro \ + --extern proc_macro +rustdoc-macros: $(srctree)/rust/macros/lib.rs FORCE + $(call if_changed,rustdoc) + +rustdoc-compiler_builtins: $(srctree)/rust/compiler_builtins.rs rustdoc-core FORCE + $(call if_changed,rustdoc) + +# We need to allow `rustdoc::broken_intra_doc_links` because some +# `no_global_oom_handling` functions refer to non-`no_global_oom_handling` +# functions. Ideally `rustdoc` would have a way to distinguish broken links +# due to things that are "configured out" vs. entirely non-existing ones. +rustdoc-alloc: private rustc_target_flags = $(alloc-cfgs) \ + -Arustdoc::broken_intra_doc_links +rustdoc-alloc: $(srctree)/rust/alloc/lib.rs rustdoc-core \ + rustdoc-compiler_builtins FORCE + $(call if_changed,rustdoc) + +rustdoc-kernel: private rustc_target_flags = --extern alloc \ + --extern build_error \ + --extern macros=$(objtree)/rust/libmacros.so +rustdoc-kernel: $(srctree)/rust/kernel/lib.rs rustdoc-core \ + rustdoc-macros rustdoc-compiler_builtins rustdoc-alloc \ + $(objtree)/rust/libmacros.so $(objtree)/rust/bindings_generated.rs \ + $(objtree)/rust/bindings_helpers_generated.rs FORCE + $(call if_changed,rustdoc) + +quiet_cmd_rustc_test_library = RUSTC TL $< + cmd_rustc_test_library = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTC) $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) --crate-type $(if $(rustc_test_library_proc),proc-macro,rlib) \ + --out-dir $(objtree)/rust/test/ --cfg testlib \ + --sysroot $(objtree)/rust/test/sysroot \ + -L $(objtree)/rust/test/ --crate-name $(subst rusttest-,,$(subst rusttestlib-,,$@)) $< + +rusttestlib-build_error: $(srctree)/rust/build_error.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +rusttestlib-macros: private rustc_target_flags = --extern proc_macro +rusttestlib-macros: private rustc_test_library_proc = yes +rusttestlib-macros: $(srctree)/rust/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test_library) + +quiet_cmd_rustdoc_test = RUSTDOC T $< + cmd_rustdoc_test = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTDOC) --test $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) $(rustdoc_test_target_flags) \ + --sysroot $(objtree)/rust/test/sysroot $(rustdoc_test_quiet) \ + -L $(objtree)/rust/test \ + --output $(objtree)/rust/doc --crate-name $(subst rusttest-,,$@) \ + @$(objtree)/include/generated/rustc_cfg $< + +# We cannot use `-Zpanic-abort-tests` because some tests are dynamic, +# so for the moment we skip `-Cpanic=abort`. 
+quiet_cmd_rustc_test = RUSTC T $< + cmd_rustc_test = \ + OBJTREE=$(abspath $(objtree)) \ + $(RUSTC) --test $(filter-out --sysroot=%, $(filter-out -Cpanic=abort, $(filter-out --emit=%, $(rust_flags)))) \ + $(rustc_target_flags) --out-dir $(objtree)/rust/test \ + --sysroot $(objtree)/rust/test/sysroot \ + -L $(objtree)/rust/test/ --crate-name $(subst rusttest-,,$@) $<; \ + $(objtree)/rust/test/$(subst rusttest-,,$@) $(rust_test_quiet) \ + $(rustc_test_run_flags) + +rusttest: rusttest-macros rusttest-kernel + +# This prepares a custom sysroot with our custom `alloc` instead of +# the standard one. +# +# This requires several hacks: +# - Unlike `core` and `alloc`, `std` depends on more than a dozen crates, +# including third-party crates that need to be downloaded, plus custom +# `build.rs` steps. Thus hardcoding things here is not maintainable. +# - `cargo` knows how to build the standard library, but it is an unstable +# feature so far (`-Zbuild-std`). +# - `cargo` only considers the use case of building the standard library +# to use it in a given package. Thus we need to create a dummy package +# and pick the generated libraries from there. +# - Since we only keep a subset of upstream `alloc` in-tree, we need +# to recreate it on the fly by putting our sources on top. +# - The usual ways of modifying the dependency graph in `cargo` do not seem +# to apply for the `-Zbuild-std` steps, thus we have to mislead it +# by modifying the sources in the sysroot. +# - To avoid messing with the user's Rust installation, we create a clone +# of the sysroot. However, `cargo` ignores `RUSTFLAGS` in the `-Zbuild-std` +# steps, thus we use a wrapper binary passed via `RUSTC` to pass the flag. +# +# In the future, we hope to avoid the whole ordeal by either: +# - Making the `test` crate not depend on `std` (either improving upstream +# or having our own custom crate). +# - Making the tests run in kernel space (requires the previous point). +# - Making `std` and friends be more like a "normal" crate, so that +# `-Zbuild-std` and related hacks are not needed. 
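+#
+# In short, the recipe below clones the sysroot, overlays the in-tree
+# `alloc` sources on top of the clone, generates a small `rustc_sysroot`
+# wrapper script that pins `--sysroot` to it, runs `cargo test -Zbuild-std`
+# on a dummy package and copies the resulting libraries back into the
+# cloned sysroot.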
+quiet_cmd_rustsysroot = RUSTSYSROOT + cmd_rustsysroot = \ + rm -rf $(objtree)/rust/test; \ + mkdir -p $(objtree)/rust/test; \ + cp -a $(rustc_sysroot) $(objtree)/rust/test/sysroot; \ + cp -r $(srctree)/rust/alloc/* \ + $(objtree)/rust/test/sysroot/lib/rustlib/src/rust/library/alloc/src; \ + echo '\#!/bin/sh' > $(objtree)/rust/test/rustc_sysroot; \ + echo "$(RUSTC) --sysroot=$(abspath $(objtree)/rust/test/sysroot) \"\$$@\"" \ + >> $(objtree)/rust/test/rustc_sysroot; \ + chmod u+x $(objtree)/rust/test/rustc_sysroot; \ + $(CARGO) -q new $(objtree)/rust/test/dummy; \ + RUSTC=$(objtree)/rust/test/rustc_sysroot $(CARGO) $(cargo_quiet) \ + test -Zbuild-std --target $(rustc_host_target) \ + --manifest-path $(objtree)/rust/test/dummy/Cargo.toml; \ + rm $(objtree)/rust/test/sysroot/lib/rustlib/$(rustc_host_target)/lib/*; \ + cp $(objtree)/rust/test/dummy/target/$(rustc_host_target)/debug/deps/* \ + $(objtree)/rust/test/sysroot/lib/rustlib/$(rustc_host_target)/lib + +rusttest-prepare: FORCE + $(call if_changed,rustsysroot) + +rusttest-macros: private rustc_target_flags = --extern proc_macro +rusttest-macros: private rustdoc_test_target_flags = --crate-type proc-macro +rusttest-macros: $(srctree)/rust/macros/lib.rs rusttest-prepare FORCE + $(call if_changed,rustc_test) + $(call if_changed,rustdoc_test) + +rusttest-kernel: private rustc_target_flags = --extern alloc \ + --extern build_error --extern macros +rusttest-kernel: private rustc_test_run_flags = \ + --skip bindgen_test_layout_ +rusttest-kernel: $(srctree)/rust/kernel/lib.rs rusttest-prepare \ + rusttestlib-build_error rusttestlib-macros FORCE + $(call if_changed,rustc_test) + $(call if_changed,rustc_test_library) + $(call if_changed,rustdoc_test) + +ifdef CONFIG_CC_IS_CLANG +bindgen_c_flags = $(c_flags) +else +# bindgen relies on libclang to parse C. Ideally, bindgen would support a GCC +# plugin backend and/or the Clang driver would be perfectly compatible with GCC. +# +# For the moment, here we are tweaking the flags on the fly. Some config +# options may not work (e.g. `GCC_PLUGIN_RANDSTRUCT` if we end up using one +# of those structs). 
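+#
+# `bindgen_skip_c_flags` below collects GCC-only (or GCC-spelled) options
+# that libclang would reject; they are filtered out of `$(c_flags)` when
+# computing `bindgen_c_flags`, with a few PowerPC-specific ones appended
+# in a separate block.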
+bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ + -mskip-rax-setup -mgeneral-regs-only -msign-return-address=% \ + -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount \ + -mabi=lp64 -mstack-protector-guard% -fconserve-stack -falign-jumps=% \ + -falign-loops=% -femit-struct-debug-baseonly \ + -fno-ipa-cp-clone -fno-ipa-sra -fno-partial-inlining \ + -fplugin-arg-arm_ssp_per_task_plugin-% \ + -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ + -Wno-packed-not-aligned -Wno-format-truncation -Wno-format-overflow \ + -Wno-stringop-truncation -Wno-unused-but-set-variable \ + -Wno-stringop-overflow -Wno-restrict -Wno-maybe-uninitialized \ + -Werror=designated-init -Wno-zero-length-bounds -Wimplicit-fallthrough=% \ + --param=% --param asan-% + +# PowerPC +bindgen_skip_c_flags += -mtraceback=no -mno-pointers-to-nested-functions \ + -mno-string -mno-strict-align + +# Derived from `scripts/Makefile.clang` +BINDGEN_TARGET_arm := arm-linux-gnueabi +BINDGEN_TARGET_arm64 := aarch64-linux-gnu +BINDGEN_TARGET_powerpc := powerpc64le-linux-gnu +BINDGEN_TARGET_riscv := riscv64-linux-gnu +BINDGEN_TARGET_x86 := x86_64-linux-gnu +BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) + +bindgen_extra_c_flags = --target=$(BINDGEN_TARGET) -Wno-address-of-packed-member +bindgen_c_flags = $(filter-out $(bindgen_skip_c_flags), $(c_flags)) \ + $(bindgen_extra_c_flags) +endif + +ifdef CONFIG_LTO +bindgen_c_flags_lto = $(filter-out $(CC_FLAGS_LTO), $(bindgen_c_flags)) +else +bindgen_c_flags_lto = $(bindgen_c_flags) +endif + +# To avoid several recompilations in PowerPC, which inserts `-D_TASK_CPU` +bindgen_c_flags_final = $(filter-out -D_TASK_CPU=%, $(bindgen_c_flags_lto)) + +quiet_cmd_bindgen = BINDGEN $@ + cmd_bindgen = \ + $(BINDGEN) $< $(shell grep -v '^\#\|^$$' $(srctree)/rust/bindgen_parameters) \ + --use-core --with-derive-default --ctypes-prefix c_types \ + --no-debug '.*' \ + --size_t-is-usize -o $@ -- $(bindgen_c_flags_final) -DMODULE + +$(objtree)/rust/bindings_generated.rs: $(srctree)/rust/kernel/bindings_helper.h \ + $(srctree)/rust/bindgen_parameters FORCE + $(call if_changed_dep,bindgen) + +quiet_cmd_bindgen_helper = BINDGEN $@ + cmd_bindgen_helper = \ + $(BINDGEN) $< --blacklist-type '.*' --whitelist-var '' \ + --whitelist-function 'rust_helper_.*' \ + --use-core --with-derive-default --ctypes-prefix c_types \ + --no-debug '.*' \ + --size_t-is-usize -o $@ -- $(bindgen_c_flags_final) \ + -I$(objtree)/rust/ -DMODULE; \ + sed -Ei 's/pub fn rust_helper_([a-zA-Z0-9_]*)/\#[link_name="rust_helper_\1"]\n pub fn \1/g' $@ + +$(objtree)/rust/bindings_helpers_generated.rs: $(srctree)/rust/helpers.c FORCE + $(call if_changed_dep,bindgen_helper) + +quiet_cmd_exports = EXPORTS $@ + cmd_exports = \ + $(NM) -p --defined-only $< \ + | grep -E ' (T|R|D) ' | cut -d ' ' -f 3 \ + | xargs -Isymbol \ + echo 'EXPORT_SYMBOL_RUST_GPL(symbol);' > $@ + +$(objtree)/rust/exports_core_generated.h: $(objtree)/rust/core.o FORCE + $(call if_changed,exports) + +$(objtree)/rust/exports_alloc_generated.h: $(objtree)/rust/alloc.o FORCE + $(call if_changed,exports) + +$(objtree)/rust/exports_kernel_generated.h: $(objtree)/rust/kernel.o FORCE + $(call if_changed,exports) + +# `-Cpanic=unwind -Cforce-unwind-tables=y` overrides `rust_flags` in order to +# avoid the https://github.com/rust-lang/rust/issues/82320 rustc crash. 
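+# Both flags are appended after `$(rust_flags)` in the rule below, so they
+# take precedence over any earlier `-Cpanic=abort`.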
+quiet_cmd_rustc_procmacro = $(RUSTC_OR_CLIPPY_QUIET) P $@ + cmd_rustc_procmacro = \ + $(RUSTC_OR_CLIPPY) $(rust_flags) \ + --emit=dep-info,link --extern proc_macro \ + -Cpanic=unwind -Cforce-unwind-tables=y \ + --crate-type proc-macro --out-dir $(objtree)/rust/ \ + --crate-name $(patsubst lib%.so,%,$(notdir $@)) $<; \ + mv $(objtree)/rust/$(patsubst lib%.so,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +# Procedural macros can only be used with the `rustc` that compiled it. +# Therefore, to get `libmacros.so` automatically recompiled when the compiler +# version changes, we add `core.o` as a dependency (even if it is not needed). +$(objtree)/rust/libmacros.so: $(srctree)/rust/macros/lib.rs \ + $(objtree)/rust/core.o FORCE + $(call if_changed_dep,rustc_procmacro) + +quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L $@ + cmd_rustc_library = \ + OBJTREE=$(abspath $(objtree)) \ + $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ + $(rust_flags) $(rust_cross_flags) $(rustc_target_flags) \ + --crate-type rlib --out-dir $(objtree)/rust/ -L $(objtree)/rust/ \ + --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ + mv $(objtree)/rust/$(patsubst %.o,%,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) \ + $(if $(rustc_objcopy),;$(OBJCOPY) $(rustc_objcopy) $@) + +# `$(rust_flags)` is passed in case the user added `--sysroot`. +rustc_sysroot = $(shell $(RUSTC) $(rust_flags) --print sysroot) +rustc_host_target = $(shell $(RUSTC) --version --verbose | grep -F 'host: ' | cut -d' ' -f2) +RUST_LIB_SRC ?= $(rustc_sysroot)/lib/rustlib/src/rust/library + +rust-analyzer: + $(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) $(RUST_LIB_SRC) > $(objtree)/rust-project.json + +$(objtree)/rust/compiler_builtins.o: private rustc_objcopy = -w -W '__*' +$(objtree)/rust/compiler_builtins.o: $(srctree)/rust/compiler_builtins.rs \ + $(objtree)/rust/core.o FORCE + $(call if_changed_dep,rustc_library) + +$(objtree)/rust/alloc.o: private skip_clippy = 1 +$(objtree)/rust/alloc.o: private rustc_target_flags = $(alloc-cfgs) +$(objtree)/rust/alloc.o: $(srctree)/rust/alloc/lib.rs \ + $(objtree)/rust/compiler_builtins.o FORCE + $(call if_changed_dep,rustc_library) + +$(objtree)/rust/build_error.o: $(srctree)/rust/build_error.rs \ + $(objtree)/rust/compiler_builtins.o FORCE + $(call if_changed_dep,rustc_library) + +# ICE on `--extern macros`: https://github.com/rust-lang/rust/issues/56935 +$(objtree)/rust/kernel.o: private rustc_target_flags = --extern alloc \ + --extern build_error \ + --extern macros=$(objtree)/rust/libmacros.so +$(objtree)/rust/kernel.o: $(srctree)/rust/kernel/lib.rs $(objtree)/rust/alloc.o \ + $(objtree)/rust/build_error.o \ + $(objtree)/rust/libmacros.so $(objtree)/rust/bindings_generated.rs \ + $(objtree)/rust/bindings_helpers_generated.rs FORCE + $(call if_changed_dep,rustc_library) + +# Targets that need to expand twice +.SECONDEXPANSION: +$(objtree)/rust/core.o: private skip_clippy = 1 +$(objtree)/rust/core.o: private rustc_target_flags = $(core-cfgs) +$(objtree)/rust/core.o: $$(RUST_LIB_SRC)/core/src/lib.rs FORCE + $(call if_changed_dep,rustc_library) + +rustdoc-core: private rustc_target_flags = $(core-cfgs) +rustdoc-core: $$(RUST_LIB_SRC)/core/src/lib.rs FORCE + $(call if_changed,rustdoc) diff --git a/rust/bindgen_parameters b/rust/bindgen_parameters new file mode 100644 index 0000000000000..c2cc4a88234ef --- /dev/null +++ b/rust/bindgen_parameters @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +--opaque-type xregs_state 
+--opaque-type desc_struct +--opaque-type arch_lbr_state +--opaque-type local_apic + +# If SMP is disabled, `arch_spinlock_t` is defined as a ZST which triggers a Rust +# warning. We don't need to peek into it anyway. +--opaque-type spinlock + +# `seccomp`'s comment gets understood as a doctest +--no-doc-comments diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3efc984d4c690..bad29117c44ff 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -26,6 +26,7 @@ EXTRA_CPPFLAGS := EXTRA_LDFLAGS := asflags-y := ccflags-y := +rustflags-y := cppflags-y := ldflags-y := @@ -314,6 +315,27 @@ quiet_cmd_cc_lst_c = MKLST $@ $(obj)/%.lst: $(src)/%.c FORCE $(call if_changed_dep,cc_lst_c) +# Compile Rust sources (.rs) +# --------------------------------------------------------------------------- + +# Need to use absolute path here and have symbolic links resolved; +# otherwise rustdoc and rustc compute different hashes for the target. +rust_cross_flags := --target=$(realpath $(KBUILD_RUST_TARGET)) + +quiet_cmd_rustc_o_rs = $(RUSTC_OR_CLIPPY_QUIET) $(quiet_modtag) $@ + cmd_rustc_o_rs = \ + RUST_MODFILE=$(modfile) \ + $(RUSTC_OR_CLIPPY) $(rust_flags) $(rust_cross_flags) \ + -Zallow-features=allocator_api,bench_black_box,concat_idents,global_asm,try_reserve \ + --extern alloc --extern kernel \ + --crate-type rlib --out-dir $(obj) -L $(objtree)/rust/ \ + --crate-name $(patsubst %.o,%,$(notdir $@)) $<; \ + mv $(obj)/$(subst .o,,$(notdir $@)).d $(depfile); \ + sed -i '/^\#/d' $(depfile) + +$(obj)/%.o: $(src)/%.rs FORCE + $(call if_changed_dep,rustc_o_rs) + # Compile assembler sources (.S) # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 54582673fc1a0..273b20bab7754 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -8,6 +8,7 @@ ldflags-y += $(EXTRA_LDFLAGS) # flags that take effect in current and sub directories KBUILD_AFLAGS += $(subdir-asflags-y) KBUILD_CFLAGS += $(subdir-ccflags-y) +KBUILD_RUSTFLAGS += $(subdir-rustflags-y) # Figure out what we need to build from the various variables # =========================================================================== @@ -133,6 +134,10 @@ _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(ccflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(ccflags-y)) \ $(CFLAGS_$(target-stem).o)) +_rust_flags = $(filter-out $(RUSTFLAGS_REMOVE_$(target-stem).o), \ + $(filter-out $(rustflags-remove-y), \ + $(KBUILD_RUSTFLAGS) $(rustflags-y)) \ + $(RUSTFLAGS_$(target-stem).o)) _a_flags = $(filter-out $(AFLAGS_REMOVE_$(target-stem).o), \ $(filter-out $(asflags-remove-y), \ $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) $(asflags-y)) \ @@ -202,6 +207,11 @@ modkern_cflags = \ $(KBUILD_CFLAGS_MODULE) $(CFLAGS_MODULE), \ $(KBUILD_CFLAGS_KERNEL) $(CFLAGS_KERNEL) $(modfile_flags)) +modkern_rustflags = \ + $(if $(part-of-module), \ + $(KBUILD_RUSTFLAGS_MODULE) $(RUSTFLAGS_MODULE), \ + $(KBUILD_RUSTFLAGS_KERNEL) $(RUSTFLAGS_KERNEL)) + modkern_aflags = $(if $(part-of-module), \ $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE), \ $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL)) @@ -211,6 +221,8 @@ c_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_c_flags) $(modkern_cflags) \ $(basename_flags) $(modname_flags) +rust_flags = $(_rust_flags) $(modkern_rustflags) @$(objtree)/include/generated/rustc_cfg + a_flags = -Wp,-MMD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(_a_flags) $(modkern_aflags) diff --git a/scripts/Makefile.modfinal 
b/scripts/Makefile.modfinal index ff805777431ce..c0842e999a756 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -39,10 +39,12 @@ quiet_cmd_ld_ko_o = LD [M] $@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ - if [ -f vmlinux ]; then \ - LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ - else \ + if [ ! -f vmlinux ]; then \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ + elif $(srctree)/scripts/is_rust_module.sh $@; then \ + printf "Skipping BTF generation for %s because it's a Rust module\n" $@ 1>&2; \ + else \ + LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ fi; # Same as newer-prereqs, but allows to exclude specified extra dependencies diff --git a/scripts/is_rust_module.sh b/scripts/is_rust_module.sh new file mode 100755 index 0000000000000..ff24d9c16f094 --- /dev/null +++ b/scripts/is_rust_module.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# is_rust_module.sh MOD.ko +# +# Returns 0 if MOD.ko is a rust module, 1 otherwise. + +set -e +module="$*" + +while IFS= read -r line +do + # Any symbol beginning with "_R" is a v0 mangled rust symbol + if [[ $line =~ ^[0-9a-fA-F]+[[:space:]]+[uUtTrR][[:space:]]+_R[^[:space:]]+$ ]]; then + exit 0 + fi +done < <(nm "$module") + +exit 1 diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index cf72680cd7692..d9fc638dfa86a 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -651,6 +651,56 @@ static struct conf_printer kconfig_printer_cb = .print_comment = kconfig_print_comment, }; +/* + * rustc cfg printer + * + * This printer is used when generating the resulting rustc configuration + * after kconfig invocation and `defconfig` files. + */ +static void rustc_cfg_print_symbol(FILE *fp, struct symbol *sym, const char *value, void *arg) +{ + const char *str; + + switch (sym->type) { + case S_INT: + case S_HEX: + case S_BOOLEAN: + case S_TRISTATE: + str = sym_escape_string_value(value); + + /* + * We don't care about disabled ones, i.e. no need for + * what otherwise are "comments" in other printers. + */ + if (*value == 'n') + return; + + /* + * To have similar functionality to the C macro `IS_ENABLED()` + * we provide an empty `--cfg CONFIG_X` here in both `y` + * and `m` cases. + * + * Then, the common `fprintf()` below will also give us + * a `--cfg CONFIG_X="y"` or `--cfg CONFIG_X="m"`, which can + * be used as the equivalent of `IS_BUILTIN()`/`IS_MODULE()`. 
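+	 *
+	 * For example, `CONFIG_FOO=y` ends up as both `--cfg=CONFIG_FOO` and
+	 * `--cfg=CONFIG_FOO="y"` in the generated `rustc_cfg` file.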
+ */ + if (*value == 'y' || *value == 'm') + fprintf(fp, "--cfg=%s%s\n", CONFIG_, sym->name); + + break; + default: + str = value; + break; + } + + fprintf(fp, "--cfg=%s%s=%s\n", CONFIG_, sym->name, str); +} + +static struct conf_printer rustc_cfg_printer_cb = +{ + .print_symbol = rustc_cfg_print_symbol, +}; + /* * Header printer * @@ -1058,7 +1108,7 @@ int conf_write_autoconf(int overwrite) struct symbol *sym; const char *name; const char *autoconf_name = conf_get_autoconfig_name(); - FILE *out, *out_h; + FILE *out, *out_h, *out_rustc_cfg; int i; if (!overwrite && is_present(autoconf_name)) @@ -1079,6 +1129,13 @@ int conf_write_autoconf(int overwrite) return 1; } + out_rustc_cfg = fopen(".tmp_rustc_cfg", "w"); + if (!out_rustc_cfg) { + fclose(out); + fclose(out_h); + return 1; + } + conf_write_heading(out, &kconfig_printer_cb, NULL); conf_write_heading(out_h, &header_printer_cb, NULL); @@ -1090,9 +1147,11 @@ int conf_write_autoconf(int overwrite) /* write symbols to auto.conf and autoconf.h */ conf_write_symbol(out, sym, &kconfig_printer_cb, (void *)1); conf_write_symbol(out_h, sym, &header_printer_cb, NULL); + conf_write_symbol(out_rustc_cfg, sym, &rustc_cfg_printer_cb, NULL); } fclose(out); fclose(out_h); + fclose(out_rustc_cfg); name = getenv("KCONFIG_AUTOHEADER"); if (!name) @@ -1111,6 +1170,12 @@ int conf_write_autoconf(int overwrite) if (rename(".tmpconfig", autoconf_name)) return 1; + name = "include/generated/rustc_cfg"; + if (make_parent_dir(name)) + return 1; + if (rename(".tmp_rustc_cfg", name)) + return 1; + return 0; } diff --git a/scripts/rust-version.sh b/scripts/rust-version.sh new file mode 100755 index 0000000000000..67b6d31688e24 --- /dev/null +++ b/scripts/rust-version.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# rust-version rust-command +# +# Print the compiler version of `rust-command' in a 5 or 6-digit form +# such as `14502' for rustc-1.45.2 etc. +# +# Returns 0 if not found (so that Kconfig does not complain) +compiler="$*" + +if [ ${#compiler} -eq 0 ]; then + echo "Error: No compiler specified." >&2 + printf "Usage:\n\t$0 \n" >&2 + exit 1 +fi + +if ! command -v $compiler >/dev/null 2>&1; then + echo 0 + exit 0 +fi + +VERSION=$($compiler --version | cut -f2 -d' ') + +# Cut suffix if any (e.g. `-dev`) +VERSION=$(echo $VERSION | cut -f1 -d'-') + +MAJOR=$(echo $VERSION | cut -f1 -d'.') +MINOR=$(echo $VERSION | cut -f2 -d'.') +PATCHLEVEL=$(echo $VERSION | cut -f3 -d'.') +printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL From 863f302d181ff1f02d061e2f13d6d1f22928d430 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:23:16 +0200 Subject: [PATCH 0390/1202] docs: add Rust documentation Most of the documentation for Rust is written within the source code itself, as it is idiomatic for Rust projects. This applies to both the shared infrastructure at `rust/` as well as any other Rust module (e.g. drivers) written across the kernel. These documents contain general information that does not fit particularly well in the source code, like the Quick Start guide. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- Documentation/doc-guide/kernel-doc.rst | 3 + Documentation/index.rst | 1 + Documentation/kbuild/kbuild.rst | 4 + Documentation/process/changes.rst | 13 ++ Documentation/rust/arch-support.rst | 35 ++++ Documentation/rust/assets/favicon-16x16.png | Bin 0 -> 798 bytes Documentation/rust/assets/favicon-32x32.png | Bin 0 -> 2076 bytes Documentation/rust/assets/rust-logo.png | Bin 0 -> 53976 bytes Documentation/rust/coding.rst | 92 +++++++++ Documentation/rust/docs.rst | 110 ++++++++++ Documentation/rust/index.rst | 20 ++ Documentation/rust/quick-start.rst | 218 ++++++++++++++++++++ 12 files changed, 496 insertions(+) create mode 100644 Documentation/rust/arch-support.rst create mode 100644 Documentation/rust/assets/favicon-16x16.png create mode 100644 Documentation/rust/assets/favicon-32x32.png create mode 100644 Documentation/rust/assets/rust-logo.png create mode 100644 Documentation/rust/coding.rst create mode 100644 Documentation/rust/docs.rst create mode 100644 Documentation/rust/index.rst create mode 100644 Documentation/rust/quick-start.rst diff --git a/Documentation/doc-guide/kernel-doc.rst b/Documentation/doc-guide/kernel-doc.rst index 79aaa55d6bcf2..724e2ffddff12 100644 --- a/Documentation/doc-guide/kernel-doc.rst +++ b/Documentation/doc-guide/kernel-doc.rst @@ -11,6 +11,9 @@ when it is embedded in source files. reasons. The kernel source contains tens of thousands of kernel-doc comments. Please stick to the style described here. +.. note:: kernel-doc does not cover Rust code: please see + Documentation/rust/docs.rst instead. + The kernel-doc structure is extracted from the comments, and proper `Sphinx C Domain`_ function and type descriptions with anchors are generated from them. The descriptions are filtered for special kernel-doc diff --git a/Documentation/index.rst b/Documentation/index.rst index 54ce34fd6fbda..1b13c2445e87b 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -82,6 +82,7 @@ merged much easier. maintainer/index fault-injection/index livepatch/index + rust/index Kernel API documentation diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index 2d1fc03d346ea..468a0d216c29f 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -57,6 +57,10 @@ CFLAGS_MODULE ------------- Additional module specific options to use for $(CC). +KRUSTFLAGS +---------- +Additional options to the Rust compiler (for built-in and modules). + LDFLAGS_MODULE -------------- Additional options used for $(LD) when linking modules. 
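
As a side note for readers new to Rust: the in-source documentation style
this series refers to is plain `///` doc comments that `rustdoc` renders,
and whose fenced examples it compiles and runs as tests. A minimal,
hypothetical sketch (the `kib_to_bytes` helper is illustrative only and
not part of this patch):

    /// Converts kibibytes to bytes.
    ///
    /// # Examples
    ///
    /// ```
    /// // Assumes the helper lives in the `kernel` crate.
    /// use kernel::kib_to_bytes;
    ///
    /// assert_eq!(kib_to_bytes(2), 2048);
    /// ```
    pub fn kib_to_bytes(kib: usize) -> usize {
        kib * 1024
    }

This is also why the kernel-doc note added above points Rust code to
Documentation/rust/docs.rst rather than to kernel-doc comments.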
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index e35ab74a0f804..a9426c055ae45 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -26,11 +26,18 @@ running a Linux kernel. Also, not all tools are necessary on all systems; obviously, if you don't have any PC Card hardware, for example, you probably needn't concern yourself with pcmciautils. +Furthermore, note that newer versions of the Rust toolchain may or may not work +because, for the moment, we depend on some unstable features. Thus, unless you +know what you are doing, use the exact version listed here. Please see +:ref:`Documentation/rust/quick-start.rst ` for details. + ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== GNU C 5.1 gcc --version Clang/LLVM (optional) 10.0.1 clang --version +rustc (optional) 1.55.0 rustc --version +bindgen (optional) 0.56.0 bindgen --version GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version @@ -329,6 +336,12 @@ Sphinx Please see :ref:`sphinx_install` in :ref:`Documentation/doc-guide/sphinx.rst ` for details about Sphinx requirements. +rustdoc +------- + +``rustdoc`` is used to generate Rust documentation. Please see +:ref:`Documentation/rust/docs.rst ` for more information. + Getting updated software ======================== diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst new file mode 100644 index 0000000000000..0dd603d378195 --- /dev/null +++ b/Documentation/rust/arch-support.rst @@ -0,0 +1,35 @@ +.. _rust_arch_support: + +Arch Support +============ + +Currently, the Rust compiler (``rustc``) uses LLVM for code generation, +which limits the supported architectures we can target. In addition, support +for building the kernel with LLVM/Clang varies (see :ref:`kbuild_llvm`), +which ``bindgen`` relies on through ``libclang``. + +Below is a general summary of architectures that currently work. Level of +support corresponds to ``S`` values in the ``MAINTAINERS`` file. + +.. 
list-table:: + :widths: 10 10 10 + :header-rows: 1 + + * - Architecture + - Level of support + - Constraints + * - ``arm`` + - Maintained + - ``armv6`` and compatible only, ``RUST_OPT_LEVEL >= 2`` + * - ``arm64`` + - Maintained + - None + * - ``powerpc`` + - Maintained + - ``ppc64le`` only, ``RUST_OPT_LEVEL < 2`` requires ``CONFIG_THREAD_SHIFT=15`` + * - ``riscv`` + - Maintained + - ``riscv64`` only + * - ``x86`` + - Maintained + - ``x86_64`` only diff --git a/Documentation/rust/assets/favicon-16x16.png b/Documentation/rust/assets/favicon-16x16.png new file mode 100644 index 0000000000000000000000000000000000000000..d93115e8f47a939635b73ad3b3226837f83f7584 GIT binary patch literal 798 zcmV+(1L6FMP)60Bw|2Q zLBXm~u^5_#D=(`S=^_iY%Mx4&QgPv>=t9NnMoV2tLKM_ms)9Af%fw5(#7W1=T>klg z7yT{L`Yg|R59hq^5&lmV5CyuFQk4LO^dYzb*M0snm08V&e67;5T5iYw;fNPy^F}ZNPfK0%jMB`BiI{q-%F<$tb^i%%!2% z=HkHQ)9hU`E4w8WwDzSQ5^j{RP{$p90T-9FPFg3J3zVz#ZUcfX>bi z7A;yxeM1~$49R3G@pv=Fc*1TDf4T3TrT_sPU=(;8iPWo&8+$OuAOc!zT3TA@?cGW+ z7>p@pRs+p|0vrRR0UOu~e4a?mpOs7|DU~ch5D~QY(Ae0Bhyct;FLAM1-5y z2Kgj+hMr}b_BJnN;yk!=JO5*`#Ge}A_;>#7>}u)p7^<{}rih2JH?=%?W^?ZRc@FOX zfv!*AP*de$e7x{#Zj~JBuFoDhc9oI(osf4OvsN0v@aIZeBNvb@80Jz^D$ z%nUi!p0EA+aD^Hjx|7=*m|r;E-;d3Ik^-QoorqB;acbCFn$6(@P>M`f!VHEg^yrD9 c{K$v;cZ2KTr4Y@#wEzGB07*qoM6N<$f+87m)&Kwi literal 0 HcmV?d00001 diff --git a/Documentation/rust/assets/favicon-32x32.png b/Documentation/rust/assets/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..655ccbcfc8cddde1e9c7d826a3f44c1e056b1dc8 GIT binary patch literal 2076 zcmV+%2;=vOP)3;q-MhQ?`hkrNhS-jwDF%$eMm!QnNsuCF zOiL0VZ9|aS(2`dhMWInem4v8Y8dWVHS_z0M6uF@!K_U@pAZ-Fh4UvGhD2PINnDB_P z&BKl{yI#Dz_nv;Zvt|t;Nt+&NwCnNA`Tyt4|C}=m|Gl*U3(WUbW?<%8{s(_w$z}jg z0Oi1D;56vxl+ve_(x-rDgE=;vHBXp5U&|bz9*7!%EU*_;pHh}+ZF^eVVp)z97nfum zCnP{$u!gMJ6E!}Zn}Mh=AW#YX85jUcfJOkNG)n1fEFFi2hN*OQbT=)+fR#t!DsVqR`s7qLXf`hg9`Swu+qaA@8+YG~vwG>g zd}w24lt_Hp|GTkx~k78^E-I2y_-)jD!?BLve2yG0~|IHx!2%b20{cH>B$jz z0Qe0smM^iA;$j*nMhS&eB>K~w??`beF`4#5HbtdXfRIckETF*vw-a3TLx5`zI1l(2 z@b?^nGXx4P0bV62BtGx~L1l~O2;B0GnM|LvgbSaXFPzmkr?WEIb zQmIsKc?{S>Ajk+ok-7~y06Y&Q0q!6up##7n0<<@#V!AUl=B!lqH~lX?*+@_z2ieAU-je3pB6; z_`R8QfdXJP@FSofP@u3ZoB8waB^HaJwZ?T_9LK@)Jd(*IeSN(&G&B&8$5Be5ltODY zj-VWW*I-l-)BrJm@6ZHI<0-E7g?CI826GqbI1UvR71Y+&;&~pxFh~HT$z-xjojQdF z9=IRZb%{hGfO^QXtOW$QzZrPbxKsx7*=t?pnvIyVEQ?sIfTc^9(9&|pFsN+@H7pm6 z^71mOtE+kZ@s-rp)=*YfMmQXf_&yI4bj2nD0e-HnB2c{mzs;ME#bO12yY<#O*}s3E zi1^}p8OdaZB%4i(=Ve9=-_J@aH7LuLEtAsHQBqo3CQ4c7z?ns!mJHUa+-c_nPv%|0 zWT1*58zw7dRZp5UiTe8b;Z~cwhU-m4a9x+m%1ZkCFXy6FuC=!en2@(r8O-@MkRZs< zVl!HRoq(^k6|*coe%iEY#9}dg-x~(HxhV<@3$bk*5kW*8U?n5_PCu{@NZ17B@;I=K zpcF?iccqmg9xtG#riT2ti&9_oj)9?c2G?~F5iHBX^Rm~!h>ih21uo}C&gsH|e;nZ5Jp;^WZYCOyAR@!rkQWwG13mQae~i;7_Okt>K~kw9 zns0-7O~OBqrZKcX2G#-}0^K&PVLK2gjsmIO=+=4X+>>imW{9PWqW%E zGn<#HuuRP6^R^ zN0hpnFr`Hn{R5tVup{%S=aVIP;9P`}MdyU)(ON%V?MP>Y{A>1om@`H5_~IfeD#~xr zfYufRR}yS_{ZCxF&_lGqp}HnQMMVgu6$LSi=YLqDx4u-mr*;gB2}IBJ1TyceVIUUM zC0gI8a>s%dtq#xpU>0FFf>IV*2bp#3*fE}d`Wd!tewC2rp|rws6iO?+tWRPfLtk&2 zh?P|@tmHgFy9Y+*CdrgDF_G}6r3N7!R#^76YXC)oljQxat9g0dPHwAjAmrE_J9dJd zJ9p91(LwVB8`lBxg9xUQ89F8FezU!89b+~Ppw_+$uBk((d zmI^^{|B2S@n>Wq*H6r(fEIB;0(EH1p-JcKqb?pE8^ZY;Wk^Z@%?TL~A0000{`>ua0DKbA$MpmFgJ3KtEdllZ z-_Q4+vScVIawr)IQB{we^IZ1~Q?<-@;iE8F5_2ysc6lfS0s;|7;-CmPB@vVea#@yw z4A|4o!Klv8+Ow>Mc+Q$LA{NG=T1v{o#zI3V5Sr*N3-4doN}t)Q_%{2+n$yZlqW3~N?2wfC*tXez2hhSt!zT-4vX00#Pr{=NzCA!WM`X2XabL#jhHYEhrlis z-vm&G*I4?n11Gc&Z)xklqq_VzgcBaJvR+9q6du0q4xjHc9jg37MAz#}&_!-D2P)9B zpZ|Nnz2Sh3Uwx1(C^TeCCPZiz)E_0c5Lg@79Gx1wD(f+Qa?GE)VnFK*()jrx_KM>u 
z0_DFZ&yiOHx>IWnc@VMNH;z0#;=M6hE#Y7v@@DTm)Pr6A8+T?Z&o$FF zH7M~)vA=I2Z&g8K_dV#E_r3ZVI?oD%!svGu-h79~P zv>ak-Q7+{|sk#)WQ(;i^vL_jIo&Ar%I40hX5N zdDjM%zm3gtiBT(6)j<~f<3Bq@DEW+JW+#k_qZKtYf=(5CLsun z1b?*{%#CJm?cq!mXh< z!So!2^PfXt*zT=blI(~d1a?F0n`6f3`V$%B3~)S&(G{ELyd7h znV=kZ7&RlZR{hw>E+&i`foTUSig84Et?x-pGi78yY~mD*2@AAD9{h} z2-;%$`aX4Ba^UEO=5%IiCnQnRCF?q$zikCk<`gMP3B>Ry9lBN2Pd6UqW^eRTiYZrg z&lu#Pr z8DX=&HOY5uGz)_Yi%ab@qI+Jo_d{%K$X*4GqtVx34<3^C2GqR}*8r)i%&U4Ns3mO9Uc?$t>h3f&Bs#>v?&CZ4CYfv_|IlhJmE`y#|7ijP{G@L=Q;AobrN!cPs2Fp#&}VaG~EN|_8BJ0Tz-5D*ia z8X6`@7zcO=hzgL>;6-CaMvS+s%ay?Py1wQ4)&)I)vmgYwb%8zoMdR*gT-%}0G5E@% zaMr(L;mMIXB zEEtc9G#)d01nIuRm_j|$$iu+@(R+t)d1?%+4-$W4QOr_A1`6PY(V*akRMF{uVA$vX zM$y>%x>Uw0ZjFHixy}jx(~}p8IV|Rh-8kYVJWteutN)RBYnK%O zmH?U6b^~%CI{Ae{AFL08NBx9T*YCj1HKBNG`S`&kyJ`qX83%o>HTo^imP-dGafKeK zYfk%M#;pk>cM)4`e$v7T^_sMp*0iYPNjOL~5$$8dSCJqy;bI28qTv8;@)|{d1Ro+g z*X0j-4CF3f1iC%in&cO*H9?q^i4clDaYby5`;JuADNMRpGYrS2P6LiREgw8*ioaM{ z_lu-qw%Ec;`*-5zz~@QkE@q?z zWk#vbf$drz-sah+I;qGshCz*_MCUq|7VyFl<{mg*9=-NN1UwA%hQMGE7~!a2UCR#> z`(t<@x5&AYq50(3>XyiPzq!O(M+ST)%{0^crUU`;F-U>&K9k1dQ#Bel5F|pHw`mf~ z!Brs}K`+-#2ycXu@*S8JUaL2L+b!j-*VB@fcA|@o zva-ENN#*U~R+BX#5+cbh89RSzx|RB9)&LFIIw*Unyzr0%1Q}X#Xe=a1ee>SXCyT={ zQN4gae>197na)4&(7x(wy-(^u=CYlHdT^p5trYz2IaAn>4iq}IB}AS^o6p!kAwdFw z5P~aK$k^^r{wjZNQnHi#SQ#l@ahbZ5rOPD8mHjz=Dsk#yGKxOUK})Gw-8^m*tKjlx zH`))|wBvg(4KSe}>-?TVv|&z=A{elk;`S1z&1K3Kij}R{GEp++!*rQ6a0u_aV{PA4 zC5mZ~nGr-iF!9L?0^sDfkjHh~gt|TOy{@>_gz{<9r|{v$s^f`QY~x({zVPW`CCF3j zP*bKFb^VQ$QG5_zmE7`239IYS)CNixJTDLZseSMV1TSmfXow=M^)8PT@=`WaFhf2X(B&Y!qy^`SQiHA-Sm z9J!3xRoxy}-(H5c3n(ZYS@~+-b>*t`LKbU@!x&Xf#DkYwsTzm zwih;DboIisD?{(|z9`9u2Sqh^>!R3-5plv}*3P^CFCM3#pPHrAzitU4^T27MokLwp z(<}TTd8H+%b|k!JlAX?gqE2XSXv2wH3y>ijMTQ3z%N=*wf%|&BVf&r>Bm0{YFTV%p z^<6;;q5HrbS1FIb{`d>QmoL3OJ(l%W& zdXs^zWXLdXN>@Tv{9WL?SSS2H+@wHKqz|ghF7sByJ6qiN<4iK!Sliy?Ov8yO)zKDI z{P2Da*d3PV$oUZAkKDIm@0hczVzpi+*ZwYCHFnN;ikz@MXQ~h&>=Qn#H2aI~&R~H;8 zuv3i@QbohYO1n1%4Vp0!P>`9H8R6-L6x|)(Q{8Z!7}I$W9=heM_5$vo%z^zJN>8*0c!=D!jyf76z5pQ$%<0qD5U!8Jqu~eslF|Pi_ zcYWx(_l(9=Z}q;QmfbbNjQlN8q(qrWn+Sm`Q7y>v&Z?-OQ?B4xv2ER0(?;h}u&+6~ zx1cJxU@q=k3L%HU+y%;)(afVgO~6b*`@`g|okaejRq5x(jNaQhMURYqNQ^#dVM|fg zOw(Hb=R{mq9c@td`f<@Qn%;{Q1$M19a-lqUjk_0?;3()CcN=$HeerS?U9Bp0_luvR z2p!OA1ORXLJ{Hc=mRjNP|8pz)tgJ7fgn!H2-)IZWKyYv{2=4K;CnRJgMl;PR zWa+J^@4yee9-OQRmJ%SP!ip@!F;lbaN>mOPDg~3d7FfN~YQS2qs{NlEELo~0tjuuYpf*b+Y?mHb*=AyYHucciARq{=oJ z4Z)hFLAbIhfNfDO8qzj6fA-;_1;zTpr*YsYh9c4Z9TqIwD`)geJ@OseZ{! 
zC?8pO<3z^w<2}q3oZs2l1j>*X`YyLs)MAx}%b&Sf!<|7%9)&!be4nIj`zOXs`Ny~b zqj4^E02tw6lR40*QfA7-<6%{$?4mlfhm1ty6eOAWh#Jff*_fpziXD!np;y9%WdBqQfEr0| z93I*YYzUTwVPf?cZ-ej!@{<*XB30=*YhI*5SQOdQ&WmcohC^Yfc?~hUOYxs3QWPwr zCOrMv1O$HYZ#{lfuJ{!LqcmG%rqzAl^FAKS{nklKY41_@%m?iMH!W~hp*(Rn9OLV2 zyH>W_HT}|7ifQiLP0aNr3(1x=ukE3nk*3>B=YJ&l0;C2}X2n`5ee|qS*%aXNG_Lj# z5fC2w5jSUV52lbc{+5LB1Jh#H)t>6IpD11L_saM7a@3dd7t1~OOgmlL?s4RFvC4Dk z6O)m(b)#)|<>uPtGs@8RTQzz0s_Oh8oF`HI5v_adrnx9JdqMX z5|sZHH>B~{(IXAu=$#!Sp|_iOR)2OaDFy>iisWSMj8d(?^$HqfW0?9laO5vhkMyhq(`=B8|E zd8*+z5U!z<{I5)HFFDwf_SQaI`s?Ynai$MoPC6GpxkYVE^ae5AGb$+#qLEg)HSN8< zJ>73B-VNYLF?Y$~S>1%T-oMlG^72r>7%|u*)>S99BLz)>FPe0S2kE} zI~N~~ibKcF+5MTyQkLpF2y7RK8E$1h`6B^CiS9$}WZTMKLC3!adpMmvHkr!swNlmb zYY2sMGn=l__F~@ix3b;j3JMbK+Wh45~BtZ5LzZ;24&~*Be&T&{Io0 z+Qq7lD1U^`Bi>dAJznqi^JjF4@~@qc6@ZHj;3cRuP>x)^D50-G%C(=5FDmKF_iZCX z@O2%9GUd}e1mcV`2&7Kvg@|J7T~Czp7_}jktY0EmoNC4Y{neUFn+S}vW6xQn;s;0aTe>5H=D0zZ~uQOfHTPm8D zv^D^U?dH=`+jYmRFR7)$na+@*-!l@_9pW}&f9j7OFIV3VU-;KE*uajzie#|*T0DG| zq(SUO`DQDPCGrXic=ykYQ@h2<3cKe`v&Mf-5++sopwTxxCKp^ORbz|c#jMu=U1TH`V6s~&R99yk;G`#eWeS$^RMV=(XaR|RtaB+b|D z%8;{>5&zQ^upaqq^wf=1g!n+i&5IIp)036sIQTI%f@(VHvt0XeA+lE2zn^JTu|f|6 z=s2&`HI6@6#~hCf!f{^k@Y2-$vwi_d`8Y&ofADYZ%G9U(8!u>q1K7QI&z49=uCKo; zi2%7}k>jL2RaYR2bwOI+?++sld+%=PBb+I*l+;@cVCnFb+hem{MR#9Ji|$H+>;6Tk zYVDRJjlL(fk*~VUC$3r2WWF&#sw@c}LnEHg5?bVJ{*h>Tk%X<>2?KoV`GE)PiXPN>0}E43p4xVpo_(B zp7*86>yt^xd87YsMOofptKII62P@wh5Vrlry50(uLx#=%i}_*f0KaFa1KJEMUE}hjROZ#lX*5NevgtYgGQUlx`UA@Vd>!Csr)n1cRq5qaDW5@f!A=APc3uNFFVOV2$w zsZz7%?+#;s5qIV7{rBlwo5RB1o(X%L+Zc)a@y^cUDht>ytF`r_oRolI9#}q>rqb(v zR;gcz8k#m_nbSD6YU`|dVP&SMTMLp>@TD!*b?b7>&qK2^kg~h zB29^@AoN&VYLN#45s;FOvc&8v#p-o^yO~eV(~)O`i+UuTvkVCg6vYE;j_;1hX*OH3 zBICt;ZyD-1>dg^;Jz?e?BO@T#{=q)Gogj{8^~A{`TE>XM+(w>%f!ZWkj~q#5GucR; zFC0;A^-N*2&SyH!-|J|wJYQqfZ2?OL9ap{AKxq1jCRX<&2-D(Hb*J|uXuytm<%t(f zbLTw;_K0sArnA%Ni@8r}yecJYfbv1~-Ape3<86AM_REHy4nGISkA4y28Y64^iQm8V z_3u07+D}S9*+@wV-glJU-sKxC|N3z>ovm?xVMNf9`}BUx{60Av`>E<}ngs=b<>KOv zu_Z0$#>PPq(4;uJ;4nPC?(l~D*1-2q$-04NTGd($OB|6OoH1AOm~@-&Xs^M=Ag>Kc z-mZ`X2y?<(0%gk(q-0lg-FaM`JD)=GJI`x-$KPOP|5r zQG!C?hQ5xal+(g$W-E_@+u}Wg5Ao$<_jG?hz=Z;Ar?Yl#|FYi1pN>T1cU_?N(cxbira#4(%lJ~ z{Cz62N80?SUo@3@26Q85SJer1vW#*C=+TFCsu3W7!O>jLry6}Td4h}HJai?Rt>B2KFm^1v z`vA8DapJRg5eJ{kwUO`u{BK@Z`kh}ygnk2%3zM+eVIrMRFcHBqW5-$@@8tIRvb_jL z>JpfYdR2w3Gmuo z?pNkxak=N~9ntdC|KR}7%{v8Pn=w1z=t{R;VmwgO6RGWcd#b$eT6Aq%;h5ORF)`VT ziH&9D8G1IYEQBir>Mij2}A}vu}{COB~LM;sWoDV-!=1$cdv<;h^7m#j`l|Zrm zTS2ieWwpCDrwm$(G^ScUPuKC^J^vISTGALD+RY+*&_*j2*^k0%ZwNsAI zocrzT)qDNn_VXnJjPV-gf3!InnaT6Ni%=6p{%_}oM+%sUz}w5t$vM*LCf#7YNDp9q z^}FszpS(4Px?JW`93C`NHbQC|8YJVE;JLZEpbu#e(+<||7z65Pli#+Rm7 zA{5yrmd3x0c`p}d`;a3aGl`zzbOqwW`U9XlOI-0+XAl|5Ms(Ycg~Mr`($A|MYoZmZ zL{mg#jq=1}11vFxTf6A~4^$nxwe5OpmBtK&m$Gz?D=0-p3(UG3^EW^#YRV?bmrI>J z?KDn-csC~e78r~!^r0rVP;uDoY^la9QG-*TUz@*JS*DrkuT`Dv)0Wqn-C};tO^vJU ztAw$jW1zp_wNG)h`7$_OgB5RH#{|DO=;uFA56IN4f4!Kruy|r&yi7@@xqNhmG(?EyBYz8a2o7 zd2Q>aF})`SGR$;WLR<2?)ov`NjxT*GdFnu>jGZ7X(2gc@Y;Q#9RKL}3O`SU6bF+ga z#aY?jM9AM;@BOYDxu+(Q7CE$z@pGiklPG_f$Amu&I)N%DRyR;3*30;7J>D8CyO*u@ zHDPHC|Y@t05U3UG3re69d2NOg4XVU7?^@Ebh8Uv|#Tzl}C{396`=*u~ReN~cP^cF1)b^6qIL>;xuhYqY|FNze=^9fCB^ zE_|E#X7^mRZYJ#Sxc38@p$vfZULF3njk7&~HajGplcn&7QKvO&_Q)wmz_1`?S8J9p z4=A>$o5wowAZCO4avVgTQ)LsU%?U|J4mPgd!gv2=C_6rgMp+QsJ3kCysxvidHj@t) z!egd1Ld@hxsd2Mg4?M5uQ2aY=lDfD+GSnuZWEuO;leQeEQNoy*A7iZ}WQ~?{ln983 z85x#6-wrUPiMeW2WrdgrJKb!&mnCYvXJDK>37o3m@to-IVe^Zo=-l5Z2{ldL5oN19K12V?< zYF(=~=<5aFG-$;Md3kLJYJkZ+zF&}q>sNm~a>l2m>??g-_2?NS-R=X3D_LM;K?ZF; zGXPMrAC&lYgCAI|p~?xdJAw2B2)z#7iAImbDke%II{`wk#mZCw(%*)E>Kn8Q*Pj)9 
z$3B=MtLMFPO&&oea`=kdfNPOQd=yQ_@wha0&r)7zG*#Ni4pce6Qh06@`KR-5vJQ07{^oRUOc)<2G7G5{FPg| z1nHci0~!Y+smJ%@90X0E{&VnN(;*`2Zv-#Z%3}K78qa7yqt1Be$#gHVX@34WHJ9be zH+!6iqQgbA(o8qeYG?KDayn6gAFTh~bA)h>*LBlObK)vFXMJdYA7qi|i2$%fsM9S; zi&f~`e-@_r(I=lNKI_`|z5Yv^*`n3HFGep;5{6=yj4GRvoQK%e*A{(hs~7Qs-IDa8 zndKyfpP?sqL96-ul&Sq|)AwGG*zH%MX%jCX&a$8YRR=@+W#^g4^+33V;QM&OuF946 z;N*myQmfSRb=waKp*oq*B;PJ(nrz(TOxYAUDhY|Ww3_>kPuOG*KSfv&Y)kLo3Y#ci zj_k)Yb?hfC--I%A80Z}ik-dG*d>>lFf4%IwGFz_I`;yuMAATX6kDH|YafOO9jD zl>5p2&jWj0j08TOx-##YVsqYN#~(7y-@aCUKwIbhax9AH4&X;UwFdBhNY*+J*s z3Bk_HsqV!#qV?E7M%}!Zjh3*bsSM>5vx@YRH6x*>zC9#BJ{VGpPo{2It9`&VCami^}sV@zHj%9mz*96|!esfBz!lh*t?gBBE;OVtSe z6{hUI+|3@HR{4tvkTR!K&f-n=TCiCG%P05xbyaJ0xS=Kl>#PsHgUK!UBe>lLPn zh^RfJLuIkBYPnzkA>akm>VjY%`kIg&n}iF^di%2jEWR%Vg}+N zCFzmX-qvQ4c3NM_QsX^5b_@ z=SNYlOTMVwOnerHn}*-5p@3Mjt5jK260P;P^*H1H2!P*%=l4R=;-cMZ!E9oX(N*+# zT9k09?sYY^y(GQwJtedB>hm|o9&&PoB-`aB^ErxXrT74-9Op?opA$KQ{zmzIlk#W< zvz{Y#6x(j%g>k%^I1xArTrkD~4xBiwf{`1}lz4@Jti{}Z#ut$&M1+bL*?)(5)9o&l zS}olVM54HB+txK5?{A16ciCIY)g0^g%?LH0WbJ$L0p4D)6U@+wFe~}O-{eo*6+IRZ zd~zEX({o*Fz5nIBQaz4h5feId!{Fv&FTUxjcS^IxTDVcGt#lFRF_)XY{UkVpK3bU) zXOh-Vhpq&vZ9JZxCElwC4(u_T=FPvL!@Z?9sijN4f zsNi~g@|~BDjoFZrg_tiAGNsM{l>3jrI!HH|`TbiHv2MS+u7D|xoEN`NEJ^ChI83mf zX`2i1)@T5FBJ>P8L|5HB2ZWmiiQ!AJZT6Cft2su4xn1i`d&5>JC76HcA!M& zZW+J*;>;)=1wZgUe(%}VyR|7HCa|KruNcH*o!)7IMt#hBGZg2#t;oRfvQ8iIc20CphL z_*PJ>3lSU7ya!;QC+tZ@kkG?idH-sXlgqerZ~3qzlGLZXF1!yoo|?-qn7DD61d3$B zD^H((f9O*CZLdQ6V{i+lU(6^%GzcM8nyK}yO~jfcY<~En2TZa)`8XSYcgrrt!Z72i|8}@70>$^Bh}i#^qrfJ$Pej3t&Bzqy@!&V{N}^~adqR4 zy;$@V7vm?&Sfj4ve$6?1J;}m%H|UDrgIUz%VUY#4U(@!h2=KHs2`1s>F|klMq(tM_ zX{$N>K7644iByAw5alUOT}H}3+_e1pVL?!)_1v;gr$7?gNei|c(=)Mc-giw~1+r1C zF$XZZNbL!v^+MmD??k%KMJ91WTv^#uS?Q#kEH!UJxwX#QKtJhH3%**yPqIK^*GO`F z#iPZp-?zY_jU2Ns)s@S2!<(avMw5QTJ2n&A83rYQ-Hz??rHn>7 zbe8HZ!T!KA;=PFG_%rn5hn~g04#mt@>m?DFM^C)xt3A(Zb#`khIj=%lie_0E%ev7P z9$Br(gTF27x)_LR)KQH3H8wxvf|G%Q;BdUe#gtvkITMV>tKg;myrih&66=Hq+RiQ z5yvQ=ghXem2CTH37uMgO(~BN@2nppO`WZIkD71!K@KMnUO#Aw9q>dog4ob4x{Gq|W zF73MoQ8RX^vPI3X^vqf(t6#`%jqr#ryG??|NHa}oD>XafM+QT=z75l=BH#%_m~u^~ zqm~TZu-Q?(0`>ye5()$E;RH2vb&+2%JS2gj!o%Kt))T?!O1z!_5`F7q$mqyp?9|@& zJjee?Pr(kBlC_!M>$g6k}s zg~7Tnq)6B(4MK2w*9rTAf9q~Tw@RecW1-E7g)&{+4^Ndw|Cws?6ca?*pb(divX>J} z*ky*DOc)W8IJ}y1M;X!P{QIzsQF6P_-C{PJxe{vI@S1d~>y1O%{Y=%f)pIU7ye~9+ zQPkXVY?c7ih}A%g#<1Qg%#7mrBY{bhj>o5D(E13kPu}@z^pi-vD4D=x38WqXycI$q^z6hHYc*aBp?Lx4RzPUGK*! 
zfUsKKe!yM#^+%te$?a=I6urCU=0SpEplEQ=n7e4T=spPuJD?b2`5}+PYzOyG1;m{n zhPjf~xGk;+iK`PVr0tTYGnF98#M)C9P~n4yOVD?zw~s!2A|6#eHP_%-N=&b;kc zk^lHbPZXF;K7m`0@9Q1c3T2F=p>r9^zrxS?RqC)Wq`NXP{xt-y3 zrQ5cd=Nu)TllFl(Md{igICL(xOS-Zi)wpF&NFi{+?1{bVWaxk zD^I2Y%&pY$95$9%%Lpx|63Xam7sM(h<(d&m#>%IGhxo?nSfG8dKyRuD!(2Dq-|*f( zysD3TqBNWxoK*(oVFvei$S_A@#E0+-*NIcwFE9x;RknDf8>b1AGjE+K$^>Dqe$8NH zz|tT=`MWjNOlzc%lM*D`@i}&FMtK-Z#?{ynoydcsXKt@p4Gn5@e=PFlX}Ej5><%ze zSMr*v5Ps=g+SQzubZ7V038PPARSI>X1z~rm&}K;6lr`IxxDK$*h2~q3?&fth>fYso zW{QUfjYfh*31n$^F~1C}YnGaBdbd?wzm&H`NOP5~C?Cx<|9^H*84wl<$~aJo{?8Y< zSRe6YDT+rV4(`pM4B`HodwUXK#lCd+$0t7=NElo$c#0j;qFrxn?ejcmBCV;TP#cw5vfL>uj%cFjh&V4mudPu3Z@EX5r7){ zNDx6s$)y=;Js4J{ow}>^AehWZqPU0|cwn!jl_tqSL`9n3dmCm{&_bO)m84JxDPsrd z1?LXpxIG+x@)wp+)F_t`TuS z@WY6dfciq6Z0777RZ$_6G(k6ig0fkFaee$liMK!5S|Zb z^T)QgG5)h8N!;q0B1co_oZ3yu*GZvF;C5AF2jlZ#U^ePkVE+bh4TPI2^ug^5Z8M4t zm)(W_;b|EZYYDqaIm|2LVEyz@r`(ZvBzcXaBF1=Y#J)L67^V=V&8JI@E>^iSp=?+C zL+AzgpdFR3lmy#^*Ux>A*8gRiKx)RwlmHJ!*8KIWJZ3Zw7Ho6}NmGQhK4tAZ{0+qH zi=XqA=!@Hs0(~AU>ar7QtP~*uf$7_e=Z}2he9yZuLSkZ2Vg%rItp_77aIm5vFQ}4I z(B-zJr7B_N#6^sYLMGz8`B{0cjf%f5>zZSoP_8)VG6IhSOSIs02O(G)9Nxh6|-hVXX+A{K%OF>qnjuF-e7$Ut^2(pryVBg(#jT&nc?hwn#@ zd$HV}ov`Zo7$rERR#=e|5YR3PDk|X47zQQ*7WN~c0Jf4)U^PT3X;FeoyWQ=PFI|`lIhzSBGly)&szU4ug^{se;WfkKW#}n-CVs#aMtu^GE7I(` zTsiOi2lwL!@#{XMWG65-s~j~NzkCT!qM}j%_&_-Q@-AxhmucGb70F(F-`d25|GKz2 z<{u&>Ox-odL~VSIWc)gjSaugb?cbp@!j>#9BZK7O`3=VW0$& zEl%`LqJv2S^&ah;PrUiio@bok;C8>-z(KqvYL|P7=Waufz|%D?e;akAHb#8E(A{VM z&v>{45i3aig$*b-ixCPZ^ENdZrT2p4riO~_7Aj5PI*s@`JGZv3 zAz+=ZW4=K@U^O<)p@Fwa?1-vVY!US1<24X~-JZ0VaPo_*``%LmlhMxL2nrf1tTf9A ze0iTge(J1AWw%$Amn3hV0==3&Ysb7s6HtxdszC zESh{8YZ`? zb4QX1!?e|DZEQ}vW#o6i%)0U4YyEqHafi`&I=05KIhC=7--|YH>~Um^ZW}^gd9xYA z5s-D(VkTzIwpOVUb^D)_0~R(osjPlh3B=yDJ&|t$9Y<%>P?2&N_sr?vW5CXaB8Nwp z#-tweLj{Dz!t$M^TC6t|`lQ>K{v<)3VRW=H=5o5;UB1bkBR<)!9`$is+Kxa*WH?Qk z3R5oWy^u>25CWntHA_=;HNu#vsnnzAZjcZoMsB|TgO4E$HyUHaN{cnighw72ZoL@X z(ME{6n-#7jCvTzq2njQFj*5(wK|r7eylunAlv1Tucn%}zlscNmJ^SOP^KP#+s6)d} zeCDCupB}(pw@h{R^y~f{u=uIxFf^6Z_}k0N&NcNT!pweFBtZ;^;UG4$T~*ZENsP*1W0nC!*jp6 zewTg-u?w_|ax=;x~$@QWh<7+$Ro}R+&Ad+k?+&poN9EXmuBL&t3bB*hV|w zFh)A%_gu3ZNt2gzg8-YB-Vq%p^kl}`l4}f^(BNYFQ zI40OYfv*KONzs@P%f1TC0uD8zM^Oo;+bzU79@I|?d5TKjVC&~7Nr|wc6f+Je*%x;> zMDEksg=$L~Wl151moqQ5tK8MgdnQ2~ZK4v`?#?mY$1o0kQr-Q9S5*Wek}3enoLF%G z_?P1{v*VKgEb!OPB()kik&@#f02r4Tky3R2N%&B5%~^JDqem{i3R|m;4Qh7>Zo}_y zK|2cp#;>Q5c zB~jGSXb0Ai(7kbHO!hjdXWr+9=$z5O@V&o5vNt*=*8&iO`8~nIo&-GFSYy|QB+uei$x2f)Dc&+-)Ww8k@nSX9G;|=1fskR z+{QI zu@PgQ-y1ILL&zXYbz#X)i^n=+xlgZYLrlEl^)U9|nhE=3hw!iCBggr$eDA`rvq;D5 zxrf%)KEJ zUZP&O6iR=tFfi{Ziwb}6v)qR+{p(4R*>03rO_984Lc?w7{8m>Bq&6tD};_23RSMv^h7anoT7HbAU>cp&)1 zEu?ne(G!SH(ht-7soyg^z=7(s>LJmm#NkWTgkZRi(1S+&oACAZ(i<@^xT&O^kjV?& zu?9JXzl%pnME*hN`+M*p8{Ui&^UJ#yX4wLD+GJ_r-l*A*}P@3)D^ zDEpGr7>R$jSzcpm>Vzqv27#r%b)*5u&LoO>Q&Q)2V=VE*3wf`NFRIKXjo<~dTA5yAYB?5&L0^5nZoQNI*wbKhvdcwAsS@hm9`}n!F}rgZmBx>z-VMbg^xO_xMsG z=d_mI1U+Z#Jd)=Cvi>+UqEFHoX}0GESc@Wamv`MK9?-!BNX6-bZMmlQ_u-0) zOym`~^raPNY!RQU=Rv^gSQH+(}~Qh;^}Sf(sdSgm*>1R?EwECvt4 z7=Cu-ap^O4kuGoZdMK2PF15SJD2?3Xk;DufT>SfzdMv&L`S^xdXBiP%1KGIE0g!ys(zCrw+!=n?}Y!?ytE&UJb&k{|q@{BkCE1 zE6KyCO72`EL6ihCm|QOhHO&JpvnvWL5_N8Us(-%}w(M*~IX89psD}fGRvSLc6{o-| zzxv+(6HqKs2DCIUK*C@8uI2&Ki`LER=gjzzo!4&@KzVfM^ry1 zM}!NvpM5$1s%s?qc7F8|J&-yYi5&T(V20-ZxNXlJ7ur#a;C;pg-EkIMsp|Ejrw^eU zL|dgf7myNjFT$~SEkPBH_#J3+HQ;5#nr*v(wDTKBc_o7c%2K)KpM!3$}pBH9$ z-%2w3V*&8i0%ma>mLh+QW#t<&kj6W5YB39J@d+570FuI8z!Ww=%YdFZE{yq{K5@6f?@G7HUaKkC(-3a9isG60~Hd0`Pk1~TeyPR zjcxYEUH8%`$8fHKR2?Yc^Hs@=?1Wm3td;Zwio3*@cdZL%n+Q~zF{h~zk+yl)xt%K<~re! 
zCO`HOOs|ZsHLLZln}rG`rS+kxii$dDM+=>}oXw1cK!bp1e{7SAS=RS)`{LOLrQ8qO zp!c75+#hXS3NY!XjpLL@JMpT{BVn*mTPe6dyCsQJUQ~Ze&z;{EXbVJM)3&Ws-J2}1 zu00~%`5YblQ>~GAgzf?_(2?_@Uan}iO!tMI(`NPG+|8Thx^^&U*pQ~Q+Ap>*(%c^+ zD1x@g*BLha_Jy34upLJ`PdW`G{aC-hr8U$n$H3dOW5!f{wO4teAp%XS`q&PbME?zv z)KswBH`h>^)bpW8(tAb)T-c>*A>|8nMjm@w7n2n`Ept8fZ4s70iASs;0zbj* zIL@%K{7nYE0)UM0nXEKlOt0`~s9K|F+dDdh5I#YIEln}2Wq#`XmNo9lTF>>qyIbRO z>QYUK1ZUb_2wCLrv=p)Jla9}{q`owrVDttg71vdpf3!uH&&^lzHIUgk^gF(c#^oA# zzX%o+c0z2dC_);!KfB)uzdHoJYmL3}vooo+()+g^o3o+GJElJxI#-HgX#7cn>jMly%o)Jrjhx8JO`rnvl54^5Vg-yDGh6{dO!7_Mvwsq z{r8T{+Ru1*;CkU&jj8cO21HqYVXv~YJ{UdUvT5yTj@CWSj&Y=+j;>!_Z_+xq+DG!= zM=dC1U-OsRCvSZBe7@y-GXE`Wzz>&E_?v=*F(j1M^bd*kezoKQ&n2WIi~lvg1(yf@2`%^IcQmx8<3M`ui2nJb8w?e80A? z2cgDfBdBaEiNw;Qmz@;El0Wcz*A*viwz6yjcNaqene|&aZn75K_`(xo)6G8G-^)JU z0<#OOK7)YwFk$WBjzf(J+EC+jjEb4Hk&OF%Lus43)LD6n!`5NIyz+2AZfDT1(L`8Y z$sHKHxY^UsFDua(Mr@A_^QwUp9Egmj3^j>Dp=Daa~HNVw>c_VH`Ue8h=#5gJD=3iV=fzMDa_w`b?JkGH*-`vi_G+H!Qwzh^acZx%jq zwr}MrB`fMH6{$;$%9xQUUW8{gytJYHhHWovNpOPS$Y5TVFU+@j5H}TS1xq+Z76!Dx zZZ@ZNi!a`=x@xF2}Gj}z1zFAv&K1*b~ZdLwqqh&a~ucP}&DT^5i^rGCl)FPaHBbYAPR;J^b zdhuB9r$}mx+}he|h~&YCwD~oz)>MUqJF_g2tv~;}w3@QZv2n!9c>IH|_EtVcM%Zd| zur?zb8KJ~CnK*EA;%bY4a8Ng(6i~1;CZb38N|1(%1n&?R%8Chn^ha@3&ZDStV@IsZ zV}JM2QA%Dt9QUZnag%bumTJkwx@Zw50I#k6v;=OQ_v+G>w||dFj~)Kr9z$en*k;3# zCyBllM=m5sBZTSmoW6M4GHJsQCpm6kC7v1xktb@(<0wE1gRbIh3ptwczWj|exz_2L zG7YFG%ZFF!<4N&YOQ&6PXZ}R@7qV1^n?7Swsn&bG)jg4Y3C`8`l&(XDOzw7V!KLSq1&ipg+UY$5DFEb4V;2Yu1%+#+R3rGHpY3F>q(nym zk8;QRFt@Hx+E;80HfuQBELazCnR~O%dLh3t)F=h$LFK2Wzt1c`x@QXH@V}I{sJ1(- z$l!Q*F8AmkRsO~PW6`7B%%HA0K|T~9EaZNo?O%evjwcu@ow*OwS9xdo#h&`5UuDx@ zFtd_y&Wp)Z<$YzWG#MgF5-*}OToTM7w{GIk?J6yjvTyL%kQky4} zYqtGaJ>JToIl+srvARTPkh+$>B*D5;{jmx^uy?PlcEWJO`@y>vT6p2l5|p(%S>oshLPwPqI(f`p?Y_%Tfsg4p@P|aYOuIa)IUk?=O#uqt)=AyM zN7I=>TY1RfP!AqOUb1unXX04XboOa)7W8pn)5FqWAv|FXR0 z8^M2x%E0YuleQJt^~*iSNtBYBsNZ9ldk-*|QG%#kf#ecgs(-3C+`ee;oM57v@eiLN z=I^X;;CZd zH3Act1gqp`L6vKi_|#HL0(jIUONBj$0T1RACxycSibcf zTk9mUmL9|GqmQOb<0zf=I3ws~YGXRC_GwB7s*Ve?2)Z-5*VGdw4csXyrM)WoitD4w zGB}is*>;oB*w2M=k z2u9(De;G_o_=L+YpiA>x*C}T+^4Zk^=SAH`jA^;OzUfkNm2++;D%9Ex7cEzG1ab1* zt~WX@?fV1QuDz~IGrAs~*p5g>kxR@?ee* zzX#W4c}VU*+I=0eq`CaR^h9uQyYObNczNujEJ!c^||q!$(b?-`j|r zz~}5&uTbUY1md_aTqR^3+rD{uWa)0(GS;{=dj$$5UFRNBJnlrmOWs2 z!TTp>aXL{XL`BWrKtCWjKDS)7MT-Y3P237{W%w^ns&<9Bn&pXghQ$^t=!b z9@TxCN33(+7ItFaUjy#l+9Qe<&ul8M49w$l`@TmDLif&LUYB6Hq+L;<1Vx z$#yEU7E>70r4)_8dZ5F^C~-`p9Fs&@d`#YYRa|hSO0@5DwRXx-Sf5UbEne>NYNd|f z>ehXQmpgh@K*xS>Tf<6$98kQ5tmAaRs`|}GWlVzjhjq~EM#~Hb?4-hyIk+qUb`eG% zeXQ+sU9skp+o7K4jaT`u-LoJ^dDm7Mdv9=`6lIiFUt&&}Fi$7cR0+sdtfWMYp=_a@53h?e0mVgxxBE>lom*a5cL06(Si85a{rC0 zN46zESPy*9=7gkY_5h<5wI2=N1w?nLvFkohQ6d(o?V{0KFxaeH3aPJX*m#r#YApu~ zbaJ!jYKbck+ZuFozO}2(F`BUUE*+>mzu~+I1d5Vaym|IvNL5{Qc-{mHz1k1ll+HnS zEPE|O|%UXHpb_VnnQiM~jt`wz%~=%!c(k_Nd=2$D@YZJ+<(L#meoYc!8~n(Nd1HOmOBZh_JF*+1K&6y>?ASA;MHFDERjO9#*r z+sw=Hocc$LpWMF`y>xX!Y`mSjx(~}u81D>3a2!;`BmJ&5oRXmMtDCH^S8A0xYGXn` zvbE~%J_!3Zg0C6+P4!XmQqn}Tp&f6VIkxq1Y~ap`$Q`2X-#KC(&NyYXQ>YR>Hqx$x zI}nNkQV~F3_Wux20C5b{$3j$T)RODFKpd-eKc&Aza67~1*vt3}ZH!d;#a}Jt)$5Mq z{ogO#doOe`COlW)N8YM>ozJS)uF==G-$yi$feCV3d*c}nz@Kn*TCydc@HSNhAGkku zL>(L*0D1h!)@4<3)m4afdu#4Xo#{|mJbiG*gEQZhUAk)?i~|Ahz$00gQeK=n6cq1` zloj4~L`y2_#zPUJAlW4l@K1;3vo)aX`Mb|loG%lPyl;UgO_8G~_c-pPb7Piv^yjCu zE|&*;8b*i7Y)|te(aqAeaDY~yN~rXROg_KHF5n{A0|J&ae{{)^1EEsnaTXIEJETar zg0+E;CivoIx-G4EeBRfI;LeqOR1jaNVJ77}&rFwo_Q?x}_>f})C)r`uKYfT`E>_=#&yvE6r12woA=5gtap>dk|LNh1J`N^}#)4T!2NSvoBtY;pUj;zokQb}P z8qZK-Rt89G!4_~&+ox+Zp87_RWFtnNe6iw?v!u=YWc8%ri`@tfmB#!h0;z9UW%tKziE-Wyet%bkK>^7PC0Uj> 
zvK%!fg$eO-ay-*&%eN0CdE;ub6(-NAYpXOVS?GRbk=*H~S>B1xC+BHbo*C)sJvaP7 zYe>xSCif}Tk!`SsSUVsuK_x4P4=4Ed070N*IB04(S_1+<`Paqc68H22q+lSU^D4t( z2~KT1k$S{iL_PShyCE{U5wklojO&*(#|E{Z=-^8KInBnmlh|B5+4p@U_MjN(G$_6z08OWDZaY8qUG|j-vBb~>=E2`PXiG{ zd8^o}6^QJ#a}gVPm>BBUH0=0n7;@!pQ7Ht6oH748$XCS8qHG5+M58AUip)s>SP0Pm zkOP+MVXKV}?*Ye^Y9n|oEG!^ek8fmzOoNSNT+m~CpptScqf2P| zNrY9Et+OD{j`9-X($y?Q-|?L-rRtkRu(z(L<7g&3a#Ze_=UPaM!VlhC2a}rwj4H_? zDoh{aoy_#~)cpi%;%QJIwO7dn5 z+~^P#nfYPmUiV~&c8aJVq-fAS%G9EwV==-mPn%a`a33D;R>BfVE~1w`56J!qi7b2O zt|C8Qsz*o5o3F9yqetjVnfCe>7ku#oL<)e;5a8|Dy>Zv`s&*NA29nf3?jsmr{?L$s zRz$4QyZ0hDO}Mp2H6Hc^gE_?!#zQOuRK}xJh_-%t$={ovR*zhB`D9-Y(%)q+@SB`kDt%pqdTH>)bnFv# zvKZ*@n*6Ke+kgp9hkeE74m?LpDh+}+Q~A1slg$WKJm_u{5KI!~#4Euo&;{)0RAh;M zzlP#*wXT0GG_Lmm9a-Fn80kj1+J1maq{_pGInKXN8!^k#+43TmSZtx zPu&F-kOXqra{%I)7vmlsz`6lU1~}+3ak`cyE|i{EZwlnGVhKosGN0bs(*iW0?rSw{xiF9NZ{FvV)Azy$)D%8alhbgQD(C{38DtA}9uK)(d9kyy zGl%Oh00hR(Em<_Ds~1g(f9Eh9muY_hzcOm5cY%Ld7q=MC{T46uiy9fGkve<#Cra9h zxyqUwW!eOBL_c2uBz05B)t-~{^s83$y!}D%zktrrkexKC7%+sO@JJIz?z!{Gopjd5ISCgxHe^{;}UY5f6CmPR5eGVrC!{ZQg7_M2S;%yC^uuYI=5T@#}eMO@99+x)j1Y~a=Z&$bAF zc#NV1^fczim8A%~19A1Z8AeJpUO%lxI#9Lh2A8tuw|w~wA5YG-92yE--c^s2@#-<_ zx94ED*Q-G?%&&NOq!~n@1ldF#DSJ@_J*fBYYE(Ql4`sNwEO*0?um>eFgM7K^@KfR( zt?fh%>H#MXZtkwBDfDKYNT8YDERq-R#|Iz>6p08U9S%I|{UTGv!$=zTmM((CZJM@t z)hqRM7wyNL)fMo)0ce=}eoE0-g1OWS;1hty$=IO3h{b|G{*Toh(i6HpsWGx5yH4(Vtvs#W1+p@!~VNP_7Nk6ozJI(cRr5K=?K^ zV1r!sZ^M$B8QPSoqymM!VZ+bQWrPpEORBSj6HiK)U;NWicQIA1r9 zin0tw6H`uF+%rd4)?F-R+^;(Jr=O^~%o8lhT#WuY47Lzjcs}hV)mY4zqzwNjyDHDot$ z5qX+Yi0z_*{e3IzeyWU1QgJqi!6w-7lJPrrNX9y8#f2O+10)=O=LpZe1dabHaYeEy zL0)1PW)Ysyf8S$A>t$Lil22LyGi5y+k(AU!!52c^u3QNt`X5!m1%L#sufrTbn!o8` zT|Z8q67caNAd)Mvru5ysQkpd!k0ir|A@`nT3V(V;O2s;-{hgCaWr@{-@Xx$UOd(DE zW;zZ=g+``q&MBd)E{Yu>s(eWq8>3+X$}zy!*lehpC)fhL=M&>VO2fou^fQgKvh{p9 zmBH?e=olK>9EVW{*nos<)|vgu$>}yHuW`S@S*X-klHY4T*GIt&=O2+K ziPH2x-<_P`-3{Q|{%l-xIf;`(%T`~PE|4AO1c)wO|4FnZMl0Xk6iA`}du1s@%k)2( zRaN8iXSI+4@+&Y$R)LJB%WwvZEgaCWzWh{ zCFN0ZpyTf?OI80c1cK4P|BZ^CoYbK-b8?DLv-NSz;`czfuQzZFEmLP%8kpD6*LAd7 zvE6oO{O?lGfB9u)g#iF_!ld)fD3Kq}Y=7fRz!lEY#15;;*eH7y(%Zvk#O*9t!(3z4 zm33zDmJm!*if&>!;jfJPb{6oBv_)l4R;wbHkK-lSu<~7qSrVrT`++urwqm*ED!xrq zB!DHk%h(fuyU|`2SEj~}R{3T*^6TRqKqip~6S^QgtZ}ukw#L`=@z_Ff5lB(#CLN># zP^x$p{*PB={)bgZ06`D{I2$SB0ER9=#g&sSV@Iv$Ybp|bFG4}x%`2TcpYHmSf%91% zWfGbTT0eC>cpua7o?;We>t*`QhcEDmP9!&6%*N_LJT=g8_j~oS&Hn=W0PYc)^I9~3 zF#rZxp-3cv^ukq7R&{EkW77GyPFK<7muB1Hy!@k(x#<2Extt#Zur|vY)OU_2%&mEB zg5lkd&jVCCwZ5dJ0!ix!9uM6;diIrd9RD}Fv*?-42xB`kN}D+|$n zHYuB9nHjLu_q5Odp3I`N2rv(V5dC`dXK8d68Ca4EW@#iPC3`N{MZ7km2G#+>3O=XR zCr9bqyW^KobCLm;^eYMWj%OjV$P_1LpQyyGMV7(!vxs-CPN+yIn*!0s5>ydIj1cd* zm95l5*3xM~BJbNMw)-atAe5bE(^8zGYV*arCIp@{G+xT`JF5|cW=%lcH{Rmin~n=M zVDcXdVN^Cu-@Eq=BhSNv_uim$p9E+kz6Z@)euHX!Ap%xjQ}*e|cmzi06k&Y$J7y`~ z*jho8U+t&7i5}5;zq#FKn5RZ%l0-XPQ(F)dWgbYq@#-w@Z3Pn-TaEz`tC04G^;%6} z44m$_^-*p=shQd?6Q)f^>Hz~6#*@mQ;SqAmiPYaxGad%}qC)L1(<{cds>ovwef(uVzwCruk?1xOnex~>13Rc!&D z9#CP&4jYpaOY;ELrM=E#=rWn7p1EU<+eB3DkuHN_gBu=08dKLX^*`!V<)S^lF5)U$ z&W5k8zyxofQ}Z-JGcCwJxswxoJo@sZ)rBFL*vG845TDM96+`qEF<;_GvjYY2GwPFt z&$=0bqTu0S89*c3r3S!h6w10H=*)tG-H(rq4$ZH1_mGMLci6!M4$|1K28+6-RHtp< z@(BicN|mUcEH!4h?NJr}Hm#S5>QAzpuQvaT0*V`=_~n#{vzC8FI(+0=KF?nP^58Mv z6^iX)-*!O(^&D`08kaZyd679H|v7Rh3v3~XqK>9th@~l*b1KV@DC6-&H=|;5)XL&dKNk~ zOMI@6`b?*O8K%*et2rA%xCb*cjA7Is1TYUj3Et*(l4VT-5FwEqr7nAdc$FL+l~1#Q z+qSCfyp06FlWu&q-QQGU2kiRxfrsj|KY)3SzEfMF(delCw*cMG$CVU3zF04d?Ktk z@p&c{dVZE~6f9ffbJ^@piJ|q|8%u-w4Wn2L=bYE-pS<1&9UrYK%TzeoEjLXlAzfsz z*U_uW*5b@y>RBhl_e;ffy(dH-9Gdo1OH-L=AC>5yOB6}V0lN8SvQcCWdn7-Q8v@YY 
znC=`aKZAiA>vFanir(*`%d*%H1wxpkJ9wNHFT*k?PNfV_lK|mD53-w|HgEd6C}v|I zC?9`N^G>!8xp({;U1)jshDi6mU!vD-sROR#=X%oWXEQ>em(z6Up5ECFG!Dh$%KUfO zgN224x>oB#zcF_Yf{mtFd0g22g)1Z$qaMNqa=13OCSKkC*YB68}&Q9%~rWx83HW8bQ1+`^*Pq?+tB=aZonCv#(I8#wgU0XoMe1T zSXkD1pI)fOZ~Y4Fn>g&6`^(8HpEbk>2_Sagg!)JKUqt2Xi1zA3`}I)_bhBFp8`c8NvMm* zKXV2AWT<5;byd;mZL-JLGU0la;NtZ~ekHT>&ciZiO`QD|^FwsVpEj*BY>yZ-6b5)^ z%1X^hyYxKIx`Fa|x{PKpNESx<8-X{bxd18;xet`Gti&(ZFfD1-KVP6XCk9X#3DR^A zHa#{+g~j$mBoWAAy>JifDDUU(j9aOgk@H0e%-(5SGd(kje`N|?^siGp*kcV(8a2j`ax}DH;FCqf(PkI{k+raxwak01gFiWuLXOBHxf5EA0o^~fq%k}{^0qXbwQea z)pv!_YKL)v(YiUj|92Q?;v@G0DtPdqKZ6dlXg^=r7k|TQu&wND+TYv+0XBPw)mdwv zXYv`JvLDs7_bkr1qRf5MW87#3+$95}=9_Qhd>$G^kI zBdm2wM?%X@vHl+TejpFyz>nf~$cIi?3YI?7l?%LYb%AR!*FSM*)NZh<@C4){LAr_J zdzPr_KUPkyXe}z^3+(e2*(wy%C+4E%e;Eg;weE>EP88}9jL{!)+ErsPH_`Xe&8MKq zh1<)$ikgf6V6B}eRatAlQI^$PE)Hh0nv(vcz|v!%?!d4+>8se_ zgt-Q0PlYbIPpFgtY2>jrc_PN5eiA|@tE9zCihwgratB~52G{dr01)Hz%W17f`m8mg zMT~SKh1fl&q*SmGp&|kXM#!{aYWq(mT2`Y1-zdo6ijJOS&TQ%s8s9yu7m{4GR@siw z#~Nr&YgrJE3Fr0d#PFSC6N~|bjxzw~T9#@*jFa|6cl}a%@uCdhV_04bS}=+yLztTe z$fM3(X{jDn>s{vadVl93m%kR{$yl+&`FkJK4MWzH0h?9@hZTK)?+weAk?Rri&Apvg{;W;k=^vL0|I>Mt zEp{})4BH$24nUusi8uSO_;@bMqHM%?2`k}ZWhpRnw5e}HX^Hn(SXJy_d<;h z1B(#+xBo+(jDYCC45u2%hV6sT>`idRmk^c{8 zv;6RMJbe1T%F7=seev(&gF)|t__1zIxh9*w7T)bN?c=*uOV7f8%uNtA_9yVJ7^ZoL zZEh!ON)W^&tQ}zs>3}x4oXz`hB33(rs+sn`#pz)RT*yV{-*}aM0Rxq*9v+fKv*s=v z-s%Y{t#IB9`QAeZJKB6WZ^S|T*Vstq)56cnKQM^kI*`1G+`JlyGcE7;9J{2zy}o(V zOX*A#_GRutOLu%#ScqpWKiIai{Zapho3$YB32eWZ32c)`!y@CL!$3ZHfpr*d%|Ydt z;4w=p_zr|NY;N?X44!ECzPOH$&aMx!q-}2E@|;@*ozEmD6mt>L^s&Lk;T&vW0=q5| z$IM?;*;+i|g7$T|_LxIJ>Oi(G%1clxC)ySfJl1_RG)vRvF7aB?<7hUhzi(`z^sN zpq>k)EK}bDckvIyvQU-)Wd9J)OOqw;KcOdfPBM>*s=;D~i!9o+3A%xc=diKgCJAJl zAJ#oNS?43gHoWG)hR8u{HrW}3K8++-AqA?ZR(T}yH{||0zdB^}Z=8j>_FYiRe&uKx zyhPgM=(=IxHW@VFY|k7`lTINz3>AmkcsQ2b*Qzhsz%7b;#J~k(19ySD4h|DWq|@tT z^@?9>ph9?bRiEp{xBRx|TTbmt=X8&e3;OX($tb=w1*TxAc&r+&3?L~ycn1ZUFczbzh-Cw zza>!_i59l{kjm-Dlb+NSV@N7L14XxjGpxC7R~$666b|3u5zCilw0e;k@KX zdc(Rg!5eZ#O0^0=pa%^C209tzj+u@q9sE~ffh6P_eA=($A|mCvnX{UI*sLv-NLWxG{Q0~`Hn^D|AI;uP^xs6f%KnqQ+OJ}z55>!4_$ zGy^1G+rt|`8)Zb^7?)Ud7J_ak(Fuy|vY^?PCohAin?^Z3Krt!x`WN90Gr@VYJ z0XpuYGlnQY3sKfQ=?0@ zocAXkVjpCO{KdUirQedq8VZyGQOch`UPP3|N=c#^K$c}-00%g6C#@)=`e(Oa^8UIM zalyY1?{mDNp&@EO29T!&3T;ZL(uE*Nd( z>)LquLl-3voQ#lAT>J^xHJ&cl#{tObY5?Ema)xK>;NU0lu$s61qVizilPxz?>(S#Aq;E zICIQ=7w>stdG~8Rq)y z&kYi!WTEhql)|F_0g6`1_AJB&$o>BXZLhBW+p-PM zsTJlprbsQ$6oEa#QmHI%wLpKe18Pi%vtcmDHgRB(IYx7-Yy#ygG4bGob9>qJGzKb< zF-qsh+ncbrz$_nOz0IY&(~T?k@w6bopzrf}IYXu{EG!(_t1+%ERPzLbsOKrey=~?9 zV81>+3kyBG!2lt9dd=-k?iMmr!7S62hBM#`q~PkgRWlmW^|v(#!AP!Q0}1 zS5XNT^Q&7?fetM$Dk7s69nY_bpN$26-TCB7VP`v+L$hHYuhwAO4_0BCGqog@*zx@# zdjR&4h-#2$)kMXa{z=-~{Dj#GJwH20n7XNbaL8fQn{t0l}hQY&{hE(LWRYNj; z^5~xOh`6@-_$uWm1rVuh{2f2;;B+D5TkFY#XJ*t%3(zwM))&os3ko1v3=B5rzauno z^}PTC$JF9fTT5gl^gEQB2H-^U7yc$TNb;ZzjflJcju$M%Gf$EMrQzoU6A2UW^XSWI zYwPA7mCU-MbJv3p(IwXhCUYi@_}Al#lN!sUr~`NI$Xx(RzYCE1&|J<^H3|lv%`h@O zZH(p@fd-Ohhu7Cumxy4QDgZQMHD|6=44M;^%|k*7g=nh4ueWtsGpsWpRYp+IB_+jP zE0~47%>|BUW9DoK`ESU_+&C0P}5Ng8uJvp?7;M0O~nVY?2@^H2N@`P;Af;P5%sP`4DwF#!QI8JH;m2EnjNtubp#n1J9|9tp||OWd$H zjA75b?jlK&T&%psO%kU&qRyfS!dJntXgeK(L6QX7>nX7lfT1jh+>uZL7U!EqgDx~B zp2V>C)D}WZ|8kQo09Bq#T!r_(g%hIu!qur%Ky_v*>HT4qk+`4YzM2^8a}xI()FcUG1i}=?*ms zxd=Il#a%&yim~KBo`(*~ug34yqk}Oe{Ce5zX|R~@G6N4rvSn`XdD&z7NP|Y@j;4^q zX?$ukMN1PtO(@kJDL6Dc%lrN~BqcZ!r9{6G&nCed21*Mv{Takz?*#N5br^|bf~UFnY4b}l)Xaikv(=Ow{?j<^9@boIH+)`d zE#^mDe#1KFwQc=EC`5Zrv}+)%7w zaNY_)2NV;X#qNhUW`|KoC*wqwFhDbW(no?_*_iIMp-(3o1li9AxuIKYpX5Oa+UE@- 
+We use the tool's default settings. This means we are following the idiomatic
+Rust style. For instance, we use 4 spaces for indentation rather than tabs.
+
+Typically, you will want to instruct your editor/IDE to format while you type,
+when you save or at commit time. However, if for some reason you want
+to reformat the entire kernel Rust sources at some point, you may run::
+
+    make LLVM=1 rustfmt
+
+To check if everything is formatted (printing a diff otherwise), e.g. if you
+have configured a CI for a tree as a maintainer, you may run::
+
+    make LLVM=1 rustfmtcheck
+
+Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on
+individual files, and does not require a kernel configuration. Sometimes it may
+even work with broken code.
+
+
+Extra lints
+-----------
+
+While ``rustc`` is a very helpful compiler, some extra lints and analysis are
+available via ``clippy``, a Rust linter. To enable it, pass ``CLIPPY=1`` to
+the same invocation you use for compilation, e.g.::
+
+    make LLVM=1 CLIPPY=1
+
+At the moment, we do not enforce a "clippy-free" compilation, so you can treat
+the output the same way as the extra warning levels for C, e.g. like ``W=2``.
+Still, we use the default configuration, which is relatively conservative, so
+it is a good idea to read any output it may produce from time to time and fix
+the pointed out issues. The list of enabled lints will likely be tweaked over
+time, and extra levels may end up being introduced, e.g. ``CLIPPY=2``.
+
+
+Abstractions vs. bindings
+-------------------------
+
+We don't have abstractions for all the kernel internal APIs and concepts,
+but we would like to expand coverage as time goes on. Unless there is
+a good reason not to, always use the abstractions instead of calling
+the C bindings directly.
+
+If you are writing some code that requires a call to an internal kernel API
+or concept that isn't abstracted yet, consider providing an (ideally safe)
+abstraction for everyone to use.
+
+
+Conditional compilation
+-----------------------
+
+Rust code has access to conditional compilation based on the kernel
+configuration:
+
+.. code-block:: rust
+
+    #[cfg(CONFIG_X)]       // `CONFIG_X` is enabled (`y` or `m`)
+    #[cfg(CONFIG_X="y")]   // `CONFIG_X` is enabled as a built-in (`y`)
+    #[cfg(CONFIG_X="m")]   // `CONFIG_X` is enabled as a module (`m`)
+    #[cfg(not(CONFIG_X))]  // `CONFIG_X` is disabled
+
+
+Documentation
+-------------
+
+Please see :ref:`Documentation/rust/docs.rst `.
diff --git a/Documentation/rust/docs.rst b/Documentation/rust/docs.rst
new file mode 100644
index 0000000000000..ab29d8b6a00df
--- /dev/null
+++ b/Documentation/rust/docs.rst
@@ -0,0 +1,110 @@
+.. _rust_docs:
+
+Docs
+====
+
+Rust kernel code is not documented like C kernel code (i.e. via kernel-doc).
+Instead, we use the usual system for documenting Rust code: the ``rustdoc``
+tool, which uses Markdown (a *very* lightweight markup language).
+
+This document describes how to make the most out of the kernel documentation
+for Rust.
+
+
+Reading the docs
+----------------
+
+An advantage of using Markdown is that it attempts to make text look almost as
+you would have written it in plain text. This makes the documentation quite
+pleasant to read even in its source form.
+
+However, the generated HTML docs produced by ``rustdoc`` provide a *very* nice
+experience, including integrated instant search, clickable items (types,
+functions, constants, etc. -- including to all the standard Rust library ones
+that we use in the kernel, e.g. ``core``), categorization, links to the source
+code, etc.
+
+Like for the rest of the kernel documentation, pregenerated HTML docs for
+the libraries (crates) inside ``rust/`` that are used by the rest of the kernel
+are available at `kernel.org`_ (TODO: link when in mainline and generated
+alongside the rest of the documentation).
+
+.. _kernel.org: http://kernel.org/
+
+Otherwise, you can generate them locally. This is quite fast (same order as
+compiling the code itself) and you do not need any special tools or environment.
+This has the added advantage that they will be tailored to your particular
+kernel configuration. To generate them, simply use the ``rustdoc`` target with
+the same invocation you use for compilation, e.g.::
+
+    make LLVM=1 rustdoc
+
+
+Writing the docs
+----------------
+
+If you already know Markdown, learning how to write Rust documentation will be
+a breeze. If not, understanding the basics is a matter of minutes reading other
+code. There are also many guides available out there; a particularly nice one
+is at `GitHub`_.
+
+.. _GitHub: https://guides.github.com/features/mastering-markdown/#syntax
+
+This is what a well-documented Rust function may look like (derived from the
+Rust standard library)::
+
+    /// Returns the contained [`Some`] value, consuming the `self` value,
+    /// without checking that the value is not [`None`].
+    ///
+    /// # Safety
+    ///
+    /// Calling this method on [`None`] is *[undefined behavior]*.
+    ///
+    /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// let x = Some("air");
+    /// assert_eq!(unsafe { x.unwrap_unchecked() }, "air");
+    /// ```
+    pub unsafe fn unwrap_unchecked(self) -> T {
+        match self {
+            Some(val) => val,
+
+            // SAFETY: the safety contract must be upheld by the caller.
+            None => unsafe { hint::unreachable_unchecked() },
+        }
+    }
+
+This example showcases a few ``rustdoc`` features and some common conventions
+(that we also follow in the kernel):
+
+* The first paragraph must be a single sentence briefly describing what
+  the documented item does. Further explanations must go in extra paragraphs.
+
+* ``unsafe`` functions must document the preconditions needed for a call to be
+  safe under a ``Safety`` section.
+
+* While not shown here, if a function may panic, the conditions under which
+  that happens must be described under a ``Panics`` section. Please note that
+  panicking should be very rare and used only with a good reason. In almost
+  all cases, you should use a fallible approach, returning a `Result`.
+
+* If providing examples of usage would help readers, they must be written in
+  a section called ``Examples``.
+
+* Rust items (functions, types, constants...) will be automatically linked
+  (``rustdoc`` will find out the URL for you).
+
+* Following the Rust standard library conventions, any ``unsafe`` block must be
+  preceded by a ``SAFETY`` comment describing why the code inside is sound.
+
+  While sometimes the reason might look trivial and therefore unneeded, writing
+  these comments is not just a good way of documenting what has been taken into
+  account, but also a way of showing that there are no *extra* implicit
+  constraints.
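+
+As a smaller template to start from, a minimal documented item may look as
+follows (the function itself is made up for illustration)::
+
+    /// Doubles the given value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// assert_eq!(double(2), 4);
+    /// ```
+    fn double(x: i32) -> i32 {
+        x * 2
+    }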
+
+To learn more about how to write documentation for Rust and extra features,
+please take a look at the ``rustdoc`` `book`_.
+
+.. _book: https://doc.rust-lang.org/rustdoc/how-to-write-documentation.html
diff --git a/Documentation/rust/index.rst b/Documentation/rust/index.rst
new file mode 100644
index 0000000000000..257cf2b200b8a
--- /dev/null
+++ b/Documentation/rust/index.rst
@@ -0,0 +1,20 @@
+Rust
+====
+
+Documentation related to Rust within the kernel. If you are starting out,
+read the :ref:`Documentation/rust/quick-start.rst ` guide.
+
+.. toctree::
+   :maxdepth: 1
+
+   quick-start
+   coding
+   docs
+   arch-support
+
+.. only:: subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
new file mode 100644
index 0000000000000..85b19b3c942e1
--- /dev/null
+++ b/Documentation/rust/quick-start.rst
@@ -0,0 +1,218 @@
+.. _rust_quick_start:
+
+Quick Start
+===========
+
+This document describes how to get started with kernel development in Rust.
+If you have worked previously with Rust, this will only take a moment.
+
+Please note that, at the moment, a very restricted subset of architectures
+is supported; see :doc:`/rust/arch-support`.
+
+
+Requirements: Building
+----------------------
+
+This section explains how to fetch the tools needed for building.
+
+Some of these requirements might be available from your Linux distribution
+under names like ``rustc``, ``rust-src``, ``rust-bindgen``, etc. However,
+at the time of writing, they are likely to not be recent enough.
+
+
+rustc
+*****
+
+A particular version of the Rust compiler is required. Newer versions may or
+may not work because, for the moment, we depend on some unstable Rust features.
+
+If you are using ``rustup``, enter the checked out source code directory
+and run::
+
+    rustup override set 1.55.0
+
+Otherwise, fetch a standalone installer or install ``rustup`` from:
+
+    https://www.rust-lang.org
+
+
+Rust standard library source
+****************************
+
+The Rust standard library source is required because the build system will
+cross-compile ``core`` and ``alloc``.
+
+If you are using ``rustup``, run::
+
+    rustup component add rust-src
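+
+Either way, you can sanity-check that the sources ended up where the build
+system will look for them (an optional check; the path below is the standard
+``rustup`` layout used throughout this document)::
+
+    ls $(rustc --print sysroot)/lib/rustlib/src/rust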
+
+Otherwise, if you used a standalone installer, you can clone the Rust
+repository into the installation folder of your nightly toolchain::
+
+    git clone --recurse-submodules https://github.com/rust-lang/rust $(rustc --print sysroot)/lib/rustlib/src/rust
+
+
+libclang
+********
+
+``libclang`` (part of LLVM) is used by ``bindgen`` to understand the C code
+in the kernel, which means you will need an LLVM installed, as when
+you compile the kernel with ``CC=clang`` or ``LLVM=1``.
+
+Your Linux distribution is likely to have a suitable one available, so it is
+best if you check that first.
+
+There are also some binaries for several systems and architectures uploaded at:
+
+    https://releases.llvm.org/download.html
+
+Otherwise, building LLVM takes quite a while, but it is not a complex process:
+
+    https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm
+
+See Documentation/kbuild/llvm.rst for more information and further ways
+to fetch pre-built releases and distribution packages.
+
+
+bindgen
+*******
+
+The bindings to the C side of the kernel are generated at build time using
+the ``bindgen`` tool. The version we currently support is ``0.56.0``.
+
+Install it via (this will build the tool from source)::
+
+    cargo install --locked --version 0.56.0 bindgen
+
+
+Requirements: Developing
+------------------------
+
+This section explains how to fetch the tools needed for developing. That is,
+if you only want to build the kernel, you do not need them.
+
+
+rustfmt
+*******
+
+The ``rustfmt`` tool is used to automatically format all the Rust kernel code,
+including the generated C bindings (for details, please see
+:ref:`Documentation/rust/coding.rst `).
+
+If you are using ``rustup``, its ``default`` profile already installs the tool,
+so you should be good to go. If you are using another profile, you can install
+the component manually::
+
+    rustup component add rustfmt
+
+The standalone installers also come with ``rustfmt``.
+
+
+clippy
+******
+
+``clippy`` is a Rust linter. Installing it allows you to get extra warnings
+for Rust code when passing ``CLIPPY=1`` to ``make`` (for details, please see
+:ref:`Documentation/rust/coding.rst `).
+
+If you are using ``rustup``, its ``default`` profile already installs the tool,
+so you should be good to go. If you are using another profile, you can install
+the component manually::
+
+    rustup component add clippy
+
+The standalone installers also come with ``clippy``.
+
+
+cargo
+*****
+
+``cargo`` is the Rust native build system. It is currently required to run
+the tests (``rusttest`` target) since we use it to build a custom standard
+library that contains the facilities provided by our custom ``alloc``.
+
+If you are using ``rustup``, all the profiles already install the tool,
+so you should be good to go. The standalone installers also include ``cargo``.
+
+
+rustdoc
+*******
+
+``rustdoc`` is the documentation tool for Rust. It generates pretty HTML
+documentation for Rust code (for details, please see
+:ref:`Documentation/rust/docs.rst `).
+
+``rustdoc`` is also able to test the examples provided in documented Rust code
+(called doctests or documentation tests). We use this feature, thus ``rustdoc``
+is required to run the tests (``rusttest`` target).
+
+If you are using ``rustup``, all the profiles already install the tool,
+so you should be good to go. The standalone installers also include ``rustdoc``.
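+
+For instance, with the tools above installed, the tests (including the
+doctests) can be run with the same invocation style used for compilation,
+e.g.::
+
+    make LLVM=1 rusttest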
+
+
+rust-analyzer
+*************
+
+The `rust-analyzer `_ language server can
+be used with many editors to enable syntax highlighting, completion, go to
+definition, and other features.
+
+``rust-analyzer`` will need to be
+`configured `_
+to work with the kernel by adding a ``rust-project.json`` file in the root
+folder. That file can be generated by building the Make target
+``rust-analyzer``, which will place it in the root of the output directory.
+
+
+Configuration
+-------------
+
+``Rust support`` (``CONFIG_RUST``) needs to be enabled in the ``General setup``
+menu. The option is only shown if the build system can locate ``rustc``.
+In turn, this will make visible the rest of the options that depend on Rust.
+
+Afterwards, go to::
+
+    Kernel hacking
+        -> Sample kernel code
+            -> Rust samples
+
+And enable some sample modules either as built-in or as loadable.
+
+
+Building
+--------
+
+Building a kernel with a complete LLVM toolchain is the best supported setup
+at the moment. That is::
+
+    make LLVM=1
+
+For architectures that do not support a full LLVM toolchain, use::
+
+    make CC=clang
+
+Using GCC also works for some configurations, but it is *very* experimental at
+the moment.
+
+
+Hacking
+-------
+
+If you want to dive deeper, take a look at the source code of the samples
+at ``samples/rust/``, the Rust support code under ``rust/`` and
+the ``Rust hacking`` menu under ``Kernel hacking``.
+
+If you use GDB/Binutils and Rust symbols aren't getting demangled, the reason
+is your toolchain doesn't support Rust's new v0 mangling scheme yet. There are
+a few ways out:
+
+  - Install a newer release (GDB >= 10.2, Binutils >= 2.36).
+
+  - If you only need GDB and can enable ``CONFIG_DEBUG_INFO``, do so:
+    some versions of GDB (e.g. vanilla GDB 10.1) are able to use
+    the pre-demangled names embedded in the debug info.
+
+  - If you don't need loadable module support, you may compile without
+    the ``-Zsymbol-mangling-version=v0`` flag. However, we don't maintain
+    support for that, so avoid it unless you are in a hurry.
From a970ba5e117967db51822255e8dab5905d2480b4 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda
Date: Sat, 3 Jul 2021 17:21:12 +0200
Subject: [PATCH 0391/1202] samples: add Rust examples

A set of Rust modules that showcase what Rust modules look like and
how to use the abstracted kernel features.

These samples also double as tests in the CI.

The semaphore sample comes with a C version for comparison.
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- samples/Kconfig | 2 + samples/Makefile | 1 + samples/rust/Kconfig | 113 +++++++++++++ samples/rust/Makefile | 12 ++ samples/rust/rust_chrdev.rs | 51 ++++++ samples/rust/rust_minimal.rs | 38 +++++ samples/rust/rust_miscdev.rs | 150 +++++++++++++++++ samples/rust/rust_module_parameters.rs | 72 +++++++++ samples/rust/rust_print.rs | 57 +++++++ samples/rust/rust_random.rs | 61 +++++++ samples/rust/rust_semaphore.rs | 177 +++++++++++++++++++++ samples/rust/rust_semaphore_c.c | 212 +++++++++++++++++++++++++ samples/rust/rust_stack_probing.rs | 40 +++++ samples/rust/rust_sync.rs | 81 ++++++++++ 14 files changed, 1067 insertions(+) create mode 100644 samples/rust/Kconfig create mode 100644 samples/rust/Makefile create mode 100644 samples/rust/rust_chrdev.rs create mode 100644 samples/rust/rust_minimal.rs create mode 100644 samples/rust/rust_miscdev.rs create mode 100644 samples/rust/rust_module_parameters.rs create mode 100644 samples/rust/rust_print.rs create mode 100644 samples/rust/rust_random.rs create mode 100644 samples/rust/rust_semaphore.rs create mode 100644 samples/rust/rust_semaphore_c.c create mode 100644 samples/rust/rust_stack_probing.rs create mode 100644 samples/rust/rust_sync.rs diff --git a/samples/Kconfig b/samples/Kconfig index b0503ef058d33..8cbd6490823f4 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -223,4 +223,6 @@ config SAMPLE_WATCH_QUEUE Build example userspace program to use the new mount_notify(), sb_notify() syscalls and the KEYCTL_WATCH_KEY keyctl() function. +source "samples/rust/Kconfig" + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 087e0988ccc56..291663e56a3cd 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_SAMPLE_INTEL_MEI) += mei/ subdir-$(CONFIG_SAMPLE_WATCHDOG) += watchdog subdir-$(CONFIG_SAMPLE_WATCH_QUEUE) += watch_queue obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak/ +obj-$(CONFIG_SAMPLES_RUST) += rust/ diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig new file mode 100644 index 0000000000000..183a3c4dc80cd --- /dev/null +++ b/samples/rust/Kconfig @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: GPL-2.0 + +menuconfig SAMPLES_RUST + bool "Rust samples" + depends on RUST + help + You can build sample Rust kernel code here. + + If unsure, say N. + +if SAMPLES_RUST + +config SAMPLE_RUST_MINIMAL + tristate "Minimal" + help + This option builds the Rust minimal module sample. + + To compile this as a module, choose M here: + the module will be called rust_minimal. + + If unsure, say N. 
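+
+# For instance, a configuration fragment enabling this menu and a couple of
+# the samples below as modules could look like the following (illustrative
+# only; any of the SAMPLE_RUST_* symbols defined in this file may be picked):
+#
+#   CONFIG_SAMPLES_RUST=y
+#   CONFIG_SAMPLE_RUST_MINIMAL=m
+#   CONFIG_SAMPLE_RUST_PRINT=m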
+ +config SAMPLE_RUST_PRINT + tristate "Printing macros" + help + This option builds the Rust printing macros sample. + + To compile this as a module, choose M here: + the module will be called rust_print. + + If unsure, say N. + +config SAMPLE_RUST_MODULE_PARAMETERS + tristate "Module parameters" + help + This option builds the Rust module parameters sample. + + To compile this as a module, choose M here: + the module will be called rust_module_parameters. + + If unsure, say N. + +config SAMPLE_RUST_SYNC + tristate "Synchronisation primitives" + help + This option builds the Rust synchronisation primitives sample. + + To compile this as a module, choose M here: + the module will be called rust_sync. + + If unsure, say N. + +config SAMPLE_RUST_CHRDEV + tristate "Character device" + help + This option builds the Rust character device sample. + + To compile this as a module, choose M here: + the module will be called rust_chrdev. + + If unsure, say N. + +config SAMPLE_RUST_MISCDEV + tristate "Miscellaneous device" + help + This option builds the Rust miscellaneous device sample. + + To compile this as a module, choose M here: + the module will be called rust_miscdev. + + If unsure, say N. + +config SAMPLE_RUST_STACK_PROBING + tristate "Stack probing" + help + This option builds the Rust stack probing sample. + + To compile this as a module, choose M here: + the module will be called rust_stack_probing. + + If unsure, say N. + +config SAMPLE_RUST_SEMAPHORE + tristate "Semaphore" + help + This option builds the Rust semaphore sample. + + To compile this as a module, choose M here: + the module will be called rust_semaphore. + + If unsure, say N. + +config SAMPLE_RUST_SEMAPHORE_C + tristate "Semaphore (in C, for comparison)" + help + This option builds the Rust semaphore sample (in C, for comparison). + + To compile this as a module, choose M here: + the module will be called rust_semaphore_c. + + If unsure, say N. + +config SAMPLE_RUST_RANDOM + tristate "Random" + help + This option builds the Rust random sample. + + To compile this as a module, choose M here: + the module will be called rust_random. + + If unsure, say N. + +endif # SAMPLES_RUST diff --git a/samples/rust/Makefile b/samples/rust/Makefile new file mode 100644 index 0000000000000..48bc871ea1f8f --- /dev/null +++ b/samples/rust/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o +obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o +obj-$(CONFIG_SAMPLE_RUST_MODULE_PARAMETERS) += rust_module_parameters.o +obj-$(CONFIG_SAMPLE_RUST_SYNC) += rust_sync.o +obj-$(CONFIG_SAMPLE_RUST_CHRDEV) += rust_chrdev.o +obj-$(CONFIG_SAMPLE_RUST_MISCDEV) += rust_miscdev.o +obj-$(CONFIG_SAMPLE_RUST_STACK_PROBING) += rust_stack_probing.o +obj-$(CONFIG_SAMPLE_RUST_SEMAPHORE) += rust_semaphore.o +obj-$(CONFIG_SAMPLE_RUST_SEMAPHORE_C) += rust_semaphore_c.o +obj-$(CONFIG_SAMPLE_RUST_RANDOM) += rust_random.o diff --git a/samples/rust/rust_chrdev.rs b/samples/rust/rust_chrdev.rs new file mode 100644 index 0000000000000..ccdfb2da78555 --- /dev/null +++ b/samples/rust/rust_chrdev.rs @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust character device sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{c_str, chrdev, file_operations::FileOperations}; + +module! 
{ + type: RustChrdev, + name: b"rust_chrdev", + author: b"Rust for Linux Contributors", + description: b"Rust character device sample", + license: b"GPL v2", +} + +#[derive(Default)] +struct RustFile; + +impl FileOperations for RustFile { + kernel::declare_file_operations!(); +} + +struct RustChrdev { + _dev: Pin>>, +} + +impl KernelModule for RustChrdev { + fn init() -> Result { + pr_info!("Rust character device sample (init)\n"); + + let mut chrdev_reg = + chrdev::Registration::new_pinned(c_str!("rust_chrdev"), 0, &THIS_MODULE)?; + + // Register the same kind of device twice, we're just demonstrating + // that you can use multiple minors. There are two minors in this case + // because its type is `chrdev::Registration<2>` + chrdev_reg.as_mut().register::()?; + chrdev_reg.as_mut().register::()?; + + Ok(RustChrdev { _dev: chrdev_reg }) + } +} + +impl Drop for RustChrdev { + fn drop(&mut self) { + pr_info!("Rust character device sample (exit)\n"); + } +} diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs new file mode 100644 index 0000000000000..49cfd8cf3aad6 --- /dev/null +++ b/samples/rust/rust_minimal.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust minimal sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; + +module! { + type: RustMinimal, + name: b"rust_minimal", + author: b"Rust for Linux Contributors", + description: b"Rust minimal sample", + license: b"GPL v2", +} + +struct RustMinimal { + message: String, +} + +impl KernelModule for RustMinimal { + fn init() -> Result { + pr_info!("Rust minimal sample (init)\n"); + pr_info!("Am I built-in? {}\n", !cfg!(MODULE)); + + Ok(RustMinimal { + message: "on the heap!".try_to_owned()?, + }) + } +} + +impl Drop for RustMinimal { + fn drop(&mut self) { + pr_info!("My message is {}\n", self.message); + pr_info!("Rust minimal sample (exit)\n"); + } +} diff --git a/samples/rust/rust_miscdev.rs b/samples/rust/rust_miscdev.rs new file mode 100644 index 0000000000000..3235ed5440e97 --- /dev/null +++ b/samples/rust/rust_miscdev.rs @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust miscellaneous device sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{ + c_str, + file::File, + file_operations::{FileOpener, FileOperations}, + io_buffer::{IoBufferReader, IoBufferWriter}, + miscdev, + sync::{CondVar, Mutex, Ref}, +}; + +module! { + type: RustMiscdev, + name: b"rust_miscdev", + author: b"Rust for Linux Contributors", + description: b"Rust miscellaneous device sample", + license: b"GPL v2", +} + +const MAX_TOKENS: usize = 3; + +struct SharedStateInner { + token_count: usize, +} + +struct SharedState { + state_changed: CondVar, + inner: Mutex, +} + +impl SharedState { + fn try_new() -> Result>> { + Ok(Ref::pinned(Ref::try_new_and_init( + Self { + // SAFETY: `condvar_init!` is called below. + state_changed: unsafe { CondVar::new() }, + // SAFETY: `mutex_init!` is called below. + inner: unsafe { Mutex::new(SharedStateInner { token_count: 0 }) }, + }, + |mut state| { + // SAFETY: `state_changed` is pinned when `state` is. + let pinned = unsafe { state.as_mut().map_unchecked_mut(|s| &mut s.state_changed) }; + kernel::condvar_init!(pinned, "SharedState::state_changed"); + // SAFETY: `inner` is pinned when `state` is. 
+ let pinned = unsafe { state.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + kernel::mutex_init!(pinned, "SharedState::inner"); + }, + )?)) + } +} + +struct Token; + +impl FileOpener>> for Token { + fn open(shared: &Pin>) -> Result { + Ok(shared.clone()) + } +} + +impl FileOperations for Token { + type Wrapper = Pin>; + + kernel::declare_file_operations!(read, write); + + fn read( + shared: &Ref, + _: &File, + data: &mut impl IoBufferWriter, + offset: u64, + ) -> Result { + // Succeed if the caller doesn't provide a buffer or if not at the start. + if data.is_empty() || offset != 0 { + return Ok(0); + } + + { + let mut inner = shared.inner.lock(); + + // Wait until we are allowed to decrement the token count or a signal arrives. + while inner.token_count == 0 { + if shared.state_changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + + // Consume a token. + inner.token_count -= 1; + } + + // Notify a possible writer waiting. + shared.state_changed.notify_all(); + + // Write a one-byte 1 to the reader. + data.write_slice(&[1u8; 1])?; + Ok(1) + } + + fn write( + shared: &Ref, + _: &File, + data: &mut impl IoBufferReader, + _offset: u64, + ) -> Result { + { + let mut inner = shared.inner.lock(); + + // Wait until we are allowed to increment the token count or a signal arrives. + while inner.token_count == MAX_TOKENS { + if shared.state_changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + + // Increment the number of token so that a reader can be released. + inner.token_count += 1; + } + + // Notify a possible reader waiting. + shared.state_changed.notify_all(); + Ok(data.len()) + } +} + +struct RustMiscdev { + _dev: Pin>>>>, +} + +impl KernelModule for RustMiscdev { + fn init() -> Result { + pr_info!("Rust miscellaneous device sample (init)\n"); + + let state = SharedState::try_new()?; + + Ok(RustMiscdev { + _dev: miscdev::Registration::new_pinned::(c_str!("rust_miscdev"), None, state)?, + }) + } +} + +impl Drop for RustMiscdev { + fn drop(&mut self) { + pr_info!("Rust miscellaneous device sample (exit)\n"); + } +} diff --git a/samples/rust/rust_module_parameters.rs b/samples/rust/rust_module_parameters.rs new file mode 100644 index 0000000000000..57e59e8050275 --- /dev/null +++ b/samples/rust/rust_module_parameters.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust module parameters sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; + +module! 
{ + type: RustModuleParameters, + name: b"rust_module_parameters", + author: b"Rust for Linux Contributors", + description: b"Rust module parameters sample", + license: b"GPL v2", + params: { + my_bool: bool { + default: true, + permissions: 0, + description: b"Example of bool", + }, + my_i32: i32 { + default: 42, + permissions: 0o644, + description: b"Example of i32", + }, + my_str: str { + default: b"default str val", + permissions: 0o644, + description: b"Example of a string param", + }, + my_usize: usize { + default: 42, + permissions: 0o644, + description: b"Example of usize", + }, + my_array: ArrayParam { + default: [0, 1], + permissions: 0, + description: b"Example of array", + }, + }, +} + +struct RustModuleParameters; + +impl KernelModule for RustModuleParameters { + fn init() -> Result { + pr_info!("Rust module parameters sample (init)\n"); + + { + let lock = THIS_MODULE.kernel_param_lock(); + pr_info!("Parameters:\n"); + pr_info!(" my_bool: {}\n", my_bool.read()); + pr_info!(" my_i32: {}\n", my_i32.read(&lock)); + pr_info!( + " my_str: {}\n", + core::str::from_utf8(my_str.read(&lock))? + ); + pr_info!(" my_usize: {}\n", my_usize.read(&lock)); + pr_info!(" my_array: {:?}\n", my_array.read()); + } + + Ok(RustModuleParameters) + } +} + +impl Drop for RustModuleParameters { + fn drop(&mut self) { + pr_info!("Rust module parameters sample (exit)\n"); + } +} diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs new file mode 100644 index 0000000000000..c43338fca5071 --- /dev/null +++ b/samples/rust/rust_print.rs @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust printing macros sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::pr_cont; +use kernel::prelude::*; + +module! { + type: RustPrint, + name: b"rust_print", + author: b"Rust for Linux Contributors", + description: b"Rust printing macros sample", + license: b"GPL v2", +} + +struct RustPrint; + +impl KernelModule for RustPrint { + fn init() -> Result { + pr_info!("Rust printing macros sample (init)\n"); + + pr_emerg!("Emergency message (level 0) without args\n"); + pr_alert!("Alert message (level 1) without args\n"); + pr_crit!("Critical message (level 2) without args\n"); + pr_err!("Error message (level 3) without args\n"); + pr_warn!("Warning message (level 4) without args\n"); + pr_notice!("Notice message (level 5) without args\n"); + pr_info!("Info message (level 6) without args\n"); + + pr_info!("A line that"); + pr_cont!(" is continued"); + pr_cont!(" without args\n"); + + pr_emerg!("{} message (level {}) with args\n", "Emergency", 0); + pr_alert!("{} message (level {}) with args\n", "Alert", 1); + pr_crit!("{} message (level {}) with args\n", "Critical", 2); + pr_err!("{} message (level {}) with args\n", "Error", 3); + pr_warn!("{} message (level {}) with args\n", "Warning", 4); + pr_notice!("{} message (level {}) with args\n", "Notice", 5); + pr_info!("{} message (level {}) with args\n", "Info", 6); + + pr_info!("A {} that", "line"); + pr_cont!(" is {}", "continued"); + pr_cont!(" with {}\n", "args"); + + Ok(RustPrint) + } +} + +impl Drop for RustPrint { + fn drop(&mut self) { + pr_info!("Rust printing macros sample (exit)\n"); + } +} diff --git a/samples/rust/rust_random.rs b/samples/rust/rust_random.rs new file mode 100644 index 0000000000000..ef28c5f84f716 --- /dev/null +++ b/samples/rust/rust_random.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust random device +//! +//! Adapted from Alex Gaynor's original available at +//! . 
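+//!
+//! In short: reads fill the user buffer with kernel randomness in 256-byte
+//! chunks, blocking or not depending on the file's flags, while writes mix
+//! the written bytes back into the kernel's entropy pool.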
+ +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::{ + file::File, + file_operations::FileOperations, + io_buffer::{IoBufferReader, IoBufferWriter}, + prelude::*, +}; + +#[derive(Default)] +struct RandomFile; + +impl FileOperations for RandomFile { + kernel::declare_file_operations!(read, write, read_iter, write_iter); + + fn read(_this: &Self, file: &File, buf: &mut impl IoBufferWriter, _: u64) -> Result { + let total_len = buf.len(); + let mut chunkbuf = [0; 256]; + + while !buf.is_empty() { + let len = chunkbuf.len().min(buf.len()); + let chunk = &mut chunkbuf[0..len]; + + if file.is_blocking() { + kernel::random::getrandom(chunk)?; + } else { + kernel::random::getrandom_nonblock(chunk)?; + } + buf.write_slice(chunk)?; + } + Ok(total_len) + } + + fn write(_this: &Self, _file: &File, buf: &mut impl IoBufferReader, _: u64) -> Result { + let total_len = buf.len(); + let mut chunkbuf = [0; 256]; + while !buf.is_empty() { + let len = chunkbuf.len().min(buf.len()); + let chunk = &mut chunkbuf[0..len]; + buf.read_slice(chunk)?; + kernel::random::add_randomness(chunk); + } + Ok(total_len) + } +} + +module_misc_device! { + type: RandomFile, + name: b"rust_random", + author: b"Rust for Linux Contributors", + description: b"Just use /dev/urandom: Now with early-boot safety", + license: b"GPL v2", +} diff --git a/samples/rust/rust_semaphore.rs b/samples/rust/rust_semaphore.rs new file mode 100644 index 0000000000000..794d298003cea --- /dev/null +++ b/samples/rust/rust_semaphore.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust semaphore sample +//! +//! A counting semaphore that can be used by userspace. +//! +//! The count is incremented by writes to the device. A write of `n` bytes results in an increment +//! of `n`. It is decremented by reads; each read results in the count being decremented by 1. If +//! the count is already zero, a read will block until another write increments it. +//! +//! This can be used in user space from the shell for example as follows (assuming a node called +//! `semaphore`): `cat semaphore` decrements the count by 1 (waiting for it to become non-zero +//! before decrementing); `echo -n 123 > semaphore` increments the semaphore by 3, potentially +//! unblocking up to 3 blocked readers. + +#![no_std] +#![feature(allocator_api, global_asm)] + +use core::sync::atomic::{AtomicU64, Ordering}; +use kernel::{ + c_str, condvar_init, declare_file_operations, + file::File, + file_operations::{FileOpener, FileOperations, IoctlCommand, IoctlHandler}, + io_buffer::{IoBufferReader, IoBufferWriter}, + miscdev::Registration, + mutex_init, + prelude::*, + sync::{CondVar, Mutex, Ref}, + user_ptr::{UserSlicePtrReader, UserSlicePtrWriter}, +}; + +module! { + type: RustSemaphore, + name: b"rust_semaphore", + author: b"Rust for Linux Contributors", + description: b"Rust semaphore sample", + license: b"GPL v2", +} + +struct SemaphoreInner { + count: usize, + max_seen: usize, +} + +struct Semaphore { + changed: CondVar, + inner: Mutex, +} + +struct FileState { + read_count: AtomicU64, + shared: Ref, +} + +impl FileState { + fn consume(&self) -> Result { + let mut inner = self.shared.inner.lock(); + while inner.count == 0 { + if self.shared.changed.wait(&mut inner) { + return Err(Error::EINTR); + } + } + inner.count -= 1; + Ok(()) + } +} + +impl FileOpener> for FileState { + fn open(shared: &Ref) -> Result> { + Ok(Box::try_new(Self { + read_count: AtomicU64::new(0), + shared: shared.clone(), + })?) 
+ } +} + +impl FileOperations for FileState { + declare_file_operations!(read, write, ioctl); + + fn read(this: &Self, _: &File, data: &mut impl IoBufferWriter, offset: u64) -> Result { + if data.is_empty() || offset > 0 { + return Ok(0); + } + this.consume()?; + data.write_slice(&[0u8; 1])?; + this.read_count.fetch_add(1, Ordering::Relaxed); + Ok(1) + } + + fn write(this: &Self, _: &File, data: &mut impl IoBufferReader, _offs: u64) -> Result { + { + let mut inner = this.shared.inner.lock(); + inner.count = inner.count.saturating_add(data.len()); + if inner.count > inner.max_seen { + inner.max_seen = inner.count; + } + } + + this.shared.changed.notify_all(); + Ok(data.len()) + } + + fn ioctl(this: &Self, file: &File, cmd: &mut IoctlCommand) -> Result { + cmd.dispatch::(this, file) + } +} + +struct RustSemaphore { + _dev: Pin>>>, +} + +impl KernelModule for RustSemaphore { + fn init() -> Result { + pr_info!("Rust semaphore sample (init)\n"); + + let sema = Ref::try_new_and_init( + Semaphore { + // SAFETY: `condvar_init!` is called below. + changed: unsafe { CondVar::new() }, + + // SAFETY: `mutex_init!` is called below. + inner: unsafe { + Mutex::new(SemaphoreInner { + count: 0, + max_seen: 0, + }) + }, + }, + |mut sema| { + // SAFETY: `changed` is pinned when `sema` is. + let pinned = unsafe { sema.as_mut().map_unchecked_mut(|s| &mut s.changed) }; + condvar_init!(pinned, "Semaphore::changed"); + + // SAFETY: `inner` is pinned when `sema` is. + let pinned = unsafe { sema.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + mutex_init!(pinned, "Semaphore::inner"); + }, + )?; + + Ok(Self { + _dev: Registration::new_pinned::(c_str!("rust_semaphore"), None, sema)?, + }) + } +} + +impl Drop for RustSemaphore { + fn drop(&mut self) { + pr_info!("Rust semaphore sample (exit)\n"); + } +} + +const IOCTL_GET_READ_COUNT: u32 = 0x80086301; +const IOCTL_SET_READ_COUNT: u32 = 0x40086301; + +impl IoctlHandler for FileState { + type Target = Self; + + fn read(this: &Self, _: &File, cmd: u32, writer: &mut UserSlicePtrWriter) -> Result { + match cmd { + IOCTL_GET_READ_COUNT => { + writer.write(&this.read_count.load(Ordering::Relaxed))?; + Ok(0) + } + _ => Err(Error::EINVAL), + } + } + + fn write(this: &Self, _: &File, cmd: u32, reader: &mut UserSlicePtrReader) -> Result { + match cmd { + IOCTL_SET_READ_COUNT => { + this.read_count.store(reader.read()?, Ordering::Relaxed); + Ok(0) + } + _ => Err(Error::EINVAL), + } + } +} diff --git a/samples/rust/rust_semaphore_c.c b/samples/rust/rust_semaphore_c.c new file mode 100644 index 0000000000000..cdc121d4030d0 --- /dev/null +++ b/samples/rust/rust_semaphore_c.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rust semaphore sample (in C, for comparison) + * + * This is a C implementation of `rust_semaphore.rs`. Refer to the description + * in that file for details on the device. 
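+ *
+ * In short: a write of `n` bytes increments the semaphore count by `n`
+ * (saturating), each read consumes one token (blocking while the count is
+ * zero) and returns a single byte, and a pair of ioctls reads or overwrites
+ * the per-file read counter.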
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#define IOCTL_GET_READ_COUNT _IOR('c', 1, u64) +#define IOCTL_SET_READ_COUNT _IOW('c', 1, u64) + +struct semaphore_state { + struct kref ref; + struct miscdevice miscdev; + wait_queue_head_t changed; + struct mutex mutex; + size_t count; + size_t max_seen; +}; + +struct file_state { + atomic64_t read_count; + struct semaphore_state *shared; +}; + +static int semaphore_consume(struct semaphore_state *state) +{ + DEFINE_WAIT(wait); + + mutex_lock(&state->mutex); + while (state->count == 0) { + prepare_to_wait(&state->changed, &wait, TASK_INTERRUPTIBLE); + mutex_unlock(&state->mutex); + schedule(); + finish_wait(&state->changed, &wait); + if (signal_pending(current)) + return -EINTR; + mutex_lock(&state->mutex); + } + + state->count--; + mutex_unlock(&state->mutex); + + return 0; +} + +static int semaphore_open(struct inode *nodp, struct file *filp) +{ + struct semaphore_state *shared = + container_of(filp->private_data, struct semaphore_state, miscdev); + struct file_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + kref_get(&shared->ref); + state->shared = shared; + atomic64_set(&state->read_count, 0); + + filp->private_data = state; + + return 0; +} + +static ssize_t semaphore_write(struct file *filp, const char __user *buffer, size_t count, + loff_t *ppos) +{ + struct file_state *state = filp->private_data; + struct semaphore_state *shared = state->shared; + + mutex_lock(&shared->mutex); + + shared->count += count; + if (shared->count < count) + shared->count = SIZE_MAX; + + if (shared->count > shared->max_seen) + shared->max_seen = shared->count; + + mutex_unlock(&shared->mutex); + + wake_up_all(&shared->changed); + + return count; +} + +static ssize_t semaphore_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct file_state *state = filp->private_data; + char c = 0; + int ret; + + if (count == 0 || *ppos > 0) + return 0; + + ret = semaphore_consume(state->shared); + if (ret) + return ret; + + if (copy_to_user(buffer, &c, sizeof(c))) + return -EFAULT; + + atomic64_add(1, &state->read_count); + *ppos += 1; + return 1; +} + +static long semaphore_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct file_state *state = filp->private_data; + void __user *buffer = (void __user *)arg; + u64 value; + + switch (cmd) { + case IOCTL_GET_READ_COUNT: + value = atomic64_read(&state->read_count); + if (copy_to_user(buffer, &value, sizeof(value))) + return -EFAULT; + return 0; + case IOCTL_SET_READ_COUNT: + if (copy_from_user(&value, buffer, sizeof(value))) + return -EFAULT; + atomic64_set(&state->read_count, value); + return 0; + default: + return -EINVAL; + } +} + +static void semaphore_free(struct kref *kref) +{ + struct semaphore_state *device; + + device = container_of(kref, struct semaphore_state, ref); + kfree(device); +} + +static int semaphore_release(struct inode *nodp, struct file *filp) +{ + struct file_state *state = filp->private_data; + + kref_put(&state->shared->ref, semaphore_free); + kfree(state); + return 0; +} + +static const struct file_operations semaphore_fops = { + .owner = THIS_MODULE, + .open = semaphore_open, + .read = semaphore_read, + .write = semaphore_write, + .compat_ioctl = semaphore_ioctl, + .release = semaphore_release, +}; + +static struct semaphore_state *device; + +static int __init semaphore_init(void) +{ + int ret; + struct semaphore_state 
*state; + + pr_info("Rust semaphore sample (in C, for comparison) (init)\n"); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + mutex_init(&state->mutex); + kref_init(&state->ref); + init_waitqueue_head(&state->changed); + + state->miscdev.fops = &semaphore_fops; + state->miscdev.minor = MISC_DYNAMIC_MINOR; + state->miscdev.name = "semaphore"; + + ret = misc_register(&state->miscdev); + if (ret < 0) { + kfree(state); + return ret; + } + + device = state; + + return 0; +} + +static void __exit semaphore_exit(void) +{ + pr_info("Rust semaphore sample (in C, for comparison) (exit)\n"); + + misc_deregister(&device->miscdev); + kref_put(&device->ref, semaphore_free); +} + +module_init(semaphore_init); +module_exit(semaphore_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Rust for Linux Contributors"); +MODULE_DESCRIPTION("Rust semaphore sample (in C, for comparison)"); diff --git a/samples/rust/rust_stack_probing.rs b/samples/rust/rust_stack_probing.rs new file mode 100644 index 0000000000000..fcb87a53ce7f1 --- /dev/null +++ b/samples/rust/rust_stack_probing.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust stack probing sample + +#![no_std] +#![feature(allocator_api, global_asm)] +#![feature(bench_black_box)] + +use kernel::prelude::*; + +module! { + type: RustStackProbing, + name: b"rust_stack_probing", + author: b"Rust for Linux Contributors", + description: b"Rust stack probing sample", + license: b"GPL v2", +} + +struct RustStackProbing; + +impl KernelModule for RustStackProbing { + fn init() -> Result { + pr_info!("Rust stack probing sample (init)\n"); + + // Including this large variable on the stack will trigger + // stack probing on the supported archs. + // This will verify that stack probing does not lead to + // any errors if we need to link `__rust_probestack`. + let x: [u64; 514] = core::hint::black_box([5; 514]); + pr_info!("Large array has length: {}\n", x.len()); + + Ok(RustStackProbing) + } +} + +impl Drop for RustStackProbing { + fn drop(&mut self) { + pr_info!("Rust stack probing sample (exit)\n"); + } +} diff --git a/samples/rust/rust_sync.rs b/samples/rust/rust_sync.rs new file mode 100644 index 0000000000000..40bbfec9ad278 --- /dev/null +++ b/samples/rust/rust_sync.rs @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust synchronisation primitives sample + +#![no_std] +#![feature(allocator_api, global_asm)] + +use kernel::prelude::*; +use kernel::{ + condvar_init, mutex_init, spinlock_init, + sync::{CondVar, Mutex, SpinLock}, +}; + +module! { + type: RustSync, + name: b"rust_sync", + author: b"Rust for Linux Contributors", + description: b"Rust synchronisation primitives sample", + license: b"GPL v2", +} + +struct RustSync; + +impl KernelModule for RustSync { + fn init() -> Result { + pr_info!("Rust synchronisation primitives sample (init)\n"); + + // Test mutexes. + { + // SAFETY: `init` is called below. + let mut data = Pin::from(Box::try_new(unsafe { Mutex::new(0) })?); + mutex_init!(data.as_mut(), "RustSync::init::data1"); + *data.lock() = 10; + pr_info!("Value: {}\n", *data.lock()); + + // SAFETY: `init` is called below. + let mut cv = Pin::from(Box::try_new(unsafe { CondVar::new() })?); + condvar_init!(cv.as_mut(), "RustSync::init::cv1"); + + { + let mut guard = data.lock(); + while *guard != 10 { + let _ = cv.wait(&mut guard); + } + } + cv.notify_one(); + cv.notify_all(); + cv.free_waiters(); + } + + // Test spinlocks. + { + // SAFETY: `init` is called below. 
+ let mut data = Pin::from(Box::try_new(unsafe { SpinLock::new(0) })?); + spinlock_init!(data.as_mut(), "RustSync::init::data2"); + *data.lock() = 10; + pr_info!("Value: {}\n", *data.lock()); + + // SAFETY: `init` is called below. + let mut cv = Pin::from(Box::try_new(unsafe { CondVar::new() })?); + condvar_init!(cv.as_mut(), "RustSync::init::cv2"); + { + let mut guard = data.lock(); + while *guard != 10 { + let _ = cv.wait(&mut guard); + } + } + cv.notify_one(); + cv.notify_all(); + cv.free_waiters(); + } + + Ok(RustSync) + } +} + +impl Drop for RustSync { + fn drop(&mut self) { + pr_info!("Rust synchronisation primitives sample (exit)\n"); + } +} From 2b8dc4465aae43a8b8dbc5beac2bb19509906435 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:26:15 +0200 Subject: [PATCH 0392/1202] scripts: add `generate_rust_analyzer.py` The `generate_rust_analyzer.py` script generates the configuration file (`rust-project.json`) for rust-analyzer. rust-analyzer is a modular compiler frontend for the Rust language. It provides an LSP server which can be used in editors such as VS Code, Emacs or Vim. Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Geoffrey Thomas Signed-off-by: Geoffrey Thomas Co-developed-by: Finn Behrens Signed-off-by: Finn Behrens Co-developed-by: Adam Bratschi-Kaye Signed-off-by: Adam Bratschi-Kaye Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Co-developed-by: Boqun Feng Signed-off-by: Boqun Feng Co-developed-by: Sumera Priyadarsini Signed-off-by: Sumera Priyadarsini Co-developed-by: Michael Ellerman Signed-off-by: Michael Ellerman Co-developed-by: Sven Van Asbroeck Signed-off-by: Sven Van Asbroeck Co-developed-by: Gary Guo Signed-off-by: Gary Guo Co-developed-by: Boris-Chengbiao Zhou Signed-off-by: Boris-Chengbiao Zhou Co-developed-by: Fox Chen Signed-off-by: Fox Chen Co-developed-by: Ayaan Zaidi Signed-off-by: Ayaan Zaidi Co-developed-by: Douglas Su Signed-off-by: Douglas Su Co-developed-by: Yuki Okushi Signed-off-by: Yuki Okushi Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 133 ++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100755 scripts/generate_rust_analyzer.py diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py new file mode 100755 index 0000000000000..1beb5ee09251f --- /dev/null +++ b/scripts/generate_rust_analyzer.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +"""generate_rust_analyzer - Generates the `rust-project.json` file for `rust-analyzer`. +""" + +import argparse +import json +import logging +import pathlib +import sys + +def generate_crates(srctree, objtree, sysroot_src): + # Generate the configuration list. + cfg = [] + with open(objtree / "include" / "generated" / "rustc_cfg") as fd: + for line in fd: + line = line.replace("--cfg=", "") + line = line.replace("\n", "") + cfg.append(line) + + # Now fill the crates list -- dependencies need to come first. + # + # Avoid O(n^2) iterations by keeping a map of indexes. 
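+    #
+    # `crates_indexes` maps a crate's display name to its position in the
+    # `crates` list, so that each crate's `deps` can be emitted as numeric
+    # indexes in a single pass over its dependency names.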
+ crates = [] + crates_indexes = {} + + def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False): + crates_indexes[display_name] = len(crates) + crates.append({ + "display_name": display_name, + "root_module": str(root_module), + "is_workspace_member": is_workspace_member, + "is_proc_macro": is_proc_macro, + "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps], + "cfg": cfg, + "edition": "2018", + "env": { + "RUST_MODFILE": "This is only for rust-analyzer" + } + }) + + # First, the ones in `rust/` since they are a bit special. + append_crate( + "core", + sysroot_src / "core" / "src" / "lib.rs", + [], + is_workspace_member=False, + ) + + append_crate( + "compiler_builtins", + srctree / "rust" / "compiler_builtins.rs", + [], + ) + + append_crate( + "alloc", + srctree / "rust" / "alloc" / "lib.rs", + ["core", "compiler_builtins"], + ) + + append_crate( + "macros", + srctree / "rust" / "macros" / "lib.rs", + [], + is_proc_macro=True, + ) + crates[-1]["proc_macro_dylib_path"] = "rust/libmacros.so" + + append_crate( + "build_error", + srctree / "rust" / "build_error.rs", + ["core", "compiler_builtins"], + ) + + append_crate( + "kernel", + srctree / "rust" / "kernel" / "lib.rs", + ["core", "alloc", "macros", "build_error"], + cfg=cfg, + ) + crates[-1]["env"]["OBJTREE"] = str(objtree.resolve(True)) + crates[-1]["source"] = { + "include_dirs": [ + str(srctree / "rust" / "kernel"), + str(objtree / "rust") + ], + "exclude_dirs": [], + } + + # Then, the rest outside of `rust/`. + # + # We explicitly mention the top-level folders we want to cover. + for folder in ("samples", "drivers"): + for path in (srctree / folder).rglob("*.rs"): + logging.info("Checking %s", path) + name = path.name.replace(".rs", "") + + # Skip those that are not crate roots. + if f"{name}.o" not in open(path.parent / "Makefile").read(): + continue + + logging.info("Adding %s", name) + append_crate( + name, + path, + ["core", "alloc", "kernel"], + cfg=cfg, + ) + + return crates + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--verbose', '-v', action='store_true') + parser.add_argument("srctree", type=pathlib.Path) + parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot_src", type=pathlib.Path) + args = parser.parse_args() + + logging.basicConfig( + format="[%(asctime)s] [%(levelname)s] %(message)s", + level=logging.INFO if args.verbose else logging.WARNING + ) + + rust_project = { + "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src), + "sysroot_src": str(args.sysroot_src), + } + + json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) + +if __name__ == "__main__": + main() From a5085d70a3bab91e91dd7f6273e3138ba92fb006 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 3 Jul 2021 17:24:21 +0200 Subject: [PATCH 0393/1202] MAINTAINERS: Rust Miguel, Alex and Wedson will be maintaining the Rust support. 
Co-developed-by: Alex Gaynor Signed-off-by: Alex Gaynor Co-developed-by: Wedson Almeida Filho Signed-off-by: Wedson Almeida Filho Signed-off-by: Miguel Ojeda --- MAINTAINERS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a4a0c2baaf274..190dd51a83751 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16278,6 +16278,20 @@ L: linux-rdma@vger.kernel.org S: Maintained F: drivers/infiniband/ulp/rtrs/ +RUST +M: Miguel Ojeda +M: Alex Gaynor +M: Wedson Almeida Filho +L: rust-for-linux@vger.kernel.org +S: Supported +W: https://github.com/Rust-for-Linux/linux +B: https://github.com/Rust-for-Linux/linux/issues +T: git https://github.com/Rust-for-Linux/linux.git rust-next +F: rust/ +F: samples/rust/ +F: Documentation/rust/ +K: \b(?i:rust)\b + RXRPC SOCKETS (AF_RXRPC) M: David Howells M: Marc Dionne From 8d6ad05cf7ffdf4548efe28e1fdc94676a6c5b66 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 20 Oct 2021 21:06:34 +0800 Subject: [PATCH 0394/1202] dt-bindings: phy: qcom,qmp: IPQ6018 and IPQ8074 PCIe PHY require no supply The qmp-phy driver suggests that 'vdda-phy-supply' and 'vdda-pll-supply' are not required for IPQ6018 and IPQ8074 QMP PCIe PHY. Update the bindings to reflect it. While at it, also correct the clock properties for IPQ8074 QMP PCIe PHY. And as the result, 'qcom,ipq8074-qmp-pcie-phy' and 'qcom,ipq6018-qmp-pcie-phy' share the same clock, reset and supply bindings. Signed-off-by: Shawn Guo Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211020130634.26194-1-shawn.guo@linaro.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,qmp-phy.yaml | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml index 647b45cfec848..630ceaf915e2a 100644 --- a/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml @@ -117,8 +117,6 @@ required: - clock-names - resets - reset-names - - vdda-phy-supply - - vdda-pll-supply additionalProperties: false @@ -151,6 +149,9 @@ allOf: items: - const: phy - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -177,6 +178,9 @@ allOf: items: - const: phy - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -205,6 +209,9 @@ allOf: - const: phy - const: common - const: cfg + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -234,6 +241,9 @@ allOf: items: - const: phy - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -254,6 +264,9 @@ allOf: reset-names: items: - const: ufsphy + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -279,34 +292,16 @@ allOf: reset-names: items: - const: ufsphy - - if: - properties: - compatible: - contains: - enum: - - qcom,ipq8074-qmp-pcie-phy - then: - properties: - clocks: - items: - - description: pipe clk. - clock-names: - items: - - const: pipe_clk - resets: - items: - - description: reset of phy block. - - description: phy common block reset. 
- reset-names: - items: - - const: phy - - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: contains: enum: - qcom,ipq6018-qmp-pcie-phy + - qcom,ipq8074-qmp-pcie-phy then: properties: clocks: @@ -357,6 +352,9 @@ allOf: reset-names: items: - const: phy + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -388,6 +386,9 @@ allOf: items: - const: phy - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -415,6 +416,9 @@ allOf: items: - const: phy - const: common + required: + - vdda-phy-supply + - vdda-pll-supply - if: properties: compatible: @@ -441,6 +445,9 @@ allOf: items: - const: phy_phy - const: phy + required: + - vdda-phy-supply + - vdda-pll-supply examples: - | From f543a47064a4cef81233a7182d796cd8fa05386e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Oct 2021 17:53:33 +0200 Subject: [PATCH 0395/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt index 743eb57a9a09d..ac3e28e4b18f4 100644 --- a/arch/arm/arm-soc-for-next-contents.txt +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -62,6 +62,30 @@ arm/dt git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel tags/renesas-arm-dt-for-v5.16-tag2 stm32/dt git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32 tags/stm32-dt-for-v5.16-1 + mvebu/dt64 + git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu tags/mvebu-dt64-5.16-1 + mvebu/dt + git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu tags/mvebu-dt-5.16-1 + toshiba/dt + git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti tags/visconti-arm-dt-for-v5.16 + imx/dt-bindings + git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-bindings-5.16 + imx/dt + git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-dt-5.16 + imx/dt64 + git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-dt64-5.16 + sunxi/dt + git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux tags/sunxi-dt-for-5.16-1 + ixp4xx/dt + git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik tags/ixp4xx-dts-for-v5.16 + gemini/dt + git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik tags/gemini-dts-for-v5.16 + rockchip/dt64-2 + git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts64-2 + rockchip/dt-2 + git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts32-2 + (bc22b6208f416ba702c17a93c9af6474f19e8212) + https://github.com/Broadcom/stblinux tags/arm-soc/for-5.15/devicetree arm/drivers renesas/drivers @@ -88,6 +112,15 @@ arm/drivers git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux tags/qcom-drivers-for-5.16 broadcom/drivers https://github.com/Broadcom/stblinux tags/arm-soc/for-5.16/drivers + imx/drivers + git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-drivers-5.16 + sunxi/drivers + git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux tags/sunxi-drivers-for-5.16-1 + patch + ASoC: cirrus: i2s: Prepare clock before using it + ep93xx: clock: convert in-place to COMMON_CLK + drivers/optee + git://git.linaro.org/people/jens.wiklander/linux-tee tags/optee-ffa-for-v5.16 arm/defconfigs defconfig/multiv7 @@ -104,4 +137,10 @@ arm/newsoc arm/late arm/fixes + imx/fixes-4 + 
git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-fixes-5.15-4
+	(55dd7e059098ce4bd0a55c251cb78e74604abb57)
+		git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux tags/sunxi-fixes-for-5.15-1
+	drivers/reset-fixes
+		git://git.pengutronix.de/pza/linux tags/reset-fixes-for-v5.15

From cfae1d2e6d6dd1a16ac9cb35f98905dabf7894fe Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski
Date: Fri, 24 Sep 2021 15:33:32 +0200
Subject: [PATCH 0396/1202] mfd: exynos-lpass: Describe driver in KConfig

Describe more clearly which driver applies to which SoC, to make
configuring the kernel for Samsung SoCs easier.

Signed-off-by: Krzysztof Kozlowski
Signed-off-by: Lee Jones
Link: https://lore.kernel.org/r/20210924133332.112092-1-krzysztof.kozlowski@canonical.com
---
 drivers/mfd/Kconfig | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 251ffcb06cc4c..f99334fa0ae67 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -417,7 +417,9 @@ config MFD_EXYNOS_LPASS
 	select REGMAP_MMIO
 	help
 	  Select this option to enable support for Samsung Exynos Low Power
-	  Audio Subsystem.
+	  Audio Subsystem present on some of Samsung Exynos
+	  SoCs (e.g. Exynos5433).
+	  Choose Y here only if you build for such Samsung SoC.
 
 config MFD_GATEWORKS_GSC
 	tristate "Gateworks System Controller"

From 197e7a12be243551f001ae8a6bd44f933b08316e Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 24 Sep 2021 15:33:45 +0100
Subject: [PATCH 0397/1202] mfd: altr_a10sr: Add SPI device ID table

Currently, autoloading for SPI devices does not use the DT ID table;
it uses SPI modaliases. Supporting OF modaliases is going to be
difficult, if not impractical: an attempt was made but has been
reverted. Ensure that module autoloading works for this driver by
adding a SPI device ID table.

Fixes: 96c8395e2166 ("spi: Revert modalias changes")
Signed-off-by: Mark Brown
Signed-off-by: Lee Jones
Link: https://lore.kernel.org/r/20210924143347.14721-2-broonie@kernel.org
---
 drivers/mfd/altera-a10sr.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mfd/altera-a10sr.c b/drivers/mfd/altera-a10sr.c
index a3bf64f9afd1b..6d04fdd09ca2e 100644
--- a/drivers/mfd/altera-a10sr.c
+++ b/drivers/mfd/altera-a10sr.c
@@ -150,6 +150,13 @@ static const struct of_device_id altr_a10sr_spi_of_match[] = {
 	{ .compatible = "altr,a10sr" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, altr_a10sr_spi_of_match);
+
+static const struct spi_device_id altr_a10sr_spi_ids[] = {
+	{ .name = "a10sr" },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, altr_a10sr_spi_ids);
 
 static struct spi_driver altr_a10sr_spi_driver = {
 	.probe = altr_a10sr_spi_probe,
@@ -157,5 +164,6 @@ static struct spi_driver altr_a10sr_spi_driver = {
 		.name = "altr_a10sr",
 		.of_match_table = of_match_ptr(altr_a10sr_spi_of_match),
 	},
+	.id_table = altr_a10sr_spi_ids,
 };
 builtin_driver(altr_a10sr_spi_driver, spi_register_driver)

From 081be399fca1c0f89a2d98fb61d62d57ab6730e5 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 24 Sep 2021 15:33:46 +0100
Subject: [PATCH 0398/1202] mfd: cpcap: Add SPI device ID table

Currently, autoloading for SPI devices does not use the DT ID table;
it uses SPI modaliases. Supporting OF modaliases is going to be
difficult, if not impractical: an attempt was made but has been
reverted. Ensure that module autoloading works for this driver by
adding a SPI device ID table.
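The pattern is identical across these drivers; a minimal sketch of what
gets added (the "foo" names are illustrative, not a real device):

	static const struct spi_device_id foo_spi_ids[] = {
		{ .name = "foo" },
		{ }
	};
	MODULE_DEVICE_TABLE(spi, foo_spi_ids);

	static struct spi_driver foo_driver = {
		.driver   = { .name = "foo" },
		.probe    = foo_probe,
		.id_table = foo_spi_ids,
	};

With the ID table present, the SPI core can match the modalias reported
for the device against the driver, and userspace can autoload the module.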
Fixes: 96c8395e2166 ("spi: Revert modalias changes")
Signed-off-by: Mark Brown
Signed-off-by: Lee Jones
Link: https://lore.kernel.org/r/20210924143347.14721-3-broonie@kernel.org
---
 drivers/mfd/motorola-cpcap.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mfd/motorola-cpcap.c b/drivers/mfd/motorola-cpcap.c
index 6fb206da27298..265464b5d7cc5 100644
--- a/drivers/mfd/motorola-cpcap.c
+++ b/drivers/mfd/motorola-cpcap.c
@@ -202,6 +202,13 @@ static const struct of_device_id cpcap_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, cpcap_of_match);
 
+static const struct spi_device_id cpcap_spi_ids[] = {
+	{ .name = "cpcap", },
+	{ .name = "6556002", },
+	{},
+};
+MODULE_DEVICE_TABLE(spi, cpcap_spi_ids);
+
 static const struct regmap_config cpcap_regmap_config = {
 	.reg_bits = 16,
 	.reg_stride = 4,
@@ -342,6 +349,7 @@ static struct spi_driver cpcap_driver = {
 		.pm = &cpcap_pm,
 	},
 	.probe = cpcap_probe,
+	.id_table = cpcap_spi_ids,
 };
 
 module_spi_driver(cpcap_driver);

From a1198da3ef0b1cae960e44883075090a08edf861 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 24 Sep 2021 15:33:47 +0100
Subject: [PATCH 0399/1202] mfd: sprd: Add SPI device ID table

Currently, autoloading for SPI devices does not use the DT ID table;
it uses SPI modaliases. Supporting OF modaliases is going to be
difficult, if not impractical: an attempt was made but has been
reverted. Ensure that module autoloading works for this driver by
adding a SPI device ID table.

Fixes: 96c8395e2166 ("spi: Revert modalias changes")
Signed-off-by: Mark Brown
Reviewed-by: Baolin Wang
Signed-off-by: Lee Jones
Link: https://lore.kernel.org/r/20210924143347.14721-4-broonie@kernel.org
---
 drivers/mfd/sprd-sc27xx-spi.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c
index 57fb2445720fd..55d2c31bdfb20 100644
--- a/drivers/mfd/sprd-sc27xx-spi.c
+++ b/drivers/mfd/sprd-sc27xx-spi.c
@@ -246,6 +246,12 @@ static const struct of_device_id sprd_pmic_match[] = {
 };
 MODULE_DEVICE_TABLE(of, sprd_pmic_match);
 
+static const struct spi_device_id sprd_pmic_spi_ids[] = {
+	{ .name = "sc2731", .driver_data = (unsigned long)&sc2731_data },
+	{},
+};
+MODULE_DEVICE_TABLE(spi, sprd_pmic_spi_ids);
+
 static struct spi_driver sprd_pmic_driver = {
 	.driver = {
 		.name = "sc27xx-pmic",
@@ -253,6 +259,7 @@ static struct spi_driver sprd_pmic_driver = {
 		.pm = &sprd_pmic_pm_ops,
 	},
 	.probe = sprd_pmic_probe,
+	.id_table = sprd_pmic_spi_ids,
 };
 
 static int __init sprd_pmic_init(void)

From e517a146f83a1edb1fbb4f64163b13c38f103d79 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov
Date: Sun, 26 Sep 2021 02:43:33 +0300
Subject: [PATCH 0400/1202] mfd: qcom-pm8xxx: switch away from using chained IRQ handlers

The PM8xxx PMIC family uses a GPIO as its parent IRQ. Using it together
with irq_set_chained_handler_and_data() results in warnings from
GPIOLIB (see 461c1a7d4733 ("gpiolib: override irq_enable/disable")), as
in this path the IRQ resources are not allocated (and thus the
corresponding GPIO is not marked as used for the IRQ). Use request_irq()
so that the IRQ resources are properly set up.
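The conversion follows the usual pattern for turning a chained flow
handler into a requested interrupt handler; a simplified sketch (the
"foo" names are illustrative):

	static irqreturn_t foo_irq_handler(int irq, void *data)
	{
		struct foo_chip *chip = data;

		/* read the summary status and dispatch sub-interrupts */

		return IRQ_HANDLED;
	}

	/* in probe, instead of irq_set_chained_handler_and_data(): */
	ret = devm_request_irq(&pdev->dev, irq, foo_irq_handler, 0,
			       dev_name(&pdev->dev), chip);

The warning that motivated the change: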
[ 0.803271] ------------[ cut here ]------------ [ 0.803338] WARNING: CPU: 3 PID: 1 at drivers/gpio/gpiolib.c:3207 gpiochip_enable_irq+0xa4/0xa8 [ 0.803470] Modules linked in: [ 0.803542] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.14.0-rc6-next-20210820-postmarketos-qcom-apq8064+ #1 [ 0.803645] Hardware name: Generic DT based system [ 0.803710] Backtrace: [ 0.803777] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 0.803911] r7:00000c87 r6:c07062dc r5:60000093 r4:c11d0f54 [ 0.803980] [] (show_stack) from [] (dump_stack_lvl+0x48/0x54) [ 0.804097] [] (dump_stack_lvl) from [] (dump_stack+0x18/0x1c) [ 0.804217] r5:00000009 r4:c11fe208 [ 0.804274] [] (dump_stack) from [] (__warn+0xfc/0x114) [ 0.804387] [] (__warn) from [] (warn_slowpath_fmt+0x74/0xd0) [ 0.804509] r7:c07062dc r6:00000c87 r5:c11fe208 r4:00000000 [ 0.804577] [] (warn_slowpath_fmt) from [] (gpiochip_enable_irq+0xa4/0xa8) [ 0.804716] r8:c27b6200 r7:c27aec00 r6:c27aec18 r5:cf77a448 r4:c02225f0 [ 0.804789] [] (gpiochip_enable_irq) from [] (gpiochip_irq_enable+0x28/0x38) [ 0.804921] r5:cf77a448 r4:c27aec18 [ 0.804977] [] (gpiochip_irq_enable) from [] (irq_enable+0x48/0x78) [ 0.805111] r5:00000000 r4:c27aec00 [ 0.805167] [] (irq_enable) from [] (__irq_startup+0x80/0xbc) [ 0.805286] r5:00000000 r4:c27aec00 [ 0.805343] [] (__irq_startup) from [] (irq_startup+0xe0/0x18c) [ 0.805468] r7:c27aec00 r6:00000001 r5:00000000 r4:c27aec00 [ 0.805535] [] (irq_startup) from [] (irq_activate_and_startup+0x3c/0x74) [ 0.805669] r7:c27aec00 r6:00000001 r5:c27aec00 r4:00000000 [ 0.805736] [] (irq_activate_and_startup) from [] (__irq_do_set_handler+0xcc/0x1c0) [ 0.805875] r7:c27aec00 r6:c0383710 r5:c08a16b0 r4:00000001 [ 0.805943] [] (__irq_do_set_handler) from [] (irq_set_chained_handler_and_data+0x60/0x98) [ 0.806087] r7:c27b5c10 r6:c27aed40 r5:c08a16b0 r4:c27aec00 [ 0.806154] [] (irq_set_chained_handler_and_data) from [] (pm8xxx_probe+0x1fc/0x24c) [ 0.806298] r6:0000003a r5:0000003a r4:c27b5c00 [ 0.806359] [] (pm8xxx_probe) from [] (platform_probe+0x6c/0xc8) [ 0.806495] r10:c2507080 r9:e8bea2cc r8:c165e0e0 r7:c165e0e0 r6:c15f08f8 r5:c27b5c10 [ 0.806582] r4:00000000 [ 0.806632] [] (platform_probe) from [] (really_probe+0xe8/0x460) [ 0.806769] r7:c165e0e0 r6:c15f08f8 r5:00000000 r4:c27b5c10 [ 0.806837] [] (really_probe) from [] (__driver_probe_device+0xb0/0x22c) [ 0.806975] r7:c27b5c10 r6:cf70fba4 r5:c15f08f8 r4:c27b5c10 [ 0.807042] [] (__driver_probe_device) from [] (driver_probe_device+0x44/0xe0) [ 0.807188] r9:e8bea2cc r8:00000000 r7:c27b5c10 r6:cf70fba4 r5:c16ae4b4 r4:c16ae4b0 [ 0.807271] [] (driver_probe_device) from [] (__device_attach_driver+0xb4/0x12c) [ 0.807421] r9:e8bea2cc r8:c15eec08 r7:c27b5c10 r6:cf70fba4 r5:c15f08f8 r4:00000001 [ 0.807506] [] (__device_attach_driver) from [] (bus_for_each_drv+0x94/0xe4) [ 0.807651] r7:c16ae484 r6:c086ec24 r5:cf70fba4 r4:00000000 [ 0.807718] [] (bus_for_each_drv) from [] (__device_attach+0x104/0x19c) [ 0.807852] r6:00000001 r5:c27b5c54 r4:c27b5c10 [ 0.807913] [] (__device_attach) from [] (device_initial_probe+0x1c/0x20) [ 0.808050] r6:c27b5c10 r5:c15ef1b0 r4:c27b5c10 [ 0.808111] [] (device_initial_probe) from [] (bus_probe_device+0x94/0x9c) [ 0.808240] [] (bus_probe_device) from [] (device_add+0x404/0x8f4) [ 0.808370] r7:c16ae484 r6:c251ba10 r5:00000000 r4:c27b5c10 [ 0.808439] [] (device_add) from [] (of_device_add+0x44/0x4c) [ 0.808581] r10:c144c854 r9:00000001 r8:e8bea314 r7:c251ba10 r6:00000000 r5:00000000 [ 0.808669] r4:c27b5c00 [ 0.808718] [] (of_device_add) from [] 
(of_platform_device_create_pdata+0xa0/0xc8) [ 0.808850] [] (of_platform_device_create_pdata) from [] (of_platform_bus_create+0x1f0/0x514) [ 0.809005] r9:00000001 r8:c251ba10 r7:00000000 r6:00000000 r5:00000000 r4:e8bea2b0 [ 0.809086] [] (of_platform_bus_create) from [] (of_platform_populate+0x98/0x128) [ 0.809233] r10:c144c854 r9:00000001 r8:c251ba10 r7:00000000 r6:00000000 r5:e8bea170 [ 0.809321] r4:e8bea2b0 [ 0.809371] [] (of_platform_populate) from [] (devm_of_platform_populate+0x60/0xa8) [ 0.809521] r9:0000011d r8:c165e0e0 r7:e8bea170 r6:c2c34f40 r5:c2cac140 r4:c251ba10 [ 0.809604] [] (devm_of_platform_populate) from [] (ssbi_probe+0x138/0x16c) [ 0.809738] r6:c2c34f40 r5:c251ba10 r4:ff822700 [ 0.809800] [] (ssbi_probe) from [] (platform_probe+0x6c/0xc8) [ 0.809923] r7:c165e0e0 r6:c15f0a80 r5:c251ba10 r4:00000000 [ 0.809989] [] (platform_probe) from [] (really_probe+0xe8/0x460) [ 0.810120] r7:c165e0e0 r6:c15f0a80 r5:00000000 r4:c251ba10 [ 0.810187] [] (really_probe) from [] (__driver_probe_device+0xb0/0x22c) [ 0.810325] r7:c251ba10 r6:c15f0a80 r5:c15f0a80 r4:c251ba10 [ 0.810393] [] (__driver_probe_device) from [] (driver_probe_device+0x44/0xe0) [ 0.810539] r9:0000011d r8:00000000 r7:c251ba10 r6:c15f0a80 r5:c16ae4b4 r4:c16ae4b0 [ 0.810623] [] (driver_probe_device) from [] (__driver_attach+0xdc/0x188) [ 0.810766] r9:0000011d r8:c144c834 r7:00000000 r6:c15f0a80 r5:c251ba10 r4:00000000 [ 0.810849] [] (__driver_attach) from [] (bus_for_each_dev+0x88/0xd4) [ 0.810985] r7:00000000 r6:c086ed50 r5:c15f0a80 r4:00000000 [ 0.811052] [] (bus_for_each_dev) from [] (driver_attach+0x2c/0x30) [ 0.811182] r6:c15ef1b0 r5:c2c34e80 r4:c15f0a80 [ 0.811243] [] (driver_attach) from [] (bus_add_driver+0x180/0x21c) [ 0.811364] [] (bus_add_driver) from [] (driver_register+0x84/0x118) [ 0.811492] r7:00000000 r6:ffffe000 r5:c1428210 r4:c15f0a80 [ 0.811558] [] (driver_register) from [] (__platform_driver_register+0x2c/0x34) [ 0.811683] r5:c1428210 r4:c16524a0 [ 0.811739] [] (__platform_driver_register) from [] (ssbi_driver_init+0x24/0x28) [ 0.811868] [] (ssbi_driver_init) from [] (do_one_initcall+0x68/0x2c8) [ 0.811990] [] (do_one_initcall) from [] (kernel_init_freeable+0x1dc/0x23c) [ 0.812135] r7:cf7b0400 r6:c130339c r5:00000007 r4:c147f6a0 [ 0.812204] [] (kernel_init_freeable) from [] (kernel_init+0x20/0x138) [ 0.812345] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0e40e40 [ 0.812433] r4:00000000 [ 0.812483] [] (kernel_init) from [] (ret_from_fork+0x14/0x24) [ 0.812596] Exception stack(0xcf70ffb0 to 0xcf70fff8) [ 0.812684] ffa0: 00000000 00000000 00000000 00000000 [ 0.812809] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 0.812923] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 0.813008] r5:c0e40e40 r4:00000000 [ 0.813075] ---[ end trace ad2443eee078d094 ]--- Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Reviewed-by: Linus Walleij Tested-by: David Heidelberg # on Nexus 7 (deb) Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210925234333.2430755-1-dmitry.baryshkov@linaro.org --- drivers/mfd/qcom-pm8xxx.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/mfd/qcom-pm8xxx.c b/drivers/mfd/qcom-pm8xxx.c index ec18a04de3555..2f2734ba5273e 100644 --- a/drivers/mfd/qcom-pm8xxx.c +++ b/drivers/mfd/qcom-pm8xxx.c @@ -65,7 +65,7 @@ struct pm_irq_data { int num_irqs; struct irq_chip *irq_chip; - void (*irq_handler)(struct irq_desc *desc); + irq_handler_t irq_handler; }; 
struct pm_irq_chip { @@ -169,19 +169,16 @@ static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) return ret; } -static void pm8xxx_irq_handler(struct irq_desc *desc) +static irqreturn_t pm8xxx_irq_handler(int irq, void *data) { - struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); - struct irq_chip *irq_chip = irq_desc_get_chip(desc); + struct pm_irq_chip *chip = data; unsigned int root; int i, ret, masters = 0; - chained_irq_enter(irq_chip, desc); - ret = regmap_read(chip->regmap, SSBI_REG_ADDR_IRQ_ROOT, &root); if (ret) { pr_err("Can't read root status ret=%d\n", ret); - return; + return IRQ_NONE; } /* on pm8xxx series masters start from bit 1 of the root */ @@ -192,7 +189,7 @@ static void pm8xxx_irq_handler(struct irq_desc *desc) if (masters & (1 << i)) pm8xxx_irq_master_handler(chip, i); - chained_irq_exit(irq_chip, desc); + return IRQ_HANDLED; } static void pm8821_irq_block_handler(struct pm_irq_chip *chip, @@ -230,19 +227,17 @@ static inline void pm8821_irq_master_handler(struct pm_irq_chip *chip, pm8821_irq_block_handler(chip, master, block); } -static void pm8821_irq_handler(struct irq_desc *desc) +static irqreturn_t pm8821_irq_handler(int irq, void *data) { - struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); - struct irq_chip *irq_chip = irq_desc_get_chip(desc); + struct pm_irq_chip *chip = data; unsigned int master; int ret; - chained_irq_enter(irq_chip, desc); ret = regmap_read(chip->regmap, PM8821_SSBI_REG_ADDR_IRQ_MASTER0, &master); if (ret) { pr_err("Failed to read master 0 ret=%d\n", ret); - goto done; + return IRQ_NONE; } /* bits 1 through 7 marks the first 7 blocks in master 0 */ @@ -251,19 +246,18 @@ static void pm8821_irq_handler(struct irq_desc *desc) /* bit 0 marks if master 1 contains any bits */ if (!(master & BIT(0))) - goto done; + return IRQ_NONE; ret = regmap_read(chip->regmap, PM8821_SSBI_REG_ADDR_IRQ_MASTER1, &master); if (ret) { pr_err("Failed to read master 1 ret=%d\n", ret); - goto done; + return IRQ_NONE; } pm8821_irq_master_handler(chip, 1, master); -done: - chained_irq_exit(irq_chip, desc); + return IRQ_HANDLED; } static void pm8xxx_irq_mask_ack(struct irq_data *d) @@ -574,14 +568,15 @@ static int pm8xxx_probe(struct platform_device *pdev) if (!chip->irqdomain) return -ENODEV; - irq_set_chained_handler_and_data(irq, data->irq_handler, chip); + rc = devm_request_irq(&pdev->dev, irq, data->irq_handler, 0, dev_name(&pdev->dev), chip); + if (rc) + return rc; + irq_set_irq_wake(irq, 1); rc = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); - if (rc) { - irq_set_chained_handler_and_data(irq, NULL, NULL); + if (rc) irq_domain_remove(chip->irqdomain); - } return rc; } @@ -594,11 +589,9 @@ static int pm8xxx_remove_child(struct device *dev, void *unused) static int pm8xxx_remove(struct platform_device *pdev) { - int irq = platform_get_irq(pdev, 0); struct pm_irq_chip *chip = platform_get_drvdata(pdev); device_for_each_child(&pdev->dev, NULL, pm8xxx_remove_child); - irq_set_chained_handler_and_data(irq, NULL, NULL); irq_domain_remove(chip->irqdomain); return 0; From ad1ebdb0e3c822cb35671669bfe514559fd1cfde Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 24 Sep 2021 09:16:14 +0200 Subject: [PATCH 0401/1202] dt-bindings: mfd: Convert X-Powers AC100 binding to a schema The X-Powers AC100 hybrid devices are supported by Linux thanks to its device tree binding. Now that we have the DT validation in place, let's convert the device tree bindings for that driver over to a YAML schema. 
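The result can be exercised with the usual schema validation target,
for example:

	make dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml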
Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Reviewed-by: Rob Herring Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210924071614.868307-3-maxime@cerno.tech --- .../devicetree/bindings/mfd/ac100.txt | 50 -------- .../bindings/mfd/x-powers,ac100.yaml | 116 ++++++++++++++++++ 2 files changed, 116 insertions(+), 50 deletions(-) delete mode 100644 Documentation/devicetree/bindings/mfd/ac100.txt create mode 100644 Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml diff --git a/Documentation/devicetree/bindings/mfd/ac100.txt b/Documentation/devicetree/bindings/mfd/ac100.txt deleted file mode 100644 index dff219f074931..0000000000000 --- a/Documentation/devicetree/bindings/mfd/ac100.txt +++ /dev/null @@ -1,50 +0,0 @@ -X-Powers AC100 Codec/RTC IC Device Tree bindings - -AC100 is a audio codec and RTC subsystem combo IC. The 2 parts are -separated, including power supplies and interrupt lines, but share -a common register address space and host interface. - -Required properties: -- compatible: "x-powers,ac100" -- reg: The I2C slave address or RSB hardware address for the chip -- sub-nodes: - - codec - - compatible: "x-powers,ac100-codec" - - interrupts: SoC NMI / GPIO interrupt connected to the - IRQ_AUDIO pin - - #clock-cells: Shall be 0 - - clock-output-names: "4M_adda" - - - see clock/clock-bindings.txt for common clock bindings - - - rtc - - compatible: "x-powers,ac100-rtc" - - clocks: A phandle to the codec's "4M_adda" clock - - #clock-cells: Shall be 1 - - clock-output-names: "cko1_rtc", "cko2_rtc", "cko3_rtc" - - - see clock/clock-bindings.txt for common clock bindings - -Example: - -ac100: codec@e89 { - compatible = "x-powers,ac100"; - reg = <0xe89>; - - ac100_codec: codec { - compatible = "x-powers,ac100-codec"; - interrupt-parent = <&r_pio>; - interrupts = <0 9 IRQ_TYPE_LEVEL_LOW>; /* PL9 */ - #clock-cells = <0>; - clock-output-names = "4M_adda"; - }; - - ac100_rtc: rtc { - compatible = "x-powers,ac100-rtc"; - interrupt-parent = <&nmi_intc>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - clocks = <&ac100_codec>; - #clock-cells = <1>; - clock-output-names = "cko1_rtc", "cko2_rtc", "cko3_rtc"; - }; -}; diff --git a/Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml b/Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml new file mode 100644 index 0000000000000..de330c9869ffb --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/mfd/x-powers,ac100.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: X-Powers AC100 Device Tree Bindings + +maintainers: + - Chen-Yu Tsai + +properties: + compatible: + const: x-powers,ac100 + + reg: + maxItems: 1 + + codec: + type: object + + properties: + "#clock-cells": + const: 0 + + compatible: + const: x-powers,ac100-codec + + interrupts: + maxItems: 1 + + clock-output-names: + maxItems: 1 + description: > + Name of the 4M_adda clock exposed by the codec + + required: + - "#clock-cells" + - compatible + - interrupts + - clock-output-names + + additionalProperties: false + + rtc: + type: object + + properties: + "#clock-cells": + const: 1 + + compatible: + const: x-powers,ac100-rtc + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + description: > + A phandle to the codec's "4M_adda" clock + + clock-output-names: + maxItems: 3 + description: > + Name of the cko1, cko2 and cko3 clocks exposed by the codec + + required: + - "#clock-cells" + - compatible + - interrupts + - 
clocks + - clock-output-names + + additionalProperties: false + +required: + - compatible + - reg + - codec + - rtc + +additionalProperties: false + +examples: + - | + #include + + rsb { + #address-cells = <1>; + #size-cells = <0>; + + codec@e89 { + compatible = "x-powers,ac100"; + reg = <0xe89>; + + ac100_codec: codec { + compatible = "x-powers,ac100-codec"; + interrupt-parent = <&r_pio>; + interrupts = <0 9 IRQ_TYPE_LEVEL_LOW>; /* PL9 */ + #clock-cells = <0>; + clock-output-names = "4M_adda"; + }; + + ac100_rtc: rtc { + compatible = "x-powers,ac100-rtc"; + interrupt-parent = <&nmi_intc>; + interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + clocks = <&ac100_codec>; + #clock-cells = <1>; + clock-output-names = "cko1_rtc", "cko2_rtc", "cko3_rtc"; + }; + }; + }; + +... From 66e31059d11a33d48ad59ad59c94436c133863e3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 24 Sep 2021 09:16:13 +0200 Subject: [PATCH 0402/1202] dt-bindings: mfd: Convert X-Powers AXP binding to a schema The X-Powers AXP PMICs are supported by Linux thanks to its device tree binding. Now that we have the DT validation in place, let's convert the device tree bindings for that driver over to a YAML schema. Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210924071614.868307-2-maxime@cerno.tech --- .../i2c/allwinner,sun6i-a31-p2wi.yaml | 2 +- .../devicetree/bindings/mfd/axp20x.txt | 273 ------------ .../bindings/mfd/x-powers,axp152.yaml | 400 ++++++++++++++++++ 3 files changed, 401 insertions(+), 274 deletions(-) delete mode 100644 Documentation/devicetree/bindings/mfd/axp20x.txt create mode 100644 Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml diff --git a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml index 6097e8ac46c1f..1b03810d4b4d0 100644 --- a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml +++ b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml @@ -55,7 +55,7 @@ examples: #size-cells = <0>; axp221: pmic@68 { - compatible = "x-powers,axp221"; + /* compatible = "x-powers,axp221"; */ reg = <0x68>; }; }; diff --git a/Documentation/devicetree/bindings/mfd/axp20x.txt b/Documentation/devicetree/bindings/mfd/axp20x.txt deleted file mode 100644 index 2b53dcc0ea61c..0000000000000 --- a/Documentation/devicetree/bindings/mfd/axp20x.txt +++ /dev/null @@ -1,273 +0,0 @@ -AXP family PMIC device tree bindings - -The axp20x family current members : -axp152 (X-Powers) -axp202 (X-Powers) -axp209 (X-Powers) -axp221 (X-Powers) -axp223 (X-Powers) -axp803 (X-Powers) -axp806 (X-Powers) -axp809 (X-Powers) -axp813 (X-Powers) - -The AXP813 is 2 chips packaged into 1. The 2 chips do not share anything -other than the packaging. Pins are routed separately. As such they should -be treated as separate entities. The other half is an AC100 RTC/codec -combo chip. Please see ./ac100.txt for its bindings. 
- -Required properties: -- compatible: should be one of: - * "x-powers,axp152" - * "x-powers,axp202" - * "x-powers,axp209" - * "x-powers,axp221" - * "x-powers,axp223" - * "x-powers,axp803" - * "x-powers,axp806" - * "x-powers,axp805", "x-powers,axp806" - * "x-powers,axp305", "x-powers,axp805", "x-powers,axp806" - * "x-powers,axp809" - * "x-powers,axp813" -- reg: The I2C slave address or RSB hardware address for the AXP chip -- interrupt-controller: The PMIC has its own internal IRQs -- #interrupt-cells: Should be set to 1 - -Supported common regulator properties, see ../regulator/regulator.txt for -more information: -- regulator-ramp-delay: sets the ramp up delay in uV/us - AXP20x/DCDC2: 1600, 800 - AXP20x/LDO3: 1600, 800 -- regulator-soft-start: enable the output at the lowest possible voltage and - only then set the desired voltage - AXP20x/LDO3: software-based implementation - -Optional properties: -- interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin -- x-powers,dcdc-freq: defines the work frequency of DC-DC in KHz - AXP152/20X: range: 750-1875, Default: 1.5 MHz - AXP22X/8XX: range: 1800-4050, Default: 3 MHz - -- x-powers,drive-vbus-en: boolean, set this when the N_VBUSEN pin is - used as an output pin to control an external - regulator to drive the OTG VBus, rather then - as an input pin which signals whether the - board is driving OTG VBus or not. - (axp221 / axp223 / axp803/ axp813 only) - -- x-powers,self-working-mode and - x-powers,master-mode: Boolean (axp806 only). Set either of these when the - PMIC is wired for self-working mode or master mode. - If neither is set then slave mode is assumed. - This corresponds to how the MODESET pin is wired. - -- -supply: a phandle to the regulator supply node. May be omitted if - inputs are unregulated, such as using the IPSOUT output - from the PMIC. - -- regulators: A node that houses a sub-node for each regulator. Regulators - not used but preferred to be managed by the OS should be - listed as well. - See Documentation/devicetree/bindings/regulator/regulator.txt - for more information on standard regulator bindings. - -Optional properties for DCDC regulators: -- x-powers,dcdc-workmode: 1 for PWM mode, 0 for AUTO (PWM/PFM) mode - Default: Current hardware setting - The DCDC regulators work in a mixed PWM/PFM mode, - using PFM under light loads and switching to PWM - for heavier loads. Forcing PWM mode trades efficiency - under light loads for lower output noise. This - probably makes sense for HiFi audio related - applications that aren't battery constrained. 
- -AXP202/AXP209 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDC2 : DC-DC buck : vin2-supply -DCDC3 : DC-DC buck : vin3-supply -LDO1 : LDO : acin-supply : always on -LDO2 : LDO : ldo24in-supply : shared supply -LDO3 : LDO : ldo3in-supply -LDO4 : LDO : ldo24in-supply : shared supply -LDO5 : LDO : ldo5in-supply - -AXP221/AXP223 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDC1 : DC-DC buck : vin1-supply -DCDC2 : DC-DC buck : vin2-supply -DCDC3 : DC-DC buck : vin3-supply -DCDC4 : DC-DC buck : vin4-supply -DCDC5 : DC-DC buck : vin5-supply -DC1SW : On/Off Switch : : DCDC1 secondary output -DC5LDO : LDO : : input from DCDC5 -ALDO1 : LDO : aldoin-supply : shared supply -ALDO2 : LDO : aldoin-supply : shared supply -ALDO3 : LDO : aldoin-supply : shared supply -DLDO1 : LDO : dldoin-supply : shared supply -DLDO2 : LDO : dldoin-supply : shared supply -DLDO3 : LDO : dldoin-supply : shared supply -DLDO4 : LDO : dldoin-supply : shared supply -ELDO1 : LDO : eldoin-supply : shared supply -ELDO2 : LDO : eldoin-supply : shared supply -ELDO3 : LDO : eldoin-supply : shared supply -LDO_IO0 : LDO : ips-supply : GPIO 0 -LDO_IO1 : LDO : ips-supply : GPIO 1 -RTC_LDO : LDO : ips-supply : always on -DRIVEVBUS : Enable output : drivevbus-supply : external regulator - -AXP803 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDC1 : DC-DC buck : vin1-supply -DCDC2 : DC-DC buck : vin2-supply : poly-phase capable -DCDC3 : DC-DC buck : vin3-supply : poly-phase capable -DCDC4 : DC-DC buck : vin4-supply -DCDC5 : DC-DC buck : vin5-supply : poly-phase capable -DCDC6 : DC-DC buck : vin6-supply : poly-phase capable -DC1SW : On/Off Switch : : DCDC1 secondary output -ALDO1 : LDO : aldoin-supply : shared supply -ALDO2 : LDO : aldoin-supply : shared supply -ALDO3 : LDO : aldoin-supply : shared supply -DLDO1 : LDO : dldoin-supply : shared supply -DLDO2 : LDO : dldoin-supply : shared supply -DLDO3 : LDO : dldoin-supply : shared supply -DLDO4 : LDO : dldoin-supply : shared supply -ELDO1 : LDO : eldoin-supply : shared supply -ELDO2 : LDO : eldoin-supply : shared supply -ELDO3 : LDO : eldoin-supply : shared supply -FLDO1 : LDO : fldoin-supply : shared supply -FLDO2 : LDO : fldoin-supply : shared supply -LDO_IO0 : LDO : ips-supply : GPIO 0 -LDO_IO1 : LDO : ips-supply : GPIO 1 -RTC_LDO : LDO : ips-supply : always on -DRIVEVBUS : Enable output : drivevbus-supply : external regulator - -AXP806 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDCA : DC-DC buck : vina-supply : poly-phase capable -DCDCB : DC-DC buck : vinb-supply : poly-phase capable -DCDCC : DC-DC buck : vinc-supply : poly-phase capable -DCDCD : DC-DC buck : vind-supply : poly-phase capable -DCDCE : DC-DC buck : vine-supply : poly-phase capable -ALDO1 : LDO : aldoin-supply : shared supply -ALDO2 : LDO : aldoin-supply : shared supply -ALDO3 : LDO : aldoin-supply : shared supply -BLDO1 : LDO : bldoin-supply : shared supply -BLDO2 : LDO : bldoin-supply : shared supply -BLDO3 : LDO : bldoin-supply : shared supply -BLDO4 : LDO : bldoin-supply : shared supply -CLDO1 : LDO : cldoin-supply : shared supply -CLDO2 : LDO : cldoin-supply : shared supply -CLDO3 : LDO : cldoin-supply : shared supply -SW : On/Off Switch : swin-supply - -Additionally, the AXP806 DC-DC 
regulators support poly-phase arrangements -for higher output current. The possible groupings are: A+B, A+B+C, D+E. - -AXP809 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDC1 : DC-DC buck : vin1-supply -DCDC2 : DC-DC buck : vin2-supply -DCDC3 : DC-DC buck : vin3-supply -DCDC4 : DC-DC buck : vin4-supply -DCDC5 : DC-DC buck : vin5-supply -DC1SW : On/Off Switch : : DCDC1 secondary output -DC5LDO : LDO : : input from DCDC5 -ALDO1 : LDO : aldoin-supply : shared supply -ALDO2 : LDO : aldoin-supply : shared supply -ALDO3 : LDO : aldoin-supply : shared supply -DLDO1 : LDO : dldoin-supply : shared supply -DLDO2 : LDO : dldoin-supply : shared supply -ELDO1 : LDO : eldoin-supply : shared supply -ELDO2 : LDO : eldoin-supply : shared supply -ELDO3 : LDO : eldoin-supply : shared supply -LDO_IO0 : LDO : ips-supply : GPIO 0 -LDO_IO1 : LDO : ips-supply : GPIO 1 -RTC_LDO : LDO : ips-supply : always on -SW : On/Off Switch : swin-supply - -AXP813 regulators, type, and corresponding input supply names: - -Regulator Type Supply Name Notes ---------- ---- ----------- ----- -DCDC1 : DC-DC buck : vin1-supply -DCDC2 : DC-DC buck : vin2-supply : poly-phase capable -DCDC3 : DC-DC buck : vin3-supply : poly-phase capable -DCDC4 : DC-DC buck : vin4-supply -DCDC5 : DC-DC buck : vin5-supply : poly-phase capable -DCDC6 : DC-DC buck : vin6-supply : poly-phase capable -DCDC7 : DC-DC buck : vin7-supply -ALDO1 : LDO : aldoin-supply : shared supply -ALDO2 : LDO : aldoin-supply : shared supply -ALDO3 : LDO : aldoin-supply : shared supply -DLDO1 : LDO : dldoin-supply : shared supply -DLDO2 : LDO : dldoin-supply : shared supply -DLDO3 : LDO : dldoin-supply : shared supply -DLDO4 : LDO : dldoin-supply : shared supply -ELDO1 : LDO : eldoin-supply : shared supply -ELDO2 : LDO : eldoin-supply : shared supply -ELDO3 : LDO : eldoin-supply : shared supply -FLDO1 : LDO : fldoin-supply : shared supply -FLDO2 : LDO : fldoin-supply : shared supply -FLDO3 : LDO : fldoin-supply : shared supply -LDO_IO0 : LDO : ips-supply : GPIO 0 -LDO_IO1 : LDO : ips-supply : GPIO 1 -RTC_LDO : LDO : ips-supply : always on -SW : On/Off Switch : swin-supply -DRIVEVBUS : Enable output : drivevbus-supply : external regulator - -Example: - -axp209: pmic@34 { - compatible = "x-powers,axp209"; - reg = <0x34>; - interrupt-parent = <&nmi_intc>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; - #interrupt-cells = <1>; - - regulators { - x-powers,dcdc-freq = <1500>; - - vdd_cpu: dcdc2 { - regulator-always-on; - regulator-min-microvolt = <1000000>; - regulator-max-microvolt = <1450000>; - regulator-name = "vdd-cpu"; - }; - - vdd_int_dll: dcdc3 { - regulator-always-on; - regulator-min-microvolt = <1000000>; - regulator-max-microvolt = <1400000>; - regulator-name = "vdd-int-dll"; - }; - - vdd_rtc: ldo1 { - regulator-always-on; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1400000>; - regulator-name = "vdd-rtc"; - }; - - avcc: ldo2 { - regulator-always-on; - regulator-min-microvolt = <2700000>; - regulator-max-microvolt = <3300000>; - regulator-name = "avcc"; - }; - - ldo3 { - /* unused but preferred to be managed by OS */ - }; - }; -}; diff --git a/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml b/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml new file mode 100644 index 0000000000000..3a53bae611bcd --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml @@ -0,0 +1,400 @@ +# SPDX-License-Identifier: GPL-2.0 
+%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/x-powers,axp152.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: X-Powers AXP PMIC Device Tree Bindings + +maintainers: + - Chen-Yu Tsai + +allOf: + - if: + properties: + compatible: + contains: + enum: + - x-powers,axp152 + - x-powers,axp202 + - x-powers,axp209 + + then: + properties: + regulators: + properties: + x-powers,dcdc-freq: + minimum: 750 + maximum: 1875 + default: 1500 + + else: + properties: + regulators: + properties: + x-powers,dcdc-freq: + minimum: 1800 + maximum: 4050 + default: 3000 + + - if: + properties: + compatible: + contains: + enum: + - x-powers,axp152 + - x-powers,axp202 + - x-powers,axp209 + + then: + not: + required: + - x-powers,drive-vbus-en + + - if: + not: + properties: + compatible: + contains: + const: x-powers,axp806 + + then: + allOf: + - not: + required: + - x-powers,self-working-mode + + - not: + required: + - x-powers,master-mode + + - if: + not: + properties: + compatible: + contains: + const: x-powers,axp305 + + then: + required: + - interrupts + +properties: + compatible: + oneOf: + - enum: + - x-powers,axp152 + - x-powers,axp202 + - x-powers,axp209 + - x-powers,axp221 + - x-powers,axp223 + - x-powers,axp803 + - x-powers,axp806 + - x-powers,axp809 + - x-powers,axp813 + - items: + - const: x-powers,axp805 + - const: x-powers,axp806 + - items: + - const: x-powers,axp305 + - const: x-powers,axp805 + - const: x-powers,axp806 + - items: + - const: x-powers,axp818 + - const: x-powers,axp813 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-controller: true + + "#interrupt-cells": + const: 1 + + x-powers,drive-vbus-en: + type: boolean + description: > + Set this when the N_VBUSEN pin is used as an output pin to control an + external regulator to drive the OTG VBus, rather then as an input pin + which signals whether the board is driving OTG VBus or not. + + x-powers,self-working-mode: + type: boolean + description: > + Set this when the PMIC is wired for self-working mode through the MODESET + pin. + + x-powers,master-mode: + type: boolean + description: > + Set this when the PMIC is wired for master mode through the MODESET pin. + + vin1-supply: + description: > + DCDC1 power supply node, if present. + + vin2-supply: + description: > + DCDC2 power supply node, if present. + + vin3-supply: + description: > + DCDC3 power supply node, if present. + + vin4-supply: + description: > + DCDC4 power supply node, if present. + + vin5-supply: + description: > + DCDC5 power supply node, if present. + + vin6-supply: + description: > + DCDC6 power supply node, if present. + + vin7-supply: + description: > + DCDC7 power supply node, if present. + + vina-supply: + description: > + DCDCA power supply node, if present. + + vinb-supply: + description: > + DCDCB power supply node, if present. + + vinc-supply: + description: > + DCDCC power supply node, if present. + + vind-supply: + description: > + DCDCD power supply node, if present. + + vine-supply: + description: > + DCDCE power supply node, if present. + + acin-supply: + description: > + LDO1 power supply node, if present. + + ldo24in-supply: + description: > + LDO2 and LDO4 power supply node, if present. + + ldo3in-supply: + description: > + LDO3 power supply node, if present. + + ldo5in-supply: + description: > + LDO5 power supply node, if present. + + aldoin-supply: + description: > + ALDO* power supply node, if present. + + bldoin-supply: + description: > + BLDO* power supply node, if present. 
+ + cldoin-supply: + description: > + CLDO* power supply node, if present. + + dldoin-supply: + description: > + DLDO* power supply node, if present. + + eldoin-supply: + description: > + ELDO* power supply node, if present. + + fldoin-supply: + description: > + FLDO* power supply node, if present. + + ips-supply: + description: > + LDO_IO0, LDO_IO1 and RTC_LDO power supply node, if present. + + drivevbus-supply: + description: > + DRIVEVBUS power supply node, if present. + + swin-supply: + description: > + SW power supply node, if present. + + adc: + $ref: /schemas/iio/adc/x-powers,axp209-adc.yaml# + + gpio: + $ref: /schemas/gpio/x-powers,axp209-gpio.yaml# + + ac-power: + $ref: /schemas/power/supply/x-powers,axp20x-ac-power-supply.yaml# + + battery-power: + $ref: /schemas/power/supply/x-powers,axp20x-battery-power-supply.yaml# + + usb-power: + $ref: /schemas/power/supply/x-powers,axp20x-usb-power-supply.yaml# + + regulators: + type: object + + properties: + x-powers,dcdc-freq: + $ref: /schemas/types.yaml#/definitions/uint32 + description: > + Defines the work frequency of DC-DC in kHz. + + patternProperties: + "^(([a-f])?ldo[0-9]|dcdc[0-7a-e]|ldo(_|-)io(0|1)|(dc1)?sw|rtc(_|-)ldo|drivevbus|dc5ldo)$": + $ref: /schemas/regulator/regulator.yaml# + type: object + + properties: + regulator-ramp-delay: + description: > + Only 800 and 1600 are valid for the DCDC2 and LDO3 regulators on + the AXP209. + + regulator-soft-start: + description: > + Only valid for the LDO3 regulator. + + x-powers,dcdc-workmode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1] + description: > + Only valid for DCDC regulators. Setup 1 for PWM mode, 0 + for AUTO (PWM/PFM) mode. The DCDC regulators work in a + mixed PWM/PFM mode, using PFM under light loads and + switching to PWM for heavier loads. Forcing PWM mode + trades efficiency under light loads for lower output + noise. This probably makes sense for HiFi audio related + applications that aren't battery constrained. 
+
+      additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - "#interrupt-cells"
+  - interrupt-controller
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@30 {
+            compatible = "x-powers,axp152";
+            reg = <0x30>;
+            interrupts = <0>;
+            interrupt-controller;
+            #interrupt-cells = <1>;
+        };
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        pmic@34 {
+            compatible = "x-powers,axp209";
+            reg = <0x34>;
+            interrupt-parent = <&nmi_intc>;
+            interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+            interrupt-controller;
+            #interrupt-cells = <1>;
+
+            ac_power_supply: ac-power {
+                compatible = "x-powers,axp202-ac-power-supply";
+            };
+
+            axp_adc: adc {
+                compatible = "x-powers,axp209-adc";
+                #io-channel-cells = <1>;
+            };
+
+            axp_gpio: gpio {
+                compatible = "x-powers,axp209-gpio";
+                gpio-controller;
+                #gpio-cells = <2>;
+
+                gpio0-adc-pin {
+                    pins = "GPIO0";
+                    function = "adc";
+                };
+            };
+
+            battery_power_supply: battery-power {
+                compatible = "x-powers,axp209-battery-power-supply";
+            };
+
+            regulators {
+                /* Default work frequency for buck regulators */
+                x-powers,dcdc-freq = <1500>;
+
+                reg_dcdc2: dcdc2 {
+                    regulator-always-on;
+                    regulator-min-microvolt = <1000000>;
+                    regulator-max-microvolt = <1450000>;
+                    regulator-name = "vdd-cpu";
+                };
+
+                reg_dcdc3: dcdc3 {
+                    regulator-always-on;
+                    regulator-min-microvolt = <1000000>;
+                    regulator-max-microvolt = <1400000>;
+                    regulator-name = "vdd-int-dll";
+                };
+
+                reg_ldo1: ldo1 {
+                    /* LDO1 is a fixed output regulator */
+                    regulator-always-on;
+                    regulator-min-microvolt = <1300000>;
+                    regulator-max-microvolt = <1300000>;
+                    regulator-name = "vdd-rtc";
+                };
+
+                reg_ldo2: ldo2 {
+                    regulator-always-on;
+                    regulator-min-microvolt = <3000000>;
+                    regulator-max-microvolt = <3000000>;
+                    regulator-name = "avcc";
+                };
+
+                reg_ldo3: ldo3 {
+                    regulator-name = "ldo3";
+                };
+
+                reg_ldo4: ldo4 {
+                    regulator-name = "ldo4";
+                };
+
+                reg_ldo5: ldo5 {
+                    regulator-name = "ldo5";
+                };
+            };
+
+            usb_power_supply: usb-power {
+                compatible = "x-powers,axp202-usb-power-supply";
+            };
+        };
+    };

From 8cf7685c92ef4296b680c7460504b8a1c895e678 Mon Sep 17 00:00:00 2001
From: Charles Keepax
Date: Tue, 28 Sep 2021 17:30:35 +0100
Subject: [PATCH 0403/1202] mfd: arizona: Split of_match table into I2C and SPI versions

The Arizona driver covers some devices which only have an I2C interface
and some which only have a SPI interface. Currently both of these share
an of_match table, but this means inappropriate compatibles are
available for each interface. Tidy this up by creating a table for each
interface, listing only the appropriate compatibles.
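(As the diff below shows, parts with both interfaces, such as wm5102,
stay in both tables, while SPI-only parts such as wm1831 and cs47l24
are now reachable only through the SPI table, and I2C-only parts such
as wm8997 only through the I2C table.)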
Reported-by: Mark Brown Signed-off-by: Charles Keepax Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210928163035.23960-1-ckeepax@opensource.cirrus.com --- drivers/mfd/arizona-core.c | 13 ------------- drivers/mfd/arizona-i2c.c | 14 +++++++++++++- drivers/mfd/arizona-spi.c | 13 ++++++++++++- drivers/mfd/arizona.h | 2 -- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index 9323b1e3a69ef..cbf1dd90b70d5 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -845,19 +845,6 @@ static int arizona_of_get_core_pdata(struct arizona *arizona) return 0; } - -const struct of_device_id arizona_of_match[] = { - { .compatible = "wlf,wm5102", .data = (void *)WM5102 }, - { .compatible = "wlf,wm5110", .data = (void *)WM5110 }, - { .compatible = "wlf,wm8280", .data = (void *)WM8280 }, - { .compatible = "wlf,wm8997", .data = (void *)WM8997 }, - { .compatible = "wlf,wm8998", .data = (void *)WM8998 }, - { .compatible = "wlf,wm1814", .data = (void *)WM1814 }, - { .compatible = "wlf,wm1831", .data = (void *)WM1831 }, - { .compatible = "cirrus,cs47l24", .data = (void *)CS47L24 }, - {}, -}; -EXPORT_SYMBOL_GPL(arizona_of_match); #else static inline int arizona_of_get_core_pdata(struct arizona *arizona) { diff --git a/drivers/mfd/arizona-i2c.c b/drivers/mfd/arizona-i2c.c index 5e83b730c4ced..3ed810e81f631 100644 --- a/drivers/mfd/arizona-i2c.c +++ b/drivers/mfd/arizona-i2c.c @@ -104,11 +104,23 @@ static const struct i2c_device_id arizona_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, arizona_i2c_id); +#ifdef CONFIG_OF +const struct of_device_id arizona_i2c_of_match[] = { + { .compatible = "wlf,wm5102", .data = (void *)WM5102 }, + { .compatible = "wlf,wm5110", .data = (void *)WM5110 }, + { .compatible = "wlf,wm8280", .data = (void *)WM8280 }, + { .compatible = "wlf,wm8997", .data = (void *)WM8997 }, + { .compatible = "wlf,wm8998", .data = (void *)WM8998 }, + { .compatible = "wlf,wm1814", .data = (void *)WM1814 }, + {}, +}; +#endif + static struct i2c_driver arizona_i2c_driver = { .driver = { .name = "arizona", .pm = &arizona_pm_ops, - .of_match_table = of_match_ptr(arizona_of_match), + .of_match_table = of_match_ptr(arizona_i2c_of_match), }, .probe = arizona_i2c_probe, .remove = arizona_i2c_remove, diff --git a/drivers/mfd/arizona-spi.c b/drivers/mfd/arizona-spi.c index aa1d6f94ae532..9fe06dda37829 100644 --- a/drivers/mfd/arizona-spi.c +++ b/drivers/mfd/arizona-spi.c @@ -225,11 +225,22 @@ static const struct spi_device_id arizona_spi_ids[] = { }; MODULE_DEVICE_TABLE(spi, arizona_spi_ids); +#ifdef CONFIG_OF +const struct of_device_id arizona_spi_of_match[] = { + { .compatible = "wlf,wm5102", .data = (void *)WM5102 }, + { .compatible = "wlf,wm5110", .data = (void *)WM5110 }, + { .compatible = "wlf,wm8280", .data = (void *)WM8280 }, + { .compatible = "wlf,wm1831", .data = (void *)WM1831 }, + { .compatible = "cirrus,cs47l24", .data = (void *)CS47L24 }, + {}, +}; +#endif + static struct spi_driver arizona_spi_driver = { .driver = { .name = "arizona", .pm = &arizona_pm_ops, - .of_match_table = of_match_ptr(arizona_of_match), + .of_match_table = of_match_ptr(arizona_spi_of_match), .acpi_match_table = ACPI_PTR(arizona_acpi_match), }, .probe = arizona_spi_probe, diff --git a/drivers/mfd/arizona.h b/drivers/mfd/arizona.h index 801cbbcd71cb5..66d6092d08515 100644 --- a/drivers/mfd/arizona.h +++ b/drivers/mfd/arizona.h @@ -28,8 +28,6 @@ extern const struct regmap_config wm8998_i2c_regmap; extern const struct dev_pm_ops 
arizona_pm_ops; -extern const struct of_device_id arizona_of_match[]; - extern const struct regmap_irq_chip wm5102_aod; extern const struct regmap_irq_chip wm5102_irq; From 412a235df11f0d491fd9536f392e7dd03154e832 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 25 Sep 2021 11:04:04 +0200 Subject: [PATCH 0404/1202] dt-bindings: mfd: syscon: Add rk3368 QoS register compatible Document rk3368 compatible for QoS registers. Signed-off-by: Heiko Stuebner Acked-by: Rob Herring Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210925090405.2601792-2-heiko@sntech.de --- Documentation/devicetree/bindings/mfd/syscon.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mfd/syscon.yaml b/Documentation/devicetree/bindings/mfd/syscon.yaml index 0dcffc273c2bb..c0fec008346b5 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.yaml +++ b/Documentation/devicetree/bindings/mfd/syscon.yaml @@ -50,6 +50,7 @@ properties: - rockchip,rk3066-qos - rockchip,rk3228-qos - rockchip,rk3288-qos + - rockchip,rk3368-qos - rockchip,rk3399-qos - rockchip,rk3568-qos - samsung,exynos3-sysreg From c52ed00093cc0b3a0804eb5d952e86f7085e822d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 24 Sep 2021 09:16:12 +0200 Subject: [PATCH 0405/1202] dt-bindings: gpio: Convert X-Powers AXP209 GPIO binding to a schema The X-Powers AXP PMICs feature a GPIO Controller supported by Linux thanks to its device tree binding. Now that we have the DT validation in place, let's convert the device tree bindings for that driver over to a YAML schema. Cc: Chen-Yu Tsai Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Signed-off-by: Maxime Ripard Acked-by: Bartosz Golaszewski Reviewed-by: Rob Herring Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20210924071614.868307-1-maxime@cerno.tech --- .../devicetree/bindings/gpio/gpio-axp209.txt | 75 ------------------- .../bindings/gpio/x-powers,axp209-gpio.yaml | 55 ++++++++++++++ 2 files changed, 55 insertions(+), 75 deletions(-) delete mode 100644 Documentation/devicetree/bindings/gpio/gpio-axp209.txt create mode 100644 Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml diff --git a/Documentation/devicetree/bindings/gpio/gpio-axp209.txt b/Documentation/devicetree/bindings/gpio/gpio-axp209.txt deleted file mode 100644 index fc42b2caa06d7..0000000000000 --- a/Documentation/devicetree/bindings/gpio/gpio-axp209.txt +++ /dev/null @@ -1,75 +0,0 @@ -AXP209 GPIO & pinctrl controller - -This driver follows the usual GPIO bindings found in -Documentation/devicetree/bindings/gpio/gpio.txt - -This driver follows the usual pinctrl bindings found in -Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt - -This driver employs the per-pin muxing pattern. - -Required properties: -- compatible: Should be one of: - - "x-powers,axp209-gpio" - - "x-powers,axp813-gpio" -- #gpio-cells: Should be two. The first cell is the pin number and the - second is the GPIO flags. -- gpio-controller: Marks the device node as a GPIO controller. 
- -This node must be a subnode of the axp20x PMIC, documented in -Documentation/devicetree/bindings/mfd/axp20x.txt - -Example: - -axp209: pmic@34 { - compatible = "x-powers,axp209"; - reg = <0x34>; - interrupt-parent = <&nmi_intc>; - interrupts = <0 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; - #interrupt-cells = <1>; - - axp_gpio: gpio { - compatible = "x-powers,axp209-gpio"; - gpio-controller; - #gpio-cells = <2>; - }; -}; - -The GPIOs can be muxed to other functions and therefore, must be a subnode of -axp_gpio. - -Example: - -&axp_gpio { - gpio0_adc: gpio0-adc { - pins = "GPIO0"; - function = "adc"; - }; -}; - -&example_node { - pinctrl-names = "default"; - pinctrl-0 = <&gpio0_adc>; -}; - -GPIOs and their functions -------------------------- - -Each GPIO is independent from the other (i.e. GPIO0 in gpio_in function does -not force GPIO1 and GPIO2 to be in gpio_in function as well). - -axp209 ------- -GPIO | Functions ------------------------- -GPIO0 | gpio_in, gpio_out, ldo, adc -GPIO1 | gpio_in, gpio_out, ldo, adc -GPIO2 | gpio_in, gpio_out - -axp813 ------- -GPIO | Functions ------------------------- -GPIO0 | gpio_in, gpio_out, ldo, adc -GPIO1 | gpio_in, gpio_out, ldo diff --git a/Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml b/Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml new file mode 100644 index 0000000000000..0f628b088cec8 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/gpio/x-powers,axp209-gpio.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: X-Powers AXP209 GPIO Device Tree Bindings + +maintainers: + - Chen-Yu Tsai + +properties: + "#gpio-cells": + const: 2 + description: > + The first cell is the pin number and the second is the GPIO flags. + + compatible: + oneOf: + - enum: + - x-powers,axp209-gpio + - x-powers,axp813-gpio + - items: + - const: x-powers,axp803-gpio + - const: x-powers,axp813-gpio + + gpio-controller: true + +patternProperties: + "^.*-pins?$": + $ref: /schemas/pinctrl/pinmux-node.yaml# + + properties: + pins: + items: + enum: + - GPIO0 + - GPIO1 + - GPIO2 + + function: + enum: + - adc + - ldo + - gpio_in + - gpio_out + +required: + - compatible + - "#gpio-cells" + - gpio-controller + +additionalProperties: false + +... 
From 813c24f4caf323dd7d90de95ab8a8d96ee73647a Mon Sep 17 00:00:00 2001 From: Kai Song Date: Wed, 6 Oct 2021 22:19:26 +0800 Subject: [PATCH 0406/1202] mfd: altera-sysmgr: Fix a mistake caused by resource_size conversion resource_size() is defined as: res->end - res->start + 1; The original code was: sysmgr_config.max_register = res->end - res->start - 3; So the correct fix is: sysmgr_config.max_register = resource_size(res) - 4; Fixes: d12edf9661a4 ("mfd: altera-sysmgr: Use resource_size function on resource object") Signed-off-by: Kai Song Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20211006141926.6120-1-songkai01@inspur.com --- drivers/mfd/altera-sysmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 20cb294c75122..5d3715a28b28e 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -153,7 +153,7 @@ static int sysmgr_probe(struct platform_device *pdev) if (!base) return -ENOMEM; - sysmgr_config.max_register = resource_size(res) - 3; + sysmgr_config.max_register = resource_size(res) - 4; regmap = devm_regmap_init_mmio(dev, base, &sysmgr_config); } From 4c46557a230c89b379b6f90a6a2fc6ce8d329d86 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 5 Oct 2021 16:12:44 -0400 Subject: [PATCH 0407/1202] btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls For device removal and replace we call btrfs_find_device_by_devspec, which if we give it a device path and nothing else will call btrfs_get_dev_args_from_path, which opens the block device and reads the super block and then looks up our device based on that. However at this point we're holding the sb write "lock", so reading the block device pulls in the dependency of ->open_mutex, which produces the following lockdep splat ====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc2+ #405 Not tainted ------------------------------------------------------ losetup/11576 is trying to acquire lock: ffff9bbe8cded938 ((wq_completion)loop0){+.+.}-{0:0}, at: flush_workqueue+0x67/0x5e0 but task is already holding lock: ffff9bbe88e4fc68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] which lock already depends on the new lock.
the existing dependency chain (in reverse order) is: -> #4 (&lo->lo_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 lo_open+0x28/0x60 [loop] blkdev_get_whole+0x25/0xf0 blkdev_get_by_dev.part.0+0x168/0x3c0 blkdev_open+0xd2/0xe0 do_dentry_open+0x161/0x390 path_openat+0x3cc/0xa20 do_filp_open+0x96/0x120 do_sys_openat2+0x7b/0x130 __x64_sys_openat+0x46/0x70 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #3 (&disk->open_mutex){+.+.}-{3:3}: __mutex_lock+0x7d/0x750 blkdev_get_by_dev.part.0+0x56/0x3c0 blkdev_get_by_path+0x98/0xa0 btrfs_get_bdev_and_sb+0x1b/0xb0 btrfs_find_device_by_devspec+0x12b/0x1c0 btrfs_rm_device+0x127/0x610 btrfs_ioctl+0x2a31/0x2e70 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #2 (sb_writers#12){.+.+}-{0:0}: lo_write_bvec+0xc2/0x240 [loop] loop_process_work+0x238/0xd00 [loop] process_one_work+0x26b/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #1 ((work_completion)(&lo->rootcg_work)){+.+.}-{0:0}: process_one_work+0x245/0x560 worker_thread+0x55/0x3c0 kthread+0x140/0x160 ret_from_fork+0x1f/0x30 -> #0 ((wq_completion)loop0){+.+.}-{0:0}: __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 flush_workqueue+0x91/0x5e0 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Chain exists of: (wq_completion)loop0 --> &disk->open_mutex --> &lo->lo_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&lo->lo_mutex); lock(&disk->open_mutex); lock(&lo->lo_mutex); lock((wq_completion)loop0); *** DEADLOCK *** 1 lock held by losetup/11576: #0: ffff9bbe88e4fc68 (&lo->lo_mutex){+.+.}-{3:3}, at: __loop_clr_fd+0x41/0x660 [loop] stack backtrace: CPU: 0 PID: 11576 Comm: losetup Not tainted 5.14.0-rc2+ #405 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x72 check_noncircular+0xcf/0xf0 ? stack_trace_save+0x3b/0x50 __lock_acquire+0x10ea/0x1d90 lock_acquire+0xb5/0x2b0 ? flush_workqueue+0x67/0x5e0 ? lockdep_init_map_type+0x47/0x220 flush_workqueue+0x91/0x5e0 ? flush_workqueue+0x67/0x5e0 ? verify_cpu+0xf0/0x100 drain_workqueue+0xa0/0x110 destroy_workqueue+0x36/0x250 __loop_clr_fd+0x9a/0x660 [loop] ? blkdev_ioctl+0x8d/0x2a0 block_ioctl+0x3f/0x50 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f31b02404cb Instead what we want to do is populate our device lookup args before we grab any locks, and then pass these args into btrfs_rm_device(). From there we can find the device and do the appropriate removal. 
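In outline, the reordering in the two ioctl handlers looks like this (a condensed sketch with error and "cancel" handling trimmed, not the verbatim kernel code - the full change follows in the diff below):

	BTRFS_DEV_LOOKUP_ARGS(args);

	/* Resolve the device spec while holding no locks; this is the
	 * step that may open the block device and read its super block. */
	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
		args.devid = vol_args->devid;
	else
		ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name);

	/* Only now take the sb write "lock" and claim the exclusive op. */
	ret = mnt_want_write_file(file);
	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel);

	/* The removal itself now only does an in-memory device lookup. */
	ret = btrfs_rm_device(fs_info, &args, &bdev, &mode);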
Suggested-by: Anand Jain Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 67 +++++++++++++++++++++++++++------------------- fs/btrfs/volumes.c | 15 +++++------ fs/btrfs/volumes.h | 2 +- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b8508af4e5395..c9d3f375df835 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3160,6 +3160,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args_v2 *vol_args; @@ -3171,35 +3172,39 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write_file(file); - if (ret) - return ret; - vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); - goto err_drop; + goto out; } if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { ret = -EOPNOTSUPP; goto out; } + vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; - if (!(vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) && - strcmp("cancel", vol_args->name) == 0) + if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { + args.devid = vol_args->devid; + } else if (!strcmp("cancel", vol_args->name)) { cancel = true; + } else { + ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); + if (ret) + goto out; + } + + ret = mnt_want_write_file(file); + if (ret) + goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret) - goto out; - /* Exclusive operation is now claimed */ + goto err_drop; - if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) - ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode); - else - ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode); + /* Exclusive operation is now claimed */ + ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); btrfs_exclop_finish(fs_info); @@ -3211,17 +3216,19 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) btrfs_info(fs_info, "device deleted: %s", vol_args->name); } -out: - kfree(vol_args); err_drop: mnt_drop_write_file(file); if (bdev) blkdev_put(bdev, mode); +out: + btrfs_put_dev_args_from_path(&args); + kfree(vol_args); return ret; } static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { + BTRFS_DEV_LOOKUP_ARGS(args); struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_ioctl_vol_args *vol_args; @@ -3233,32 +3240,38 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - ret = mnt_want_write_file(file); - if (ret) - return ret; - vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) { - ret = PTR_ERR(vol_args); - goto out_drop_write; - } + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - cancel = (strcmp("cancel", vol_args->name) == 0); + if (!strcmp("cancel", vol_args->name)) { + cancel = true; + } else { + ret = btrfs_get_dev_args_from_path(fs_info, &args, vol_args->name); + if (ret) + goto out; + } + + ret = mnt_want_write_file(file); + if (ret) + goto out; ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE, cancel); if (ret == 0) { - ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, 
&mode); + ret = btrfs_rm_device(fs_info, &args, &bdev, &mode); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); btrfs_exclop_finish(fs_info); } - kfree(vol_args); -out_drop_write: mnt_drop_write_file(file); if (bdev) blkdev_put(bdev, mode); +out: + btrfs_put_dev_args_from_path(&args); + kfree(vol_args); return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 340583942d222..debba6f048586 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2076,8 +2076,9 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, update_dev_time(bdev); } -int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, - u64 devid, struct block_device **bdev, fmode_t *mode) +int btrfs_rm_device(struct btrfs_fs_info *fs_info, + struct btrfs_dev_lookup_args *args, + struct block_device **bdev, fmode_t *mode) { struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; @@ -2096,14 +2097,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (ret) goto out; - device = btrfs_find_device_by_devspec(fs_info, devid, device_path); - - if (IS_ERR(device)) { - if (PTR_ERR(device) == -ENOENT && - device_path && strcmp(device_path, "missing") == 0) + device = btrfs_find_device(fs_info->fs_devices, args); + if (!device) { + if (args->missing) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else - ret = PTR_ERR(device); + ret = -ENOENT; goto out; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index acf7c1307e147..3b81306807493 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -529,7 +529,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args); void btrfs_free_device(struct btrfs_device *device); int btrfs_rm_device(struct btrfs_fs_info *fs_info, - const char *device_path, u64 devid, + struct btrfs_dev_lookup_args *args, struct block_device **bdev, fmode_t *mode); void __exit btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); From f2dc13a31107dfe1a0d820fb284c70afe132b55c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Oct 2021 11:05:50 +0200 Subject: [PATCH 0408/1202] btrfs: check-integrity: stop storing the block device name in btrfsic_dev_state Just use the %pg format specifier in all the debug printks previously using it. Note that both bdevname and the %pg specifier never print a pathname, so the kbasename call wasn't needed to start with. 
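The conversion boils down to the following pattern (a minimal sketch for illustration, not code taken from the patch):

	/* Before: snapshot the name into a fixed buffer once, at mount time,
	 * and print the possibly stale copy later. */
	char name[BDEVNAME_SIZE];
	bdevname(bdev, name);
	pr_info("btrfsic: read error at logical %llu dev %s!\n", start, name);

	/* After: let the printk core derive the name from the block_device
	 * at the moment the message is emitted, via the %pg extension. */
	pr_info("btrfsic: read error at logical %llu dev %pg!\n", start, bdev);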
Reviewed-by: Nikolay Borisov Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba [ adjust messages and indentation ] Signed-off-by: David Sterba --- fs/btrfs/check-integrity.c | 201 ++++++++++++++++++++----------------- 1 file changed, 110 insertions(+), 91 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ed646f2b46f4e..7e9f90fa0388b 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -186,7 +186,6 @@ struct btrfsic_dev_state { struct list_head collision_resolving_node; /* list node */ struct btrfsic_block dummy_block_for_bio_bh_flush; u64 last_flush_gen; - char name[BDEVNAME_SIZE]; }; struct btrfsic_block_hashtable { @@ -403,7 +402,6 @@ static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; ds->bdev = NULL; ds->state = NULL; - ds->name[0] = '\0'; INIT_LIST_HEAD(&ds->collision_resolving_node); ds->last_flush_gen = 0; btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); @@ -756,10 +754,10 @@ static int btrfsic_process_superblock_dev_mirror( superblock_tmp->mirror_num = 1 + superblock_mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) btrfs_info_in_rcu(fs_info, - "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)", + "new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)", superblock_bdev, rcu_str_deref(device->name), dev_bytenr, - dev_state->name, dev_bytenr, + dev_state->bdev, dev_bytenr, superblock_mirror_num); list_add(&superblock_tmp->all_blocks_node, &state->all_blocks_list); @@ -938,9 +936,10 @@ static noinline_for_stack int btrfsic_process_metablock( if (disk_item_offset + sizeof(struct btrfs_item) > sf->block_ctx->len) { leaf_item_out_of_bounce_error: - pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n", + pr_info( + "btrfsic: leaf item out of bounce at logical %llu, dev %pg\n", sf->block_ctx->start, - sf->block_ctx->dev->name); + sf->block_ctx->dev->bdev); goto one_stack_frame_backwards; } btrfsic_read_from_block_data(sf->block_ctx, @@ -1058,9 +1057,10 @@ static noinline_for_stack int btrfsic_process_metablock( (uintptr_t)nodehdr; if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > sf->block_ctx->len) { - pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n", + pr_info( + "btrfsic: node item out of bounce at logical %llu, dev %pg\n", sf->block_ctx->start, - sf->block_ctx->dev->name); + sf->block_ctx->dev->bdev); goto one_stack_frame_backwards; } btrfsic_read_from_block_data( @@ -1228,15 +1228,17 @@ static int btrfsic_create_link_to_next_block( if (next_block->logical_bytenr != next_bytenr && !(!next_block->is_metadata && 0 == next_block->logical_bytenr)) - pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", - next_bytenr, next_block_ctx->dev->name, + pr_info( +"referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n", + next_bytenr, next_block_ctx->dev->bdev, next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block), next_block->logical_bytenr); else - pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n", - next_bytenr, next_block_ctx->dev->name, + pr_info( + "referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n", + next_bytenr, next_block_ctx->dev->bdev, next_block_ctx->dev_bytenr, *mirror_nump, btrfsic_get_block_type(state, next_block)); @@ -1324,8 +1326,8 @@ static int btrfsic_handle_extent_data( if (file_extent_item_offset + offsetof(struct 
btrfs_file_extent_item, disk_num_bytes) > block_ctx->len) { - pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", - block_ctx->start, block_ctx->dev->name); + pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n", + block_ctx->start, block_ctx->dev->bdev); return -1; } @@ -1344,8 +1346,8 @@ static int btrfsic_handle_extent_data( if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > block_ctx->len) { - pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", - block_ctx->start, block_ctx->dev->name); + pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n", + block_ctx->start, block_ctx->dev->bdev); return -1; } btrfsic_read_from_block_data(block_ctx, &file_extent_item, @@ -1421,9 +1423,10 @@ static int btrfsic_handle_extent_data( next_block->logical_bytenr != next_bytenr && !(!next_block->is_metadata && 0 == next_block->logical_bytenr)) { - pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n", + pr_info( +"referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n", next_bytenr, - next_block_ctx.dev->name, + next_block_ctx.dev->bdev, next_block_ctx.dev_bytenr, mirror_num, next_block->logical_bytenr); @@ -1577,8 +1580,8 @@ static int btrfsic_read_block(struct btrfsic_state *state, return -1; } if (submit_bio_wait(bio)) { - pr_info("btrfsic: read error at logical %llu dev %s!\n", - block_ctx->start, block_ctx->dev->name); + pr_info("btrfsic: read error at logical %llu dev %pg!\n", + block_ctx->start, block_ctx->dev->bdev); bio_put(bio); return -1; } @@ -1602,33 +1605,35 @@ static void btrfsic_dump_database(struct btrfsic_state *state) list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) { const struct btrfsic_block_link *l; - pr_info("%c-block @%llu (%s/%llu/%d)\n", + pr_info("%c-block @%llu (%pg/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - b_all->logical_bytenr, b_all->dev_state->name, + b_all->logical_bytenr, b_all->dev_state->bdev, b_all->dev_bytenr, b_all->mirror_num); list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) { - pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n", + pr_info( + " %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - b_all->logical_bytenr, b_all->dev_state->name, + b_all->logical_bytenr, b_all->dev_state->bdev, b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) { - pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", + pr_info( + " %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n", btrfsic_get_block_type(state, b_all), - b_all->logical_bytenr, b_all->dev_state->name, + b_all->logical_bytenr, b_all->dev_state->bdev, b_all->dev_bytenr, b_all->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), l->block_ref_from->logical_bytenr, - l->block_ref_from->dev_state->name, + l->block_ref_from->dev_state->bdev, l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); } @@ -1743,16 +1748,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, if (block->logical_bytenr != bytenr && !(!block->is_metadata && block->logical_bytenr == 0)) - pr_info("Written 
block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", - bytenr, dev_state->name, + pr_info( +"written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n", + bytenr, dev_state->bdev, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block), block->logical_bytenr); else - pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", - bytenr, dev_state->name, + pr_info( + "written block @%llu (%pg/%llu/%d) found in hash table, %c\n", + bytenr, dev_state->bdev, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); @@ -1767,8 +1774,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, processed_len = state->datablock_size; bytenr = block->logical_bytenr; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", - bytenr, dev_state->name, dev_bytenr, + pr_info( + "written block @%llu (%pg/%llu/%d) found in hash table, %c\n", + bytenr, dev_state->bdev, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); } @@ -1778,9 +1786,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, list_empty(&block->ref_to_list) ? ' ' : '!', list_empty(&block->ref_from_list) ? ' ' : '!'); if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { - pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n", + pr_info( +"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n", btrfsic_get_block_type(state, block), bytenr, - dev_state->name, dev_bytenr, block->mirror_num, + dev_state->bdev, dev_bytenr, block->mirror_num, block->generation, btrfs_disk_key_objectid(&block->disk_key), block->disk_key.type, @@ -1792,9 +1801,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, } if (!block->is_iodone && !block->never_written) { - pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n", + pr_info( +"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n", btrfsic_get_block_type(state, block), bytenr, - dev_state->name, dev_bytenr, block->mirror_num, + dev_state->bdev, dev_bytenr, block->mirror_num, block->generation, btrfs_stack_header_generation( (struct btrfs_header *) @@ -1921,8 +1931,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, if (!is_metadata) { processed_len = state->datablock_size; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("Written block (%s/%llu/?) !found in hash table, D.\n", - dev_state->name, dev_bytenr); + pr_info( + "written block (%pg/%llu/?) !found in hash table, D\n", + dev_state->bdev, dev_bytenr); if (!state->include_extent_data) { /* ignore that written D block */ goto continue_loop; @@ -1939,8 +1950,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, dev_bytenr); if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n", - bytenr, dev_state->name, dev_bytenr); + pr_info( + "written block @%llu (%pg/%llu/?) 
!found in hash table, M\n", + bytenr, dev_state->bdev, dev_bytenr); } block_ctx.dev = dev_state; @@ -1995,9 +2007,9 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, block->next_in_same_bio = NULL; } if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("New written %c-block @%llu (%s/%llu/%d)\n", + pr_info("new written %c-block @%llu (%pg/%llu/%d)\n", is_metadata ? 'M' : 'D', - block->logical_bytenr, block->dev_state->name, + block->logical_bytenr, block->dev_state->bdev, block->dev_bytenr, block->mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); @@ -2041,10 +2053,10 @@ static void btrfsic_bio_end_io(struct bio *bp) if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", + pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n", bp->bi_status, btrfsic_get_block_type(dev_state->state, block), - block->logical_bytenr, dev_state->name, + block->logical_bytenr, dev_state->bdev, block->dev_bytenr, block->mirror_num); next_block = block->next_in_same_bio; block->iodone_w_error = iodone_w_error; @@ -2052,8 +2064,8 @@ static void btrfsic_bio_end_io(struct bio *bp) dev_state->last_flush_gen++; if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) - pr_info("bio_end_io() new %s flush_gen=%llu\n", - dev_state->name, + pr_info("bio_end_io() new %pg flush_gen=%llu\n", + dev_state->bdev, dev_state->last_flush_gen); } if (block->submit_bio_bh_rw & REQ_FUA) @@ -2078,17 +2090,19 @@ static int btrfsic_process_written_superblock( if (!(superblock->generation > state->max_superblock_generation || 0 == state->max_superblock_generation)) { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n", + pr_info( + "btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n", superblock->logical_bytenr, - superblock->dev_state->name, + superblock->dev_state->bdev, superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), state->max_superblock_generation); } else { if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) - pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n", + pr_info( + "btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n", superblock->logical_bytenr, - superblock->dev_state->name, + superblock->dev_state->bdev, superblock->dev_bytenr, superblock->mirror_num, btrfs_super_generation(super_hdr), state->max_superblock_generation); @@ -2232,38 +2246,42 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, */ list_for_each_entry(l, &block->ref_to_list, node_ref_to) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n", + pr_info( + "rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - block->logical_bytenr, block->dev_state->name, + block->logical_bytenr, block->dev_state->bdev, block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); if (l->block_ref_to->never_written) { - pr_info("btrfs: attempt to write superblock which references block %c @%llu 
(%s/%llu/%d) which is never written!\n", + pr_info( +"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (!l->block_ref_to->is_iodone) { - pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n", + pr_info( +"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; } else if (l->block_ref_to->iodone_w_error) { - pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n", + pr_info( +"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); ret = -1; @@ -2273,10 +2291,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, l->parent_generation && BTRFSIC_GENERATION_UNKNOWN != l->block_ref_to->generation) { - pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n", + pr_info( +"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num, l->block_ref_to->generation, @@ -2284,10 +2303,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, ret = -1; } else if (l->block_ref_to->flush_gen > l->block_ref_to->dev_state->last_flush_gen) { - pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n", + pr_info( +"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n", btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num, block->flush_gen, l->block_ref_to->dev_state->last_flush_gen); @@ -2324,15 +2344,16 @@ static int btrfsic_is_block_ref_by_superblock( */ list_for_each_entry(l, &block->ref_from_list, node_ref_from) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", + pr_info( + "rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n", recursion_level, btrfsic_get_block_type(state, block), - block->logical_bytenr, block->dev_state->name, + block->logical_bytenr, block->dev_state->bdev, block->dev_bytenr, block->mirror_num, l->ref_cnt, btrfsic_get_block_type(state, 
l->block_ref_from), l->block_ref_from->logical_bytenr, - l->block_ref_from->dev_state->name, + l->block_ref_from->dev_state->bdev, l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num); if (l->block_ref_from->is_superblock && @@ -2354,30 +2375,30 @@ static int btrfsic_is_block_ref_by_superblock( static void btrfsic_print_add_link(const struct btrfsic_state *state, const struct btrfsic_block_link *l) { - pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", + pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), l->block_ref_from->logical_bytenr, - l->block_ref_from->dev_state->name, + l->block_ref_from->dev_state->bdev, l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } static void btrfsic_print_rem_link(const struct btrfsic_state *state, const struct btrfsic_block_link *l) { - pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", + pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n", l->ref_cnt, btrfsic_get_block_type(state, l->block_ref_from), l->block_ref_from->logical_bytenr, - l->block_ref_from->dev_state->name, + l->block_ref_from->dev_state->bdev, l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, btrfsic_get_block_type(state, l->block_ref_to), l->block_ref_to->logical_bytenr, - l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, + l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, l->block_ref_to->mirror_num); } @@ -2419,9 +2440,9 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. 
*/ - indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", + indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)", btrfsic_get_block_type(state, block), - block->logical_bytenr, block->dev_state->name, + block->logical_bytenr, block->dev_state->bdev, block->dev_bytenr, block->mirror_num); if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { printk("[...]\n"); @@ -2542,10 +2563,10 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( block->never_written = never_written; block->mirror_num = mirror_num; if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) - pr_info("New %s%c-block @%llu (%s/%llu/%d)\n", + pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n", additional_string, btrfsic_get_block_type(state, block), - block->logical_bytenr, dev_state->name, + block->logical_bytenr, dev_state->bdev, block->dev_bytenr, mirror_num); list_add(&block->all_blocks_node, &state->all_blocks_list); btrfsic_block_hashtable_add(block, &state->block_hashtable); @@ -2592,8 +2613,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, } if (WARN_ON(!match)) { - pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n", - bytenr, dev_state->name, dev_bytenr); + pr_info( +"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n", + bytenr, dev_state->bdev, dev_bytenr); for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { ret = btrfsic_map_block(state, bytenr, state->metablock_size, @@ -2601,8 +2623,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, if (ret) continue; - pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n", - bytenr, block_ctx.dev->name, + pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n", + bytenr, block_ctx.dev->bdev, block_ctx.dev_bytenr, mirror_num); } } @@ -2675,8 +2697,9 @@ static void __btrfsic_submit_bio(struct bio *bio) if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | BTRFSIC_PRINT_MASK_VERBOSE))) - pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n", - dev_state->name); + pr_info( +"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n", + dev_state->bdev); } else { struct btrfsic_block *const block = &dev_state->dummy_block_for_bio_bh_flush; @@ -2751,7 +2774,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info, list_for_each_entry(device, dev_head, dev_list) { struct btrfsic_dev_state *ds; - const char *p; if (!device->bdev || !device->name) continue; @@ -2763,10 +2785,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info, } ds->bdev = device->bdev; ds->state = state; - bdevname(ds->bdev, ds->name); - ds->name[BDEVNAME_SIZE - 1] = '\0'; - p = kbasename(ds->name); - strlcpy(ds->name, p, sizeof(ds->name)); btrfsic_dev_state_hashtable_add(ds, &btrfsic_dev_state_hashtable); } @@ -2844,9 +2862,10 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices) if (b_all->is_iodone || b_all->never_written) btrfsic_block_free(b_all); else - pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n", + pr_info( +"btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n", btrfsic_get_block_type(state, b_all), - b_all->logical_bytenr, b_all->dev_state->name, + b_all->logical_bytenr, b_all->dev_state->bdev, 
b_all->dev_bytenr, b_all->mirror_num); } From d083008e6f75875cadf870b1363e94f41168c72f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 14 Oct 2021 18:39:02 +0900 Subject: [PATCH 0409/1202] btrfs: zoned: use greedy gc for auto reclaim Currently auto reclaim of unusable zones reclaims the block-groups in the order they have been added to the reclaim list. Change this to a greedy algorithm by sorting the list so we have the block-groups with the least amount of valid bytes reclaimed first. Note: we can't splice the block groups from reclaim_bgs to let the sort happen outside of the lock. The block groups can be still in use by other parts eg. via bg_list and we must hold unused_bgs_lock while processing them. Signed-off-by: Johannes Thumshirn [ write note and comment why we can't splice the list ] Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 7dba9028c80c0..de9aeb3733cfb 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include "misc.h" #include "ctree.h" #include "block-group.h" @@ -1486,6 +1487,21 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg) spin_unlock(&fs_info->unused_bgs_lock); } +/* + * We want block groups with a low number of used bytes to be in the beginning + * of the list, so they will get reclaimed first. + */ +static int reclaim_bgs_cmp(void *unused, const struct list_head *a, + const struct list_head *b) +{ + const struct btrfs_block_group *bg1, *bg2; + + bg1 = list_entry(a, struct btrfs_block_group, bg_list); + bg2 = list_entry(b, struct btrfs_block_group, bg_list); + + return bg1->used > bg2->used; +} + void btrfs_reclaim_bgs_work(struct work_struct *work) { struct btrfs_fs_info *fs_info = @@ -1510,6 +1526,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) } spin_lock(&fs_info->unused_bgs_lock); + /* + * Sort happens under lock because we can't simply splice it and sort. + * The block groups might still be in use and reachable via bg_list, + * and their presence in the reclaim_bgs list must be preserved. + */ + list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); while (!list_empty(&fs_info->reclaim_bgs)) { u64 zone_unusable; int ret = 0; From 9c1f73374a175b95be4391d61ae38b7c5da92785 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 13 Oct 2021 10:12:49 +0100 Subject: [PATCH 0410/1202] btrfs: fix deadlock between chunk allocation and chunk btree modifications When a task is doing some modification to the chunk btree and it is not in the context of a chunk allocation or a chunk removal, it can deadlock with another task that is currently allocating a new data or metadata chunk. These contexts are the following: * When relocating a system chunk, when we need to COW the extent buffers that belong to the chunk btree; * When adding a new device (ioctl), where we need to add a new device item to the chunk btree; * When removing a device (ioctl), where we need to remove a device item from the chunk btree; * When resizing a device (ioctl), where we need to update a device item in the chunk btree and may need to relocate a system chunk that lies beyond the new device size when shrinking a device. 
The problem happens due to a sequence of steps like the following: 1) Task A starts a data or metadata chunk allocation and it locks the chunk mutex; 2) Task B is relocating a system chunk, and when it needs to COW an extent buffer of the chunk btree, it has locked both that extent buffer as well as its parent extent buffer; 3) Since there is not enough available system space, either because none of the existing system block groups have enough free space or because the only one with enough free space is in RO mode due to the relocation, task B triggers a new system chunk allocation. It blocks when trying to acquire the chunk mutex, currently held by task A; 4) Task A enters btrfs_chunk_alloc_add_chunk_item(), in order to insert the new chunk item into the chunk btree and update the existing device items there. But in order to do that, it has to lock the extent buffer that task B locked at step 2, or its parent extent buffer, but task B is waiting on the chunk mutex, which is currently locked by task A, therefore resulting in a deadlock. One example report when the deadlock happens with system chunk relocation: INFO: task kworker/u9:5:546 blocked for more than 143 seconds. Not tainted 5.15.0-rc3+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u9:5 state:D stack:25936 pid: 546 ppid: 2 flags:0x00004000 Workqueue: events_unbound btrfs_async_reclaim_metadata_space Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 rwsem_down_read_slowpath+0x4ee/0x9d0 kernel/locking/rwsem.c:993 __down_read_common kernel/locking/rwsem.c:1214 [inline] __down_read kernel/locking/rwsem.c:1223 [inline] down_read_nested+0xe6/0x440 kernel/locking/rwsem.c:1590 __btrfs_tree_read_lock+0x31/0x350 fs/btrfs/locking.c:47 btrfs_tree_read_lock fs/btrfs/locking.c:54 [inline] btrfs_read_lock_root_node+0x8a/0x320 fs/btrfs/locking.c:191 btrfs_search_slot_get_root fs/btrfs/ctree.c:1623 [inline] btrfs_search_slot+0x13b4/0x2140 fs/btrfs/ctree.c:1728 btrfs_update_device+0x11f/0x500 fs/btrfs/volumes.c:2794 btrfs_chunk_alloc_add_chunk_item+0x34d/0xea0 fs/btrfs/volumes.c:5504 do_chunk_alloc fs/btrfs/block-group.c:3408 [inline] btrfs_chunk_alloc+0x84d/0xf50 fs/btrfs/block-group.c:3653 flush_space+0x54e/0xd80 fs/btrfs/space-info.c:670 btrfs_async_reclaim_metadata_space+0x396/0xa90 fs/btrfs/space-info.c:953 process_one_work+0x9df/0x16d0 kernel/workqueue.c:2297 worker_thread+0x90/0xed0 kernel/workqueue.c:2444 kthread+0x3e5/0x4d0 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 INFO: task syz-executor:9107 blocked for more than 143 seconds. Not tainted 5.15.0-rc3+ #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
task:syz-executor state:D stack:23200 pid: 9107 ppid: 7792 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:6425 __mutex_lock_common kernel/locking/mutex.c:669 [inline] __mutex_lock+0xc96/0x1680 kernel/locking/mutex.c:729 btrfs_chunk_alloc+0x31a/0xf50 fs/btrfs/block-group.c:3631 find_free_extent_update_loop fs/btrfs/extent-tree.c:3986 [inline] find_free_extent+0x25cb/0x3a30 fs/btrfs/extent-tree.c:4335 btrfs_reserve_extent+0x1f1/0x500 fs/btrfs/extent-tree.c:4415 btrfs_alloc_tree_block+0x203/0x1120 fs/btrfs/extent-tree.c:4813 __btrfs_cow_block+0x412/0x1620 fs/btrfs/ctree.c:415 btrfs_cow_block+0x2f6/0x8c0 fs/btrfs/ctree.c:570 btrfs_search_slot+0x1094/0x2140 fs/btrfs/ctree.c:1768 relocate_tree_block fs/btrfs/relocation.c:2694 [inline] relocate_tree_blocks+0xf73/0x1770 fs/btrfs/relocation.c:2757 relocate_block_group+0x47e/0xc70 fs/btrfs/relocation.c:3673 btrfs_relocate_block_group+0x48a/0xc60 fs/btrfs/relocation.c:4070 btrfs_relocate_chunk+0x96/0x280 fs/btrfs/volumes.c:3181 __btrfs_balance fs/btrfs/volumes.c:3911 [inline] btrfs_balance+0x1f03/0x3cd0 fs/btrfs/volumes.c:4301 btrfs_ioctl_balance+0x61e/0x800 fs/btrfs/ioctl.c:4137 btrfs_ioctl+0x39ea/0x7b70 fs/btrfs/ioctl.c:4949 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae So fix this by making sure that whenever we try to modify the chunk btree and we are neither in a chunk allocation context nor in a chunk remove context, we reserve system space before modifying the chunk btree. Reported-by: Hao Sun Link: https://lore.kernel.org/linux-btrfs/CACkBjsax51i4mu6C0C3vJqQN3NR_iVuucoeG3U1HXjrgzn5FFQ@mail.gmail.com/ Fixes: 79bd37120b1495 ("btrfs: rework chunk allocation to avoid exhaustion of the system chunk array") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 146 +++++++++++++++++++++++++---------------- fs/btrfs/block-group.h | 2 + fs/btrfs/relocation.c | 4 ++ fs/btrfs/volumes.c | 15 ++++- 4 files changed, 111 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index de9aeb3733cfb..f971d043469c8 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3425,25 +3425,6 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) goto out; } - /* - * If this is a system chunk allocation then stop right here and do not - * add the chunk item to the chunk btree. This is to prevent a deadlock - * because this system chunk allocation can be triggered while COWing - * some extent buffer of the chunk btree and while holding a lock on a - * parent extent buffer, in which case attempting to insert the chunk - * item (or update the device item) would result in a deadlock on that - * parent extent buffer. In this case defer the chunk btree updates to - * the second phase of chunk allocation and keep our reservation until - * the second phase completes. - * - * This is a rare case and can only be triggered by the very few cases - * we have where we need to touch the chunk btree outside chunk allocation - * and chunk removal. These cases are basically adding a device, removing - * a device or resizing a device. 
- */ - if (flags & BTRFS_BLOCK_GROUP_SYSTEM) - return 0; - ret = btrfs_chunk_alloc_add_chunk_item(trans, bg); /* * Normally we are not expected to fail with -ENOSPC here, since we have @@ -3576,14 +3557,14 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of * the system chunk array due to concurrent allocations") provides more details. * - * For allocation of system chunks, we defer the updates and insertions into the - * chunk btree to phase 2. This is to prevent deadlocks on extent buffers because - * if the chunk allocation is triggered while COWing an extent buffer of the - * chunk btree, we are holding a lock on the parent of that extent buffer and - * doing the chunk btree updates and insertions can require locking that parent. - * This is for the very few and rare cases where we update the chunk btree that - * are not chunk allocation or chunk removal: adding a device, removing a device - * or resizing a device. + * Allocation of system chunks does not happen through this function. A task that + * needs to update the chunk btree (the only btree that uses system chunks), must + * preallocate chunk space by calling either check_system_chunk() or + * btrfs_reserve_chunk_metadata() - the former is used when allocating a data or + * metadata chunk or when removing a chunk, while the later is used before doing + * a modification to the chunk btree - use cases for the later are adding, + * removing and resizing a device as well as relocation of a system chunk. + * See the comment below for more details. * * The reservation of system space, done through check_system_chunk(), as well * as all the updates and insertions into the chunk btree must be done while @@ -3620,11 +3601,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, if (trans->allocating_chunk) return -ENOSPC; /* - * If we are removing a chunk, don't re-enter or we would deadlock. - * System space reservation and system chunk allocation is done by the - * chunk remove operation (btrfs_remove_chunk()). + * Allocation of system chunks can not happen through this path, as we + * could end up in a deadlock if we are allocating a data or metadata + * chunk and there is another task modifying the chunk btree. + * + * This is because while we are holding the chunk mutex, we will attempt + * to add the new chunk item to the chunk btree or update an existing + * device item in the chunk btree, while the other task that is modifying + * the chunk btree is attempting to COW an extent buffer while holding a + * lock on it and on its parent - if the COW operation triggers a system + * chunk allocation, then we can deadlock because we are holding the + * chunk mutex and we may need to access that extent buffer or its parent + * in order to add the chunk item or update a device item. + * + * Tasks that want to modify the chunk tree should reserve system space + * before updating the chunk btree, by calling either + * btrfs_reserve_chunk_metadata() or check_system_chunk(). + * It's possible that after a task reserves the space, it still ends up + * here - this happens in the cases described above at do_chunk_alloc(). + * The task will have to either retry or fail. 
*/ - if (trans->removing_chunk) + if (flags & BTRFS_BLOCK_GROUP_SYSTEM) return -ENOSPC; space_info = btrfs_find_space_info(fs_info, flags); @@ -3723,17 +3720,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type) return num_dev; } -/* - * Reserve space in the system space for allocating or removing a chunk - */ -void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) +static void reserve_chunk_space(struct btrfs_trans_handle *trans, + u64 bytes, + u64 type) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_space_info *info; u64 left; - u64 thresh; int ret = 0; - u64 num_devs; /* * Needed because we can end up allocating a system chunk and for an @@ -3746,19 +3740,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) left = info->total_bytes - btrfs_space_info_used(info, true); spin_unlock(&info->lock); - num_devs = get_profile_num_devs(fs_info, type); - - /* num_devs device items to update and 1 chunk item to add or remove */ - thresh = btrfs_calc_metadata_size(fs_info, num_devs) + - btrfs_calc_insert_metadata_size(fs_info, 1); - - if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { + if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu", - left, thresh, type); + left, bytes, type); btrfs_dump_space_info(fs_info, info, 0, 0); } - if (left < thresh) { + if (left < bytes) { u64 flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *bg; @@ -3767,21 +3755,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) * needing it, as we might not need to COW all nodes/leafs from * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). - * - * Also, if our caller is allocating a system chunk, do not - * attempt to insert the chunk item in the chunk btree, as we - * could deadlock on an extent buffer since our caller may be - * COWing an extent buffer from the chunk btree. */ bg = btrfs_create_chunk(trans, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); - } else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) { + } else { /* * If we fail to add the chunk item here, we end up * trying again at phase 2 of chunk allocation, at * btrfs_create_pending_block_groups(). So ignore - * any error here. + * any error here. An ENOSPC here could happen, due to + * the cases described at do_chunk_alloc() - the system + * block group we just created was just turned into RO + * mode by a scrub for example, or a running discard + * temporarily removed its free space entries, etc. */ btrfs_chunk_alloc_add_chunk_item(trans, bg); } @@ -3790,12 +3777,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) if (!ret) { ret = btrfs_block_rsv_add(fs_info->chunk_root, &fs_info->chunk_block_rsv, - thresh, BTRFS_RESERVE_NO_FLUSH); + bytes, BTRFS_RESERVE_NO_FLUSH); if (!ret) - trans->chunk_bytes_reserved += thresh; + trans->chunk_bytes_reserved += bytes; } } +/* + * Reserve space in the system space for allocating or removing a chunk. + * The caller must be holding fs_info->chunk_mutex. + */ +void check_system_chunk(struct btrfs_trans_handle *trans, u64 type) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + const u64 num_devs = get_profile_num_devs(fs_info, type); + u64 bytes; + + /* num_devs device items to update and 1 chunk item to add or remove. 
*/ + bytes = btrfs_calc_metadata_size(fs_info, num_devs) + + btrfs_calc_insert_metadata_size(fs_info, 1); + + reserve_chunk_space(trans, bytes, type); +} + +/* + * Reserve space in the system space, if needed, for doing a modification to the + * chunk btree. + * + * @trans: A transaction handle. + * @is_item_insertion: Indicate if the modification is for inserting a new item + * in the chunk btree or if it's for the deletion or update + * of an existing item. + * + * This is used in a context where we need to update the chunk btree outside + * block group allocation and removal, to avoid a deadlock with a concurrent + * task that is allocating a metadata or data block group and therefore needs to + * update the chunk btree while holding the chunk mutex. After the update to the + * chunk btree is done, btrfs_trans_release_chunk_metadata() should be called. + * + */ +void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, + bool is_item_insertion) +{ + struct btrfs_fs_info *fs_info = trans->fs_info; + u64 bytes; + + if (is_item_insertion) + bytes = btrfs_calc_insert_metadata_size(fs_info, 1); + else + bytes = btrfs_calc_metadata_size(fs_info, 1); + + mutex_lock(&fs_info->chunk_mutex); + reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM); + mutex_unlock(&fs_info->chunk_mutex); +} + void btrfs_put_block_group_cache(struct btrfs_fs_info *info) { struct btrfs_block_group *block_group; diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 07f977d3816c7..5878b7ce3b78e 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -293,6 +293,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, enum btrfs_chunk_alloc_enum force); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type); void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type); +void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans, + bool is_item_insertion); u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags); void btrfs_put_block_group_cache(struct btrfs_fs_info *info); int btrfs_free_block_groups(struct btrfs_fs_info *info); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fed8235962488..33a0ee7ac5906 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2692,8 +2692,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, list_add_tail(&node->list, &rc->backref_cache.changed); } else { path->lowest_level = node->level; + if (root == root->fs_info->chunk_root) + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, key, path, 0, 1); btrfs_release_path(path); + if (root == root->fs_info->chunk_root) + btrfs_trans_release_chunk_metadata(trans); if (ret > 0) ret = 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index debba6f048586..9eab8a7411665 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1847,8 +1847,10 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + btrfs_reserve_chunk_metadata(trans, true); ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path, &key, sizeof(*dev_item)); + btrfs_trans_release_chunk_metadata(trans); if (ret) goto out; @@ -1921,7 +1923,9 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + btrfs_trans_release_chunk_metadata(trans); if 
(ret) { if (ret > 0) ret = -ENOENT; @@ -2513,7 +2517,9 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) key.type = BTRFS_DEV_ITEM_KEY; while (1) { + btrfs_reserve_chunk_metadata(trans, false); ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + btrfs_trans_release_chunk_metadata(trans); if (ret < 0) goto error; @@ -2862,6 +2868,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_super_block *super_copy = fs_info->super_copy; u64 old_total; u64 diff; + int ret; if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return -EACCES; @@ -2890,7 +2897,11 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, &trans->transaction->dev_update_list); mutex_unlock(&fs_info->chunk_mutex); - return btrfs_update_device(trans, device); + btrfs_reserve_chunk_metadata(trans, false); + ret = btrfs_update_device(trans, device); + btrfs_trans_release_chunk_metadata(trans); + + return ret; } static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) @@ -4925,8 +4936,10 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) round_down(old_total - diff, fs_info->sectorsize)); mutex_unlock(&fs_info->chunk_mutex); + btrfs_reserve_chunk_metadata(trans, false); /* Now btrfs_update_device() will change the on-disk size. */ ret = btrfs_update_device(trans, device); + btrfs_trans_release_chunk_metadata(trans); if (ret < 0) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); From c16a57c9117b2c65f24f00478032864d306afd21 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 13 Oct 2021 10:12:50 +0100 Subject: [PATCH 0411/1202] btrfs: update comments for chunk allocation -ENOSPC cases Update the comments at btrfs_chunk_alloc() and do_chunk_alloc() that describe which cases can lead to a failure to allocate metadata and system space despite having previously reserved space. This adds one more reason that I previously forgot to mention. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index f971d043469c8..444e9c89ff3e9 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -3429,7 +3429,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) /* * Normally we are not expected to fail with -ENOSPC here, since we have * previously reserved space in the system space_info and allocated one - * new system chunk if necessary. However there are two exceptions: + * new system chunk if necessary. However there are three exceptions: * * 1) We may have enough free space in the system space_info but all the * existing system block groups have a profile which can not be used @@ -3455,7 +3455,14 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) * with enough free space got turned into RO mode by a running scrub, * and in this case we have to allocate a new one and retry. We only * need do this allocate and retry once, since we have a transaction - * handle and scrub uses the commit root to search for block groups. 
+ * handle and scrub uses the commit root to search for block groups; + * + * 3) We had one system block group with enough free space when we called + * check_system_chunk(), but after that, right before we tried to + * allocate the last extent buffer we needed, a discard operation came + * in and it temporarily removed the last free space entry from the + * block group (discard removes a free space entry, discards it, and + * then adds back the entry to the block group cache). */ if (ret == -ENOSPC) { const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); @@ -3539,7 +3546,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) * properly, either intentionally or as a bug. One example where this is * done intentionally is fsync, as it does not reserve any transaction units * and ends up allocating a variable number of metadata extents for log - * tree extent buffers. + * tree extent buffers; + * + * 4) The task has reserved enough transaction units / metadata space, but right + * before it tries to allocate the last extent buffer it needs, a discard + * operation comes in and, temporarily, removes the last free space entry from + * the only metadata block group that had free space (discard starts by + * removing a free space entry from a block group, then does the discard + * operation and, once it's done, it adds back the free space entry to the + * block group). * * We also need this 2 phases setup when adding a device to a filesystem with * a seed device - we must create new metadata and system chunks without adding From 19acaae447d77569bb7c893d3a50d7f07587d487 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Sep 2021 14:00:38 +0300 Subject: [PATCH 0412/1202] phy: ti: gmii-sel: check of_get_address() for failure Smatch complains that if of_get_address() returns NULL, then "size" isn't initialized. Also it would lead to an Oops. Fixes: 7f78322cdd67 ("phy: ti: gmii-sel: retrieve ports number and base offset from dt") Signed-off-by: Dan Carpenter Reviewed-by: Grygorii Strashko Link: https://lore.kernel.org/r/20210914110038.GB11657@kili Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-gmii-sel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 5fd2e8a08bfcf..d0ab69750c6b4 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -320,6 +320,8 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) u64 size; offset = of_get_address(dev->of_node, 0, &size, NULL); + if (!offset) + return -EINVAL; priv->num_ports = size / sizeof(u32); if (!priv->num_ports) return -EINVAL; From 2384bf50c6c687687b0faa31bab515fd0b8d3de9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 18 Oct 2021 19:21:44 +0200 Subject: [PATCH 0413/1202] gfs2: Fix mmap + page fault deadlocks for buffered I/O In the .read_iter and .write_iter file operations, we're accessing user-space memory while holding the inode glock. There is a possibility that the memory is mapped to the same file, in which case we'd recurse on the same glock. We could detect and work around this simple case of recursive locking, but more complex scenarios exist that involve multiple glocks, processes, and cluster nodes, and working around all of those cases isn't practical or even possible. Avoid these kinds of problems by disabling page faults while holding the inode glock. 
If a page fault would occur, we either end up with a partial read or write or with -EFAULT if nothing could be read or written. In either case, we know that we're not done with the operation, so we indicate that we're willing to give up the inode glock and then we fault in the missing pages. If that made us lose the inode glock, we return a partial read or write. Otherwise, we resume the operation. This locking problem was originally reported by Jan Kara. Linus came up with the idea of disabling page faults. Many thanks to Al Viro and Matthew Wilcox for their feedback. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8f37e4bab995b..b07b9c2d04463 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -776,6 +776,36 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } +static bool should_fault_in_pages(struct iov_iter *i, size_t *prev_count, + size_t *window_size) +{ + char __user *p = i->iov[0].iov_base + i->iov_offset; + size_t count = iov_iter_count(i); + size_t size; + + if (!iter_is_iovec(i)) + return false; + + if (*prev_count != count || !*window_size) { + int pages, nr_dirtied; + + pages = min_t(int, BIO_MAX_VECS, + DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE)); + nr_dirtied = max(current->nr_dirtied_pause - + current->nr_dirtied, 1); + pages = min(pages, nr_dirtied); + size = (size_t)PAGE_SIZE * pages - offset_in_page(p); + } else { + size = (size_t)PAGE_SIZE - offset_in_page(p); + if (*window_size <= size) + return false; + } + + *prev_count = count; + *window_size = size; + return true; +} + static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct gfs2_holder *gh) { @@ -840,9 +870,17 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct gfs2_inode *ip; struct gfs2_holder gh; + size_t prev_count = 0, window_size = 0; size_t written = 0; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + */ + if (iocb->ki_flags & IOCB_DIRECT) { ret = gfs2_file_direct_read(iocb, to, &gh); if (likely(ret != -ENOTBLK)) @@ -864,13 +902,35 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } ip = GFS2_I(iocb->ki_filp->f_mapping->host); gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); +retry: ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; +retry_under_glock: + pagefault_disable(); ret = generic_file_read_iter(iocb, to); + pagefault_enable(); if (ret > 0) written += ret; - gfs2_glock_dq(&gh); + + if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) && + should_fault_in_pages(to, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(&gh); + leftover = fault_in_iov_iter_writeable(to, window_size); + gfs2_holder_disallow_demote(&gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(&gh)) { + if (written) + goto out_uninit; + goto retry; + } + goto retry_under_glock; + } + } + if (gfs2_holder_queued(&gh)) + gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); return written ? 
written : ret; @@ -885,8 +945,17 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder *statfs_gh = NULL; + size_t prev_count = 0, window_size = 0; + size_t read = 0; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + */ + if (inode == sdp->sd_rindex) { statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS); if (!statfs_gh) @@ -894,10 +963,11 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, } gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; - +retry_under_glock: if (inode == sdp->sd_rindex) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); @@ -908,21 +978,42 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, } current->backing_dev_info = inode_to_bdi(inode); + pagefault_disable(); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + pagefault_enable(); current->backing_dev_info = NULL; - if (ret > 0) + if (ret > 0) { iocb->ki_pos += ret; + read += ret; + } if (inode == sdp->sd_rindex) gfs2_glock_dq_uninit(statfs_gh); + if (unlikely(iov_iter_count(from) && (ret > 0 || ret == -EFAULT)) && + should_fault_in_pages(from, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) { + if (read) + goto out_uninit; + goto retry; + } + goto retry_under_glock; + } + } out_unlock: - gfs2_glock_dq(gh); + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); if (statfs_gh) kfree(statfs_gh); - return ret; + return read ? read : ret; } /** From a0b6265f5ce70385c6634d114e4a7c616b3ec11f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 15 Jul 2021 22:31:02 +0200 Subject: [PATCH 0414/1202] iomap: Fix iomap_dio_rw return value for user copies When a user copy fails in one of the helpers of iomap_dio_rw, fail with -EFAULT instead of returning 0. This matches what iomap_dio_bio_actor returns when it gets an -EFAULT from bio_iov_iter_get_pages. With these changes, iomap_dio_actor now consistently fails with -EFAULT when a user page cannot be faulted in. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/iomap/direct-io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 4ecd255e0511c..a2a368e824c03 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -371,6 +371,8 @@ static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter, loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter); dio->size += length; + if (!length) + return -EFAULT; return length; } @@ -402,6 +404,8 @@ static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi, copied = copy_to_iter(inline_data, length, iter); } dio->size += copied; + if (!copied) + return -EFAULT; return copied; } From 6597350fa8e27f42a07cd90f07d0d03b5add1328 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 23 Jul 2021 01:59:41 +0200 Subject: [PATCH 0415/1202] iomap: Support partial direct I/O on user copy failures In iomap_dio_rw, when iomap_apply returns an -EFAULT error and the IOMAP_DIO_PARTIAL flag is set, complete the request synchronously and return a partial result. This allows the caller to deal with the page fault and retry the remainder of the request. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong --- fs/iomap/direct-io.c | 6 ++++++ include/linux/iomap.h | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index a2a368e824c03..a434fb7887b28 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -581,6 +581,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size) iov_iter_revert(iter, iomi.pos - dio->i_size); + if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) { + if (!(iocb->ki_flags & IOCB_NOWAIT)) + wait_for_completion = true; + ret = 0; + } + /* magic error code to fall back to buffered I/O */ if (ret == -ENOTBLK) { wait_for_completion = true; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 24f8489583ca7..2a213b0d1e1f5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -330,6 +330,13 @@ struct iomap_dio_ops { */ #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) +/* + * When a page fault occurs, return a partial synchronous result and allow + * the caller to retry the rest of the operation after dealing with the page + * fault. + */ +#define IOMAP_DIO_PARTIAL (1 << 2) + ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags); From 1bca16e40c8f0e80591f7b7138ff72ae924eb7c1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 24 Jul 2021 12:26:41 +0200 Subject: [PATCH 0416/1202] iomap: Add done_before argument to iomap_dio_rw Add a done_before argument to iomap_dio_rw that indicates how much of the request has already been transferred. When the request succeeds, we report that done_before additional bytes were transferred. This is useful for finishing a request asynchronously when part of the request has already been completed synchronously. We'll use that to allow iomap_dio_rw to be used with page faults disabled: when a page fault occurs while submitting a request, we synchronously complete the part of the request that has already been submitted. The caller can then take care of the page fault and call iomap_dio_rw again for the rest of the request, passing in the number of bytes already transferred. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J.
Wong --- fs/btrfs/file.c | 5 +++-- fs/erofs/data.c | 2 +- fs/ext4/file.c | 5 +++-- fs/gfs2/file.c | 4 ++-- fs/iomap/direct-io.c | 19 ++++++++++++++++--- fs/xfs/xfs_file.c | 6 +++--- fs/zonefs/super.c | 4 ++-- include/linux/iomap.h | 4 ++-- 8 files changed, 32 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f37211d3bb69e..9d41b28c67baf 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1957,7 +1957,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) } dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - 0); + 0, 0); btrfs_inode_unlock(inode, ilock_flags); @@ -3658,7 +3658,8 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) return 0; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0); + ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + 0, 0); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); return ret; } diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 9db8297156527..16a41d0db55a3 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -287,7 +287,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!err) return iomap_dio_rw(iocb, to, &erofs_iomap_ops, - NULL, 0); + NULL, 0, 0); if (err < 0) return err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ac0e11bbb4450..b25c1f8f7c4f1 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -74,7 +74,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); @@ -566,7 +566,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ilock_shared) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, - (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0); + (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, + 0); if (ret == -ENOTBLK) ret = 0; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b07b9c2d04463..ae06defcf3693 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -822,7 +822,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, if (ret) goto out_uninit; - ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0); + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0); gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); @@ -856,7 +856,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, if (offset + len > i_size_read(&ip->i_inode)) goto out; - ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0); + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0); if (ret == -ENOTBLK) ret = 0; out: diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index a434fb7887b28..468dcbba45bcb 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -31,6 +31,7 @@ struct iomap_dio { atomic_t ref; unsigned flags; int error; + size_t done_before; bool wait_for_completion; union { @@ -124,6 +125,9 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) ret = generic_write_sync(iocb, ret); + if (ret > 0) + ret += dio->done_before; + kfree(dio); return ret; @@ -450,13 +454,21 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter, * may be pure data writes. 
In that case, we still need to do a full data sync * completion. * + * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL, + * __iomap_dio_rw can return a partial result if it encounters a non-resident + * page in @iter after preparing a transfer. In that case, the non-resident + * pages can be faulted in and the request resumed with @done_before set to the + * number of bytes previously transferred. The request will then complete with + * the correct total number of bytes transferred; this is essential for + * completing partial requests asynchronously. + * * Returns -ENOTBLK In case of a page invalidation invalidation failure for * writes. The callers needs to fall back to buffered I/O in this case. */ struct iomap_dio * __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags) + unsigned int dio_flags, size_t done_before) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); @@ -486,6 +498,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->dops = dops; dio->error = 0; dio->flags = 0; + dio->done_before = done_before; dio->submit.iter = iter; dio->submit.waiter = current; @@ -652,11 +665,11 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw); ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags) + unsigned int dio_flags, size_t done_before) { struct iomap_dio *dio; - dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags); + dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before); if (IS_ERR_OR_NULL(dio)) return PTR_ERR_OR_ZERO(dio); return iomap_dio_complete(dio); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7aa943edfc02f..240eb932c014b 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -259,7 +259,7 @@ xfs_file_dio_read( ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED); if (ret) return ret; - ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0); + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -569,7 +569,7 @@ xfs_file_dio_write_aligned( } trace_xfs_file_direct_write(iocb, from); ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, - &xfs_dio_write_ops, 0); + &xfs_dio_write_ops, 0, 0); out_unlock: if (iolock) xfs_iunlock(ip, iolock); @@ -647,7 +647,7 @@ xfs_file_dio_write_unaligned( trace_xfs_file_direct_write(iocb, from); ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, - &xfs_dio_write_ops, flags); + &xfs_dio_write_ops, flags, 0); /* * Retry unaligned I/O with exclusive blocking semantics if the DIO diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index ddc346a9df9ba..6122c38ab44d9 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -852,7 +852,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = zonefs_file_dio_append(iocb, from); else ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, - &zonefs_write_dio_ops, 0); + &zonefs_write_dio_ops, 0, 0); if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && (ret > 0 || ret == -EIOCBQUEUED)) { if (ret > 0) @@ -987,7 +987,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } file_accessed(iocb->ki_filp); ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, - &zonefs_read_dio_ops, 0); + &zonefs_read_dio_ops, 0, 0); } else { ret = generic_file_read_iter(iocb, to); if (ret == -EIO) diff --git 
a/include/linux/iomap.h b/include/linux/iomap.h index 2a213b0d1e1f5..829f2325ecbab 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -339,10 +339,10 @@ struct iomap_dio_ops { ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags); + unsigned int dio_flags, size_t done_before); struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, - unsigned int dio_flags); + unsigned int dio_flags, size_t done_before); ssize_t iomap_dio_complete(struct iomap_dio *dio); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); From 88ed3852da645728a5e636aec005b7b60cc29134 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 17 Aug 2021 22:52:08 +0200 Subject: [PATCH 0417/1202] gup: Introduce FOLL_NOFAULT flag to disable page faults Introduce a new FOLL_NOFAULT flag that causes get_user_pages to return -EFAULT when it would otherwise trigger a page fault. This is roughly similar to FOLL_FAST_ONLY but available on all architectures, and less fragile. Signed-off-by: Andreas Gruenbacher --- include/linux/mm.h | 3 ++- mm/gup.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f9..2f0e6b9f8f3b5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2851,7 +2851,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO * and return without waiting upon it */ -#define FOLL_POPULATE 0x40 /* fault in page */ +#define FOLL_POPULATE 0x40 /* fault in pages (with FOLL_MLOCK) */ +#define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ diff --git a/mm/gup.c b/mm/gup.c index 795f15c410cce..e1c7e4bde11fd 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -918,6 +918,8 @@ static int faultin_page(struct vm_area_struct *vma, /* mlock all present pages, but do not fault in new pages */ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) return -ENOENT; + if (*flags & FOLL_NOFAULT) + return -EFAULT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (*flags & FOLL_REMOTE) @@ -2843,7 +2845,7 @@ static int internal_get_user_pages_fast(unsigned long start, if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | FOLL_FORCE | FOLL_PIN | FOLL_GET | - FOLL_FAST_ONLY))) + FOLL_FAST_ONLY | FOLL_NOFAULT))) return -EINVAL; if (gup_flags & FOLL_PIN) From 0df01a3528677b5fb299cabeb4eae89a3780c1c4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 12 Jul 2021 12:06:14 +0200 Subject: [PATCH 0418/1202] iov_iter: Introduce nofault flag to disable page faults Introduce a new nofault flag to indicate to iov_iter_get_pages not to fault in user pages. This is implemented by passing the FOLL_NOFAULT flag to get_user_pages, which causes get_user_pages to fail when it would otherwise fault in a page. We'll use the ->nofault flag to prevent iomap_dio_rw from faulting in pages when page faults are not allowed. 
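
For illustration, a condensed sketch of the calling pattern this flag enables, combined with the IOMAP_DIO_PARTIAL / done_before interface from the two iomap patches above. This is a sketch only: fs_iomap_ops is a placeholder for a filesystem's iomap ops, and the gfs2 patches later in this series implement the real version.

        static ssize_t dio_read_nofault_sketch(struct kiocb *iocb, struct iov_iter *to)
        {
                ssize_t ret;

                pagefault_disable();            /* block faults from user copies */
                to->nofault = true;             /* make get_user_pages fail instead of faulting */
                ret = iomap_dio_rw(iocb, to, &fs_iomap_ops, NULL,
                                   IOMAP_DIO_PARTIAL, 0);
                to->nofault = false;
                pagefault_enable();

                /* On -EFAULT: drop locks, fault the pages in manually, retry. */
                return ret;
        }
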
Signed-off-by: Andreas Gruenbacher --- include/linux/uio.h | 1 + lib/iov_iter.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/linux/uio.h b/include/linux/uio.h index 25d1c24fd829a..6350354f97e90 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -35,6 +35,7 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; + bool nofault; bool data_source; size_t iov_offset; size_t count; diff --git a/lib/iov_iter.c b/lib/iov_iter.c index ac9a87e727a32..66a740e6e153c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -513,6 +513,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, + .nofault = false, .data_source = direction, .iov = iov, .nr_segs = nr_segs, @@ -1527,13 +1528,17 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return 0; if (likely(iter_is_iovec(i))) { + unsigned int gup_flags = 0; unsigned long addr; + if (iov_iter_rw(i) != WRITE) + gup_flags |= FOLL_WRITE; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + addr = first_iovec_segment(i, &len, start, maxsize, maxpages); n = DIV_ROUND_UP(len, PAGE_SIZE); - res = get_user_pages_fast(addr, n, - iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, - pages); + res = get_user_pages_fast(addr, n, gup_flags, pages); if (unlikely(res <= 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; @@ -1649,15 +1654,20 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return 0; if (likely(iter_is_iovec(i))) { + unsigned int gup_flags = 0; unsigned long addr; + if (iov_iter_rw(i) != WRITE) + gup_flags |= FOLL_WRITE; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + addr = first_iovec_segment(i, &len, start, maxsize, ~0U); n = DIV_ROUND_UP(len, PAGE_SIZE); p = get_pages_array(n); if (!p) return -ENOMEM; - res = get_user_pages_fast(addr, n, - iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); + res = get_user_pages_fast(addr, n, gup_flags, p); if (unlikely(res <= 0)) { kvfree(p); *pages = NULL; From 9244c8d48fcc17619f42c4d2af6ad03d4ad59ac5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 12 Jul 2021 13:40:38 +0200 Subject: [PATCH 0419/1202] gfs2: Fix mmap + page fault deadlocks for direct I/O Also disable page faults during direct I/O requests and implement a similar kind of retry logic as in the buffered I/O case. The retry logic in the direct I/O case differs from the buffered I/O case in the following way: direct I/O doesn't provide the kinds of consistency guarantees between concurrent reads and writes that buffered I/O provides, so once we lose the inode glock while faulting in user pages, we always resume the operation. We never need to return a partial read or write. This locking problem was originally reported by Jan Kara. Linus came up with the idea of disabling page faults. Many thanks to Al Viro and Matthew Wilcox for their feedback. 
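
In outline, the retry logic added by the diff below has the following shape (heavily condensed sketch; the helper names stand in for the gfs2 glock and fault-in calls, and error handling is elided):

        retry:
                lock_glock();                   /* gfs2_glock_nq() */
        retry_under_glock:
                disable_page_faults();
                ret = do_direct_io();           /* iomap_dio_rw(..., IOMAP_DIO_PARTIAL, written) */
                enable_page_faults();
                if (ret == -EFAULT || bytes_left()) {
                        allow_glock_demote();   /* the lock may now be taken away */
                        fault_in_user_pages();
                        disallow_glock_demote();
                        if (still_holding_glock())
                                goto retry_under_glock;
                        goto retry;             /* lost the glock; start over */
                }
                unlock_glock();
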
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 101 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ae06defcf3693..a8e440b4d21c9 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -811,22 +811,65 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, { struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - size_t count = iov_iter_count(to); + size_t prev_count = 0, window_size = 0; + size_t written = 0; ssize_t ret; - if (!count) + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + * + * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger + * physical as well as manual page faults, and we need to disable both + * kinds. + * + * For direct I/O, gfs2 takes the inode glock in deferred mode. This + * locking mode is compatible with other deferred holders, so multiple + * processes and nodes can do direct I/O to a file at the same time. + * There's no guarantee that reads or writes will be atomic. Any + * coordination among readers and writers needs to happen externally. + */ + + if (!iov_iter_count(to)) return 0; /* skip atime */ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; +retry_under_glock: + pagefault_disable(); + to->nofault = true; + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, + IOMAP_DIO_PARTIAL, written); + to->nofault = false; + pagefault_enable(); + if (ret > 0) + written = ret; - ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0); - gfs2_glock_dq(gh); + if (unlikely(iov_iter_count(to) && (ret > 0 || ret == -EFAULT)) && + should_fault_in_pages(to, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_writeable(to, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) + goto retry; + goto retry_under_glock; + } + } + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); - return ret; + if (ret < 0) + return ret; + return written; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -835,10 +878,20 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - size_t len = iov_iter_count(from); - loff_t offset = iocb->ki_pos; + size_t prev_count = 0, window_size = 0; + size_t read = 0; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + * + * For writes, iomap_dio_rw only triggers manual page faults, so we + * don't need to disable physical ones. + */ + /* * Deferred lock, even if its a write, since we do no allocation on * this path. All we need to change is the atime, and this lock mode @@ -848,22 +901,46 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, * VFS does. 
*/ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; - +retry_under_glock: /* Silently fall back to buffered I/O when writing beyond EOF */ - if (offset + len > i_size_read(&ip->i_inode)) + if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) goto out; - ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0); + from->nofault = true; + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, + IOMAP_DIO_PARTIAL, read); + from->nofault = false; + if (ret == -ENOTBLK) ret = 0; + if (ret > 0) + read = ret; + + if (unlikely(iov_iter_count(from) && (ret > 0 || ret == -EFAULT)) && + should_fault_in_pages(from, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) + goto retry; + goto retry_under_glock; + } + } out: - gfs2_glock_dq(gh); + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); - return ret; + if (ret < 0) + return ret; + return read; } static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) From 85c4fd3fddded9d219e700a0ef297de1be0b2a21 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 13 Sep 2021 12:34:38 -0500 Subject: [PATCH 0420/1202] gfs2: remove redundant check in gfs2_rgrp_go_lock Before this patch, function gfs2_rgrp_go_lock checked if GL_SKIP and ar_rgrplvb were both true. However, GL_SKIP is only set for rgrps if ar_rgrplvb is true (see gfs2_inplace_reserve). This patch simply removes the redundant check. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c3b00ba92ed2e..7a13a687e4f20 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1291,9 +1291,8 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) int gfs2_rgrp_go_lock(struct gfs2_holder *gh) { struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - struct gfs2_sbd *sdp = rgd->rd_sbd; - if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) + if (gh->gh_flags & GL_SKIP) return 0; return gfs2_rgrp_bh_get(rgd); } From e2574b7026b9588b47fb677682601424103929ff Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 13 Sep 2021 12:40:09 -0500 Subject: [PATCH 0421/1202] gfs2: Add GL_SKIP holder flag to dump_holder Somehow, the GL_SKIP flag was missed when dumping glock holders. This patch adds it to function hflags2str. I added it at the end because I wanted Holder and Skip flags together to read "Hs" rather than "sH" to avoid confusion with "Shared" ("SH") holder state. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f686083d02505..ff12272872845 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2238,6 +2238,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'W'; if (test_bit(HIF_MAY_DEMOTE, &iflags)) *p++ = 'D'; + if (flags & GL_SKIP) + *p++ = 's'; *p = 0; return buf; } From 5a8fc29c6fd7eb535fd1921f58adef801a955c91 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 13 Sep 2021 12:54:16 -0500 Subject: [PATCH 0422/1202] gfs2: move GL_SKIP check from glops to do_promote Before this patch, each individual "go_lock" glock operation (glop) checked the GL_SKIP flag, and if set, would skip further processing. 
This patch changes the logic so the go_lock caller, function do_promote, checks the GL_SKIP flag before calling the go_lock op in the first place. This avoids having to unnecessarily unlock gl_lockref.lock only to re-lock it again. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 26 ++++++++++++++------------ fs/gfs2/glops.c | 2 +- fs/gfs2/rgrp.c | 2 -- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ff12272872845..5d6b253030a19 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -502,18 +502,20 @@ __acquires(&gl->gl_lockref.lock) } if (gh->gh_list.prev == &gl->gl_holders && glops->go_lock) { - spin_unlock(&gl->gl_lockref.lock); - /* FIXME: eliminate this eventually */ - ret = glops->go_lock(gh); - spin_lock(&gl->gl_lockref.lock); - if (ret) { - if (ret == 1) - return 2; - gh->gh_error = ret; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - goto restart + if (!(gh->gh_flags & GL_SKIP)) { + spin_unlock(&gl->gl_lockref.lock); + /* FIXME: eliminate this eventually */ + ret = glops->go_lock(gh); + spin_lock(&gl->gl_lockref.lock); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + goto restart; + } } set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh, 1); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 79c621c7863d2..4b19f513570f7 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -495,7 +495,7 @@ static int inode_go_lock(struct gfs2_holder *gh) struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip || (gh->gh_flags & GL_SKIP)) + if (!ip) return 0; if (test_bit(GIF_INVALID, &ip->i_flags)) { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7a13a687e4f20..1fb66f6e6a0ce 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1292,8 +1292,6 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) { struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - if (gh->gh_flags & GL_SKIP) - return 0; return gfs2_rgrp_bh_get(rgd); } From 6333089f2d9489494886c76aa83edc7eef60473e Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 14 Sep 2021 08:16:58 -0500 Subject: [PATCH 0423/1202] gfs2: Switch some BUG_ON to GLOCK_BUG_ON for debug In rgrp.c, there are several places where it does BUG_ON. This tells us the call stack but nothing more, which is not very helpful. This patch switches them to GLOCK_BUG_ON which also prints the glock, its holders, and many of the rgrp values, which will help us debug problems in the future.
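
A macro of roughly the following shape provides that extra context (a sketch of the idea, not necessarily the exact fs/gfs2/glock.h definition):

        /* Dump the glock, its holders and flags before dying. */
        #define GLOCK_BUG_ON(gl, x)                                     \
        do {                                                            \
                if (unlikely(x)) {                                      \
                        gfs2_dump_glock(NULL, gl, true);                \
                        BUG();                                          \
                }                                                       \
        } while (0)
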
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1fb66f6e6a0ce..96b2fbed6bf1f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1230,7 +1230,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; - BUG_ON(rgd->rd_reserved); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; } @@ -1280,7 +1280,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; - BUG_ON(rgd->rd_reserved); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -2212,7 +2212,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) struct gfs2_rgrpd *rgd = rs->rs_rgd; spin_lock(&rgd->rd_rsspin); - BUG_ON(rgd->rd_reserved < rs->rs_reserved); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved); rgd->rd_reserved -= rs->rs_reserved; spin_unlock(&rgd->rd_rsspin); rs->rs_reserved = 0; @@ -2473,9 +2473,9 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - BUG_ON(rbm.rgd->rd_reserved < *nblocks); - BUG_ON(rbm.rgd->rd_free_clone < *nblocks); - BUG_ON(rbm.rgd->rd_free < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks); rbm.rgd->rd_reserved -= *nblocks; rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; @@ -2762,8 +2762,8 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) void rgrp_lock_local(struct gfs2_rgrpd *rgd) { - BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); + GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) && + !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); mutex_lock(&rgd->rd_mutex); } From 9e72c567ba42002412c90c7eed8ea7126bfbbb2b Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 25 Aug 2021 12:57:24 -0500 Subject: [PATCH 0424/1202] gfs2: Allow append and immutable bits to coexist Before this patch, function do_gfs2_set_flags checked if the append and immutable flags were being set while already set. If so, error -EPERM was given. There's no reason why these two flags should be mutually exclusive, and if you set them separately, you will, in essence, set one while it is already set. For example: chattr +a /mnt/gfs2/file1 chattr +i /mnt/gfs2/file1 The first command sets the append-only flag. Since they are additive, the second command sets the immutable flag AND append-only flag, since they both coexist in i_diskflags. So the second command should not return an error. This bug caused xfstests generic/545 to fail. This patch simply removes the invalid checks. I also eliminated an unused parm from do_gfs2_set_flags. 
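
A quick worked example of why the second command must succeed: fileattr updates are read-modify-write, so the +i request resubmits the already-set append-only bit (illustrative values only):

        u32 old_flags = GFS2_DIF_APPENDONLY;            /* state after "chattr +a" */
        /* "chattr +i" reads the flags back and resubmits them with +i added: */
        u32 req_flags = GFS2_DIF_APPENDONLY | GFS2_DIF_IMMUTABLE;
        /* Rejecting req_flags because GFS2_DIF_APPENDONLY is already set would
         * make the two commands mutually exclusive - the bug being fixed. */
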
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a8e440b4d21c9..f2abe7301059f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -213,11 +213,9 @@ void gfs2_set_inode_flags(struct inode *inode) * @inode: The inode * @reqflags: The flags to set * @mask: Indicates which flags are valid - * @fsflags: The FS_* inode flags passed in * */ -static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask, - const u32 fsflags) +static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -237,10 +235,6 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask, goto out; error = -EPERM; - if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE)) - goto out; - if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY)) - goto out; if (!IS_IMMUTABLE(inode)) { error = gfs2_permission(&init_user_ns, inode, MAY_WRITE); if (error) @@ -313,7 +307,7 @@ int gfs2_fileattr_set(struct user_namespace *mnt_userns, mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA); } - return do_gfs2_set_flags(inode, gfsflags, mask, fsflags); + return do_gfs2_set_flags(inode, gfsflags, mask); } static int gfs2_getlabel(struct file *filp, char __user *label) From e59413325f085bf866bea67425470f77afe6f6bd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Sep 2021 13:49:36 -0500 Subject: [PATCH 0425/1202] gfs2: Save ip from gfs2_glock_nq_init Before this patch, when a glock was locked by function gfs2_glock_nq_init, it initialized the holder gh_ip (return address) as gfs2_glock_nq_init. That made it extremely difficult to track down problems because many functions call gfs2_glock_nq_init. This patch changes the function so that it saves gh_ip from the caller of gfs2_glock_nq_init, which makes it easy to backtrack which holder took the lock. 
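
The pattern, reduced to a minimal hypothetical example: an inline wrapper evaluates _RET_IP_ in the caller's frame and hands it down to the out-of-line helper, so the recorded address identifies the real caller rather than the helper itself (example_holder is a made-up type for illustration):

        void __example_holder_init(struct example_holder *gh, unsigned long ip)
        {
                gh->gh_ip = ip;         /* now records the real caller */
        }

        static inline void example_holder_init(struct example_holder *gh)
        {
                __example_holder_init(gh, _RET_IP_);
        }
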
Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 8 ++++---- fs/gfs2/glock.h | 13 ++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5d6b253030a19..54a1ed373ff3a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -911,7 +911,7 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) struct gfs2_holder gh; int error; - gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh); + __gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh, _RET_IP_); error = gfs2_glock_nq(&gh); if (!error) gfs2_glock_dq(&gh); @@ -1208,12 +1208,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, * */ -void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, - struct gfs2_holder *gh) +void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, + struct gfs2_holder *gh, unsigned long ip) { INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; - gh->gh_ip = _RET_IP_; + gh->gh_ip = ip; gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 9012487da4c69..a36104229d0d5 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -190,8 +190,15 @@ extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, extern void gfs2_glock_hold(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_glock_queue_put(struct gfs2_glock *gl); -extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, - u16 flags, struct gfs2_holder *gh); + +extern void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh, + unsigned long ip); +static inline void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, + u16 flags, struct gfs2_holder *gh) { + __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); +} + extern void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh); extern void gfs2_holder_uninit(struct gfs2_holder *gh); @@ -241,7 +248,7 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, { int error; - gfs2_holder_init(gl, state, flags, gh); + __gfs2_holder_init(gl, state, flags, gh, _RET_IP_); error = gfs2_glock_nq(gh); if (error) From e0f87bc07f0973181f56d3d900c7c4b58a9b6cea Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 29 Sep 2021 14:42:38 -0500 Subject: [PATCH 0426/1202] gfs2: dequeue iopen holder in gfs2_inode_lookup error Before this patch, if function gfs2_inode_lookup encountered an error after it had locked the iopen glock, it never unlocked it, relying on the evict code to do the cleanup. The evict code then took the inode glock while holding the iopen glock, which violates the locking order. For example, (1) node A does a gfs2_inode_lookup that fails, leaving the iopen glock locked. (2) node B calls delete_work_func -> gfs2_lookup_by_inum -> gfs2_inode_lookup. It locks the inode glock and blocks trying to lock the iopen glock, which is held by node A. (3) node A eventually calls gfs2_evict_inode -> evict_should_delete. It blocks trying to lock the inode glock, which is now held by node B. This patch introduces error handling to function gfs2_inode_lookup so it properly dequeues held iopen glocks on errors. 
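
The fix follows the usual kernel unwind idiom: everything an error path has already acquired is released, in reverse order, before returning, rather than deferring cleanup to eviction, which may need the locks in a conflicting order (hypothetical names, sketch only):

        static int example_lookup(struct ctx *c)
        {
                int err;

                err = take_inode_glock(c);
                if (err)
                        return err;
                err = take_iopen_glock(c);
                if (err)
                        goto out_inode;
                err = do_lookup_work(c);
                if (err)
                        goto out_iopen;
                return 0;

        out_iopen:
                drop_iopen_glock(c);    /* reverse order: iopen first... */
        out_inode:
                drop_inode_glock(c);    /* ...then inode */
                return err;
        }
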
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 3130f85d2b3f4..d8cd835c560a5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -225,6 +225,10 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail: + if (gfs2_holder_initialized(&ip->i_iopen_gh)) { + glock_clear_object(ip->i_iopen_gh.gh_gl, ip); + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + } if (io_gl) gfs2_glock_put(io_gl); if (gfs2_holder_initialized(&i_gh)) From d4a990061c72e610e96f4c0c6e355578d48b71a9 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 30 Sep 2021 07:48:29 -0500 Subject: [PATCH 0427/1202] gfs2: dump glocks from gfs2_consist_OBJ_i Before this patch, failed consistency checks printed out the object that failed, but not the object's glock. This patch makes it also print out the object glock so we can see the glock's holders and flags to aid with debugging. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index cf345a86ef67b..8241029a2a5d2 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -454,6 +454,7 @@ void gfs2_consist_inode_i(struct gfs2_inode *ip, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, function, file, line); + gfs2_dump_glock(NULL, ip->i_gl, 1); gfs2_withdraw(sdp); } @@ -475,6 +476,7 @@ void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, " function = %s, file = %s, line = %u\n", (unsigned long long)rgd->rd_addr, function, file, line); + gfs2_dump_glock(NULL, rgd->rd_gl, 1); gfs2_withdraw(sdp); } From 1aeb65c8241959523950ce2d4e4e4692caac6d17 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 29 Sep 2021 15:06:21 -0500 Subject: [PATCH 0428/1202] gfs2: change go_lock to go_instantiate Before this patch, the go_lock glock operations (glops) did not do any actual locking. They were used to instantiate objects, like reading in dinodes and rgrps from the media. This patch renames the functions to go_instantiate for clarity. 
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/glops.c | 8 ++++---- fs/gfs2/incore.h | 2 +- fs/gfs2/rgrp.c | 2 +- fs/gfs2/rgrp.h | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 54a1ed373ff3a..7053628d129e3 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -501,11 +501,11 @@ __acquires(&gl->gl_lockref.lock) first_gh = gh; } if (gh->gh_list.prev == &gl->gl_holders && - glops->go_lock) { + glops->go_instantiate) { if (!(gh->gh_flags & GL_SKIP)) { spin_unlock(&gl->gl_lockref.lock); /* FIXME: eliminate this eventually */ - ret = glops->go_lock(gh); + ret = glops->go_instantiate(gh); spin_lock(&gl->gl_lockref.lock); if (ret) { if (ret == 1) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4b19f513570f7..8452a83bd55aa 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -482,13 +482,13 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) } /** - * inode_go_lock - operation done after an inode lock is locked by a process + * inode_go_instantiate - read in an inode if necessary * @gh: The glock holder * * Returns: errno */ -static int inode_go_lock(struct gfs2_holder *gh) +static int inode_go_instantiate(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -740,7 +740,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_sync = inode_go_sync, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, - .go_lock = inode_go_lock, + .go_instantiate = inode_go_instantiate, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, @@ -750,7 +750,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_sync = rgrp_go_sync, .go_inval = rgrp_go_inval, - .go_lock = gfs2_rgrp_go_lock, + .go_instantiate = gfs2_rgrp_go_instantiate, .go_dump = gfs2_rgrp_go_dump, .go_type = LM_TYPE_RGRP, .go_flags = GLOF_LVB, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ca42d310fd4d6..af632dc652310 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -220,7 +220,7 @@ struct gfs2_glock_operations { int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (const struct gfs2_glock *gl); - int (*go_lock) (struct gfs2_holder *gh); + int (*go_instantiate) (struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 96b2fbed6bf1f..cdcbc822064d6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1288,7 +1288,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) return 0; } -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh) { struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index a6855fd796e03..3e2ca1fb43056 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -31,7 +31,7 @@ extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); extern int gfs2_rindex_update(struct gfs2_sbd *sdp); extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); -extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); +extern int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh); extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode 
*ip); From 6348775980bf0aef8e0f32d3949487ba51847843 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Oct 2021 18:24:36 +0200 Subject: [PATCH 0429/1202] gfs2: Remove 'first' trace_gfs2_promote argument Remove the 'first' argument of trace_gfs2_promote: with GL_SKIP, the 'first' holder isn't the one that instantiates the glock (gl_instantiate), which is what the 'first' flag was apparently supposed to indicate. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/trace_gfs2.h | 9 +++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7053628d129e3..a5e1974234606 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -518,12 +518,12 @@ __acquires(&gl->gl_lockref.lock) } } set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh, 1); + trace_gfs2_promote(gh); gfs2_holder_wake(gh); goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh, 0); + trace_gfs2_promote(gh); gfs2_holder_wake(gh); continue; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index bd6c8e9e49db0..a5deb9f868318 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -197,15 +197,14 @@ TRACE_EVENT(gfs2_demote_rq, /* Promotion/grant of a glock */ TRACE_EVENT(gfs2_promote, - TP_PROTO(const struct gfs2_holder *gh, int first), + TP_PROTO(const struct gfs2_holder *gh), - TP_ARGS(gh, first), + TP_ARGS(gh), TP_STRUCT__entry( __field( dev_t, dev ) __field( u64, glnum ) __field( u32, gltype ) - __field( int, first ) __field( u8, state ) ), @@ -213,14 +212,12 @@ TRACE_EVENT(gfs2_promote, __entry->dev = gh->gh_gl->gl_name.ln_sbd->sd_vfs->s_dev; __entry->glnum = gh->gh_gl->gl_name.ln_number; __entry->gltype = gh->gh_gl->gl_name.ln_type; - __entry->first = first; __entry->state = glock_trace_state(gh->gh_state); ), - TP_printk("%u,%u glock %u:%llu promote %s %s", + TP_printk("%u,%u glock %u:%llu promote %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, (unsigned long long)__entry->glnum, - __entry->first ? "first": "other", glock_trace_name(__entry->state)) ); From 680dc3abde834cbdaf0fb5b94061a67b5345ec58 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Oct 2021 07:35:04 -0500 Subject: [PATCH 0430/1202] gfs2: re-factor function do_promote This patch simply re-factors function do_promote to reduce the indents. The logic should be unchanged. This makes future patches more readable. 
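
The transformation is the familiar invert-the-condition refactor, shown here on a hypothetical miniature (can_grant() and handle_blocked() are placeholders):

        /* before: the common path nests inside the success branch */
        list_for_each_entry(gh, &head, list) {
                if (can_grant(gh)) {
                        /* ...long common path, one level deeper... */
                } else {
                        handle_blocked(gh);
                        break;
                }
        }

        /* after: fail first, then run the common path flat */
        list_for_each_entry(gh, &head, list) {
                if (!can_grant(gh)) {
                        handle_blocked(gh);
                        break;
                }
                /* ...long common path, at the top level... */
        }
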
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 71 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a5e1974234606..f9aebeba74546 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -494,48 +494,47 @@ __acquires(&gl->gl_lockref.lock) list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { if (!test_bit(HIF_WAIT, &gh->gh_iflags)) continue; - if (may_grant(gl, first_gh, gh)) { - if (!incompat_holders_demoted) { - demote_incompat_holders(gl, first_gh); - incompat_holders_demoted = true; - first_gh = gh; - } - if (gh->gh_list.prev == &gl->gl_holders && - glops->go_instantiate) { - if (!(gh->gh_flags & GL_SKIP)) { - spin_unlock(&gl->gl_lockref.lock); - /* FIXME: eliminate this eventually */ - ret = glops->go_instantiate(gh); - spin_lock(&gl->gl_lockref.lock); - if (ret) { - if (ret == 1) - return 2; - gh->gh_error = ret; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - goto restart; - } + if (!may_grant(gl, first_gh, gh)) { + /* + * If we get here, it means we may not grant this holder for + * some reason. If this holder is the head of the list, it + * means we have a blocked holder at the head, so return 1. + */ + if (gh->gh_list.prev == &gl->gl_holders) + return 1; + do_error(gl, 0); + break; + } + if (!incompat_holders_demoted) { + demote_incompat_holders(gl, first_gh); + incompat_holders_demoted = true; + first_gh = gh; + } + if (gh->gh_list.prev == &gl->gl_holders && + glops->go_instantiate) { + if (!(gh->gh_flags & GL_SKIP)) { + spin_unlock(&gl->gl_lockref.lock); + /* FIXME: eliminate this eventually */ + ret = glops->go_instantiate(gh); + spin_lock(&gl->gl_lockref.lock); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + goto restart; } - set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh); - gfs2_holder_wake(gh); - goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh); gfs2_holder_wake(gh); - continue; + goto restart; } - /* - * If we get here, it means we may not grant this holder for - * some reason. If this holder is the head of the list, it - * means we have a blocked holder at the head, so return 1. - */ - if (gh->gh_list.prev == &gl->gl_holders) - return 1; - do_error(gl, 0); - break; + set_bit(HIF_HOLDER, &gh->gh_iflags); + trace_gfs2_promote(gh); + gfs2_holder_wake(gh); } return 0; } From 1f13e649bc3dfc8b644418373d09421a31cb17da Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Oct 2021 07:59:59 -0500 Subject: [PATCH 0431/1202] gfs2: further simplify do_promote This patch further simplifies function do_promote by eliminating some redundant code in favor of using a lock_released flag. This is just prep work for a future patch. 
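
The flag implements the standard restart-after-drop rule for a list walked under a spinlock; in outline (condensed from the diff below, with needs_instantiate(), instantiate() and grant() standing in for the real checks):

        restart:
                list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
                        bool lock_released = false;

                        if (needs_instantiate(gh)) {
                                spin_unlock(&gl->gl_lockref.lock);
                                instantiate(gh);        /* may sleep */
                                spin_lock(&gl->gl_lockref.lock);
                                lock_released = true;
                        }
                        grant(gh);
                        if (lock_released)
                                goto restart;   /* the holders list may have changed */
                }
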
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f9aebeba74546..db213f0ef9605 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -487,12 +487,14 @@ __acquires(&gl->gl_lockref.lock) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp, *first_gh; bool incompat_holders_demoted = false; + bool lock_released; int ret; restart: first_gh = find_first_strong_holder(gl); list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (!test_bit(HIF_WAIT, &gh->gh_iflags)) + lock_released = false; + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; if (!may_grant(gl, first_gh, gh)) { /* @@ -511,30 +513,31 @@ __acquires(&gl->gl_lockref.lock) first_gh = gh; } if (gh->gh_list.prev == &gl->gl_holders && - glops->go_instantiate) { - if (!(gh->gh_flags & GL_SKIP)) { - spin_unlock(&gl->gl_lockref.lock); - /* FIXME: eliminate this eventually */ - ret = glops->go_instantiate(gh); - spin_lock(&gl->gl_lockref.lock); - if (ret) { - if (ret == 1) - return 2; - gh->gh_error = ret; - list_del_init(&gh->gh_list); - trace_gfs2_glock_queue(gh, 0); - gfs2_holder_wake(gh); - goto restart; - } + !(gh->gh_flags & GL_SKIP) && glops->go_instantiate) { + lock_released = true; + spin_unlock(&gl->gl_lockref.lock); + ret = glops->go_instantiate(gh); + spin_lock(&gl->gl_lockref.lock); + if (ret) { + if (ret == 1) + return 2; + gh->gh_error = ret; + list_del_init(&gh->gh_list); + trace_gfs2_glock_queue(gh, 0); + gfs2_holder_wake(gh); + goto restart; } - set_bit(HIF_HOLDER, &gh->gh_iflags); - trace_gfs2_promote(gh); - gfs2_holder_wake(gh); - goto restart; } set_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_promote(gh); gfs2_holder_wake(gh); + /* + * If we released the gl_lockref.lock the holders list may have + * changed. For that reason, we start again at the start of + * the holders queue. + */ + if (lock_released) + goto restart; } return 0; } From 0b4450d569316dfbac42a1a705868b0cdc691276 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Oct 2021 08:59:52 -0500 Subject: [PATCH 0432/1202] gfs2: split glock instantiation off from do_promote Before this patch, function do_promote had a section of code that did the actual instantiation. This patch splits that off into its own function, gfs2_instantiate, which prepares us for the next patch that will use that function. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index db213f0ef9605..75d54ed7e54ed 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -472,6 +472,21 @@ find_first_strong_holder(struct gfs2_glock *gl) return NULL; } +/* + * gfs2_instantiate - Call the glops instantiate function + * @gl: The glock + * + * Returns: 0 if instantiate was successful, 2 if type specific operation is + * underway, or error. 
+ */ +static int gfs2_instantiate(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + const struct gfs2_glock_operations *glops = gl->gl_ops; + + return glops->go_instantiate(gh); +} + /** * do_promote - promote as many requests as possible on the current queue * @gl: The glock @@ -484,7 +499,6 @@ static int do_promote(struct gfs2_glock *gl) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { - const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp, *first_gh; bool incompat_holders_demoted = false; bool lock_released; @@ -513,10 +527,10 @@ __acquires(&gl->gl_lockref.lock) first_gh = gh; } if (gh->gh_list.prev == &gl->gl_holders && - !(gh->gh_flags & GL_SKIP) && glops->go_instantiate) { + !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) { lock_released = true; spin_unlock(&gl->gl_lockref.lock); - ret = glops->go_instantiate(gh); + ret = gfs2_instantiate(gh); spin_lock(&gl->gl_lockref.lock); if (ret) { if (ret == 1) From 236275a9b703ad8db35dfcc6d503dcfe5252dd65 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 6 Oct 2021 09:29:18 -0500 Subject: [PATCH 0433/1202] gfs2: fix GL_SKIP node_scope problems Before this patch, when a glock was locked, the very first holder on the queue would unlock the lockref and call the go_instantiate glops function (if one existed), unless GL_SKIP was specified. When we introduced the new node-scope concept, we allowed multiple holders to lock glocks in EX mode and share the lock. But node-scope introduced a new problem: if the first holder has GL_SKIP and the next one does NOT, since it is not the first holder on the queue, the go_instantiate op was not called. Eventually the GL_SKIP holder may call the instantiate sub-function (e.g. gfs2_rgrp_bh_get) but there was still a window of time in which another non-GL_SKIP holder assumes the instantiate function had been called by the first holder. In the case of rgrp glocks, this led to a NULL pointer dereference on the buffer_heads. This patch tries to fix the problem by introducing two new glock flags: GLF_INSTANTIATE_NEEDED, which keeps track of when the instantiate function needs to be called to "fill in" or "read in" the object before it is referenced. GLF_INSTANTIATE_IN_PROG which is used to determine when a process is in the process of reading in the object. Whenever a function needs to reference the object, it checks the GLF_INSTANTIATE_NEEDED flag, and if set, it sets GLF_INSTANTIATE_IN_PROG and calls the glops "go_instantiate" function. As before, the gl_lockref spin_lock is unlocked during the IO operation, which may take a relatively long amount of time to complete. While unlocked, if another process determines go_instantiate is still needed, it sees GLF_INSTANTIATE_IN_PROG is set, and waits for the go_instantiate glop operation to be completed. Once GLF_INSTANTIATE_IN_PROG is cleared, it needs to check GLF_INSTANTIATE_NEEDED again because the other process's go_instantiate operation may not have been successful. Functions that previously called the instantiate sub-functions now call directly into gfs2_instantiate so the new bits are managed properly. 
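Expressed as code, the flag protocol described above is compact. The following is a condensed sketch of the gfs2_instantiate() logic added in the hunk below, with the error path for a failed read-in trimmed:

again:
	if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		return 0;			/* object already filled in */

	if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
		/*
		 * Somebody else is reading the object in: wait, then
		 * recheck, because their attempt may have failed.
		 */
		wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
			    TASK_UNINTERRUPTIBLE);
		goto again;
	}

	set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	ret = glops->go_instantiate(gh);	/* the actual read-in */
	if (!ret)
		clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
	clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG);
	return ret;

The wake_up_bit() after the smp_mb__after_atomic() barrier is what releases any waiters parked in the wait_on_bit() branch.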
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 42 ++++++++++++++++++++++++++++++++++++++---- fs/gfs2/glock.h | 1 + fs/gfs2/glops.c | 10 ++++++---- fs/gfs2/incore.h | 2 ++ fs/gfs2/inode.c | 3 ++- fs/gfs2/rgrp.c | 21 +++++++++++---------- fs/gfs2/super.c | 2 +- 7 files changed, 61 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 75d54ed7e54ed..08073dcaf30e2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -479,12 +479,42 @@ find_first_strong_holder(struct gfs2_glock *gl) * Returns: 0 if instantiate was successful, 2 if type specific operation is * underway, or error. */ -static int gfs2_instantiate(struct gfs2_holder *gh) +int gfs2_instantiate(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; + int ret; + +again: + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)) + return 0; - return glops->go_instantiate(gh); + /* + * Since we unlock the lockref lock, we set a flag to indicate + * instantiate is in progress. + */ + if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) { + wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG, + TASK_UNINTERRUPTIBLE); + /* + * Here we just waited for a different instantiate to finish. + * But that may not have been successful, as when a process + * locks an inode glock _before_ it has an actual inode to + * instantiate into. So we check again. This process might + * have an inode to instantiate, so might be successful. + */ + goto again; + } + + set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + + ret = glops->go_instantiate(gh); + if (!ret) + clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); + clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags); + smp_mb__after_atomic(); + wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG); + return ret; } /** @@ -526,7 +556,7 @@ __acquires(&gl->gl_lockref.lock) incompat_holders_demoted = true; first_gh = gh; } - if (gh->gh_list.prev == &gl->gl_holders && + if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags) && !(gh->gh_flags & GL_SKIP) && gl->gl_ops->go_instantiate) { lock_released = true; spin_unlock(&gl->gl_lockref.lock); @@ -1162,7 +1192,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_inc(&sdp->sd_glock_disposal); gl->gl_node.next = NULL; - gl->gl_flags = 0; + gl->gl_flags = glops->go_instantiate ? 
BIT(GLF_INSTANTIATE_NEEDED) : 0; gl->gl_name = name; lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; @@ -2326,6 +2356,10 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'P'; if (test_bit(GLF_FREEING, gflags)) *p++ = 'x'; + if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) + *p++ = 'n'; + if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) + *p++ = 'N'; *p = 0; return buf; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index a36104229d0d5..4f8642301801b 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -204,6 +204,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags, extern void gfs2_holder_uninit(struct gfs2_holder *gh); extern int gfs2_glock_nq(struct gfs2_holder *gh); extern int gfs2_glock_poll(struct gfs2_holder *gh); +extern int gfs2_instantiate(struct gfs2_holder *gh); extern int gfs2_glock_wait(struct gfs2_holder *gh); extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq(struct gfs2_holder *gh); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 8452a83bd55aa..e2656baf38a38 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -357,6 +357,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) truncate_inode_pages(mapping, 0); if (ip) { set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); forget_all_cached_acls(&ip->i_inode); security_inode_invalidate_secctx(&ip->i_inode); gfs2_dir_hash_inval(ip); @@ -495,13 +496,13 @@ static int inode_go_instantiate(struct gfs2_holder *gh) struct gfs2_inode *ip = gl->gl_object; int error = 0; - if (!ip) - return 0; + if (!ip) /* no inode to populate - read it in later */ + goto out; if (test_bit(GIF_INVALID, &ip->i_flags)) { error = gfs2_inode_refresh(ip); if (error) - return error; + goto out; } if (gh->gh_state != LM_ST_DEFERRED) @@ -515,9 +516,10 @@ static int inode_go_instantiate(struct gfs2_holder *gh) list_add(&ip->i_trunc_list, &sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); wake_up(&sdp->sd_quota_wait); - return 1; + error = 1; } +out: return error; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index af632dc652310..19a4c6132c679 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -316,6 +316,7 @@ struct gfs2_alloc_parms { enum { GLF_LOCK = 1, + GLF_INSTANTIATE_NEEDED = 2, /* needs instantiate */ GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DEMOTE_IN_PROGRESS = 5, @@ -325,6 +326,7 @@ enum { GLF_REPLY_PENDING = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, + GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d8cd835c560a5..940d3e37250df 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -183,6 +183,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, glock_set_object(ip->i_gl, ip); set_bit(GIF_INVALID, &ip->i_flags); + set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) goto fail; @@ -196,7 +197,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, if (type == DT_UNKNOWN) { /* Inode glock must be locked already */ - error = gfs2_inode_refresh(GFS2_I(inode)); + error = gfs2_instantiate(&i_gh); if (error) goto fail; } else { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index cdcbc822064d6..5ee7da3a450e9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1238,8 
+1238,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - } - else if (sdp->sd_args.ar_rgrplvb) { + } else if (sdp->sd_args.ar_rgrplvb) { if (!gfs2_rgrp_lvb_valid(rgd)){ gfs2_consist_rgrpd(rgd); error = -EIO; @@ -1257,11 +1256,10 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; gfs2_assert_warn(sdp, !bi->bi_clone); } - return error; } -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; @@ -1269,7 +1267,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) - return gfs2_rgrp_bh_get(rgd); + return gfs2_instantiate(gh); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; @@ -1312,6 +1310,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } + set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -2110,7 +2109,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rgd); + error = update_rgrp_lvb(rs->rs_rgd, + &ip->i_rgd_gh); if (unlikely(error)) { rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); @@ -2125,8 +2125,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rgd); + if (sdp->sd_args.ar_rgrplvb) { + error = gfs2_instantiate(&ip->i_rgd_gh); + if (error) + goto skip_rgrp; + } /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) @@ -2762,8 +2765,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) void rgrp_lock_local(struct gfs2_rgrpd *rgd) { - GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) && - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); mutex_lock(&rgd->rd_mutex); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6e00d15ef0a82..26c7265800418 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1245,7 +1245,7 @@ static enum dinode_demise evict_should_delete(struct inode *inode, return SHOULD_NOT_DELETE_DINODE; if (test_bit(GIF_INVALID, &ip->i_flags)) { - ret = gfs2_inode_refresh(ip); + ret = gfs2_instantiate(gh); if (ret) return SHOULD_NOT_DELETE_DINODE; } From 7d01358519e1e12adfd6ba02920b38b7f7105138 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 Oct 2021 09:10:51 -0500 Subject: [PATCH 0434/1202] gfs2: Eliminate GIF_INVALID flag With the addition of the new GLF_INSTANTIATE_NEEDED flag, the GIF_INVALID flag is now redundant. This patch removes it. Since inode_instantiate is only called when instantiation is needed, the check in inode_instantiate is removed too. 
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 11 +++-------- fs/gfs2/incore.h | 1 - fs/gfs2/inode.c | 1 - fs/gfs2/super.c | 2 +- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e2656baf38a38..0b6a59f71eeff 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -356,7 +356,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); if (ip) { - set_bit(GIF_INVALID, &ip->i_flags); set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); forget_all_cached_acls(&ip->i_inode); security_inode_invalidate_secctx(&ip->i_inode); @@ -477,8 +476,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) error = gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); - clear_bit(GIF_INVALID, &ip->i_flags); - return error; } @@ -499,11 +496,9 @@ static int inode_go_instantiate(struct gfs2_holder *gh) if (!ip) /* no inode to populate - read it in later */ goto out; - if (test_bit(GIF_INVALID, &ip->i_flags)) { - error = gfs2_inode_refresh(ip); - if (error) - goto out; - } + error = gfs2_inode_refresh(ip); + if (error) + goto out; if (gh->gh_state != LM_ST_DEFERRED) inode_dio_wait(&ip->i_inode); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 19a4c6132c679..d5edc27d88b21 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -373,7 +373,6 @@ struct gfs2_glock { }; enum { - GIF_INVALID = 0, GIF_QD_LOCKED = 1, GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 940d3e37250df..9c28b7090895f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -182,7 +182,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, } glock_set_object(ip->i_gl, ip); - set_bit(GIF_INVALID, &ip->i_flags); set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags); error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (unlikely(error)) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 26c7265800418..5b121371508a5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1244,7 +1244,7 @@ static enum dinode_demise evict_should_delete(struct inode *inode, if (ret) return SHOULD_NOT_DELETE_DINODE; - if (test_bit(GIF_INVALID, &ip->i_flags)) { + if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { ret = gfs2_instantiate(gh); if (ret) return SHOULD_NOT_DELETE_DINODE; From 852bce1177a8ae3f32b0a367b4a88526d3eaee3c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 5 Oct 2021 09:28:17 -0500 Subject: [PATCH 0435/1202] gfs2: remove RDF_UPTODATE flag The new GLF_INSTANTIATE_NEEDED flag obsoletes the old rgrp flag GFS2_RDF_UPTODATE, so this patch replaces it like we did with inodes. 
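The conversion follows the same pattern as the inode case in the previous patch. Roughly, condensed from the hunks in these two patches, with the old per-object checks shown for contrast:

	/* before: each object type had its own "stale" flag */
	if (test_bit(GIF_INVALID, &ip->i_flags))	/* inodes */
		error = gfs2_inode_refresh(ip);
	if (!(rgd->rd_flags & GFS2_RDF_UPTODATE))	/* rgrps */
		error = gfs2_rgrp_bh_get(rgd);

	/* after: one glock-level flag, managed inside gfs2_instantiate() */
	if (test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags))
		error = gfs2_instantiate(gh);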
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 2 +- fs/gfs2/incore.h | 1 - fs/gfs2/rgrp.c | 36 ++++++++++++++---------------------- 3 files changed, 15 insertions(+), 24 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 0b6a59f71eeff..650ad77c4d0b4 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -228,7 +228,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); truncate_inode_pages_range(mapping, start, end); - rgd->rd_flags &= ~GFS2_RDF_UPTODATE; + set_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags); } static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d5edc27d88b21..8c00fb389ae5e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -119,7 +119,6 @@ struct gfs2_rgrpd { u32 rd_flags; u32 rd_extfail_pt; /* extent failure point */ #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ -#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5ee7da3a450e9..0fb3c01bc5577 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -932,7 +932,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); + rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -1185,8 +1185,8 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) } /** - * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps + * @gh: the glock holder representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. 
@@ -1194,10 +1194,11 @@ static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) * Returns: errno */ -static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh) { + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; - struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; struct gfs2_bitmap *bi; unsigned int x, y; @@ -1225,15 +1226,13 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } } - if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgrp_set_bitmap_flags(rgd); - rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); - rgd->rd_free_clone = rgd->rd_free; - GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); - /* max out the rgrp allocation failure point */ - rgd->rd_extfail_pt = rgd->rd_free; - } + gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); + rgd->rd_flags |= GFS2_RDF_CHECK; + rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, @@ -1263,7 +1262,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; - if (rgd->rd_flags & GFS2_RDF_UPTODATE) + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags)) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) @@ -1286,13 +1285,6 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) return 0; } -int gfs2_rgrp_go_instantiate(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - - return gfs2_rgrp_bh_get(rgd); -} - /** * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @rgd: The resource group From 7e1ef46c5a68eb99d9fc265741b884010143dd06 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 1 Oct 2021 13:00:21 -0500 Subject: [PATCH 0436/1202] gfs2: set glock object after nq Before this patch, function gfs2_create_inode called glock_set_object to set the gl_object for inode and iopen glocks before the glock was locked. That's wrong because other competing processes like evict may be blocked waiting for the glock and still have gl_object set before the actual eviction can take place. This patch moves the call to glock_set_object until after the glock is acquired in function gfs2_create_inode, so it waits for possibly competing evicts to finish their processing first.
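Condensed from the hunk below, the reordering in gfs2_create_inode amounts to the following:

	/* before: object published while evict may still hold the glock */
	glock_set_object(ip->i_gl, ip);
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);

	/* after: acquire the glock first, then publish the object */
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
	if (error)
		goto fail_gunlock2;
	glock_set_object(ip->i_gl, ip);

The iopen glock gets the same treatment: glock_set_object(io_gl, ip) moves below the point where that glock has been acquired.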
Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9c28b7090895f..6424b903e8851 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -731,18 +731,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; flush_delayed_work(&ip->i_gl->gl_work); - glock_set_object(ip->i_gl, ip); error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_free_inode; gfs2_cancel_delete_work(io_gl); - glock_set_object(io_gl, ip); error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); if (error) goto fail_gunlock2; + glock_set_object(ip->i_gl, ip); error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock2; @@ -758,6 +757,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; + glock_set_object(io_gl, ip); gfs2_set_iop(inode); insert_inode_hash(inode); From ee82b2c9d7a2f06481be8cc7167cc7e241553f4b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 11 Oct 2021 20:53:02 +0200 Subject: [PATCH 0437/1202] gfs2: Cancel remote delete work asynchronously In gfs2_inode_lookup and gfs2_create_inode, we're calling gfs2_cancel_delete_work which currently cancels any remote delete work (delete_work_func) synchronously. This means that if the work is currently running, it will wait for it to finish. We're doing this to prevent a previous instance of an inode from having any influence on the next instance. However, delete_work_func uses gfs2_inode_lookup internally, and we can end up in a deadlock when delete_work_func gets interrupted at the wrong time. For example, (1) An inode's iopen glock has delete work queued, but the inode itself has been evicted from the inode cache. (2) The delete work is preempted before reaching gfs2_inode_lookup. (3) Another process recreates the inode (gfs2_create_inode). It tries to cancel any outstanding delete work, which blocks waiting for the ongoing delete work to finish. (4) The delete work calls gfs2_inode_lookup, which blocks waiting for gfs2_create_inode to instantiate and unlock the new inode => deadlock. It turns out that when the delete work notices that its inode has been re-instantiated, it will do nothing. This means that it's safe to cancel the delete work asynchronously. This prevents the kind of deadlock described above. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08073dcaf30e2..190d7eb2d0eae 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2127,7 +2127,7 @@ bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) void gfs2_cancel_delete_work(struct gfs2_glock *gl) { - if (cancel_delayed_work_sync(&gl->gl_delete)) { + if (cancel_delayed_work(&gl->gl_delete)) { clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); gfs2_glock_put(gl); } From 662a8c38f35f925bdb5021e831e34fec52cae1ed Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 7 Oct 2021 15:57:44 +0200 Subject: [PATCH 0438/1202] gfs2: Fix glock_hash_walk bugs So far, glock_hash_walk took a reference on each glock it iterated over, and it was the examiner's responsibility to drop those references.
Dropping the final reference to a glock can sleep and the examiners are called in a RCU critical section with spin locks held, so examiners that didn't need the extra reference had to drop it asynchronously via gfs2_glock_queue_put or similar. This wasn't done correctly in thaw_glock which did call gfs2_glock_put, and not at all in dump_glock_func. Change glock_hash_walk to not take glock references at all. That way, the examiners that don't need them won't have to bother with slow asynchronous puts, and the examiners that do need references can take them themselves. Reported-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 190d7eb2d0eae..6e3bd5ab91089 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2101,10 +2101,10 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) do { rhashtable_walk_start(&iter); - while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) - if (gl->gl_name.ln_sbd == sdp && - lockref_get_not_dead(&gl->gl_lockref)) + while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { + if (gl->gl_name.ln_sbd == sdp) examiner(gl); + } rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); @@ -2146,7 +2146,6 @@ static void flush_delete_work(struct gfs2_glock *gl) &gl->gl_delete, 0); } } - gfs2_glock_queue_work(gl, 0); } void gfs2_flush_delete_work(struct gfs2_sbd *sdp) @@ -2163,10 +2162,10 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp) static void thaw_glock(struct gfs2_glock *gl) { - if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) { - gfs2_glock_put(gl); + if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + return; + if (!lockref_get_not_dead(&gl->gl_lockref)) return; - } set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); } @@ -2182,9 +2181,12 @@ static void clear_glock(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_lockref.lock); - if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + if (!__lockref_is_dead(&gl->gl_lockref)) { + gl->gl_lockref.count++; + if (gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + __gfs2_glock_queue_work(gl, 0); + } spin_unlock(&gl->gl_lockref.lock); } From 0b388a30547d21af078ff8b364374d6793695b1b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 13 Oct 2021 10:02:00 -0400 Subject: [PATCH 0439/1202] gfs2: check context in gfs2_glock_put Add a might_sleep call into gfs2_glock_put which can sleep in DLM when the last reference is released. This will show problems earlier, and not only when the last reference is put. 
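With the annotation in place, the function begins as shown below (taken from the hunk that follows). Under CONFIG_DEBUG_ATOMIC_SLEEP, might_sleep() warns on any call from atomic context, so a put that could only sleep on the rare final reference now gets flagged on every call:

	void gfs2_glock_put(struct gfs2_glock *gl)
	{
		/* last put could call sleepable dlm api */
		might_sleep();

		if (lockref_put_or_lock(&gl->gl_lockref))
			return;
		/* ... drop the final reference ... */
	}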
Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6e3bd5ab91089..19f38aee1b618 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -301,6 +301,9 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { + /* last put could call sleepable dlm api */ + might_sleep(); + if (lockref_put_or_lock(&gl->gl_lockref)) return; From d000f3b901c3c423933fbac37db3a78cb6d27e80 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Mon, 20 Sep 2021 13:05:18 -0600 Subject: [PATCH 0440/1202] gfs2: Fix unused value warning in do_gfs2_set_flags() Coverity complains of an unused value: CID 119623 (#1 of 1): Unused value (UNUSED_VALUE) assigned_value: Assigning value -1 to error here, but that stored value is overwritten before it can be used. 237 error = -EPERM; Fix it by removing the assignment. Signed-off-by: Tim Gardner Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index f2abe7301059f..aca6a885f9b8f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -234,7 +234,6 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) if ((new_flags ^ flags) == 0) goto out; - error = -EPERM; if (!IS_IMMUTABLE(inode)) { error = gfs2_permission(&init_user_ns, inode, MAY_WRITE); if (error) From a3b7659a1561af469401d25bb66d0b30db77aa7b Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Wed, 22 Sep 2021 14:37:32 +0200 Subject: [PATCH 0441/1202] phy: cadence-torrent: Migrate to clk_hw based registration and OF APIs Use clk_hw based provider APIs to register clks to remove the usage of deprecated APIs. Signed-off-by: Swapnil Jakhade Link: https://lore.kernel.org/r/20210922123735.21927-2-sjakhade@cadence.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-torrent.c | 30 ++++++++++++++--------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index 415ace64adc5c..ecb1aa883c05c 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -235,6 +235,8 @@ #define PHY_PMA_CMN_CTRL2 0x0001U #define PHY_PMA_PLL_RAW_CTRL 0x0003U +#define CDNS_TORRENT_OUTPUT_CLOCKS 1 + static const char * const clk_names[] = { [CDNS_TORRENT_REFCLK_DRIVER] = "refclk-driver", }; @@ -333,8 +335,7 @@ struct cdns_torrent_phy { struct regmap_field *phy_pma_pll_raw_ctrl; struct regmap_field *phy_reset_ctrl; struct regmap_field *phy_pcs_iso_link_ctrl_1[MAX_NUM_LANES]; - struct clk *clks[CDNS_TORRENT_REFCLK_DRIVER + 1]; - struct clk_onecell_data clk_data; + struct clk_hw_onecell_data *clk_hw_data; }; enum phy_powerstate { @@ -1659,8 +1660,9 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph const char *parent_name; struct regmap *regmap; char clk_name[100]; + struct clk_hw *hw; struct clk *clk; - int i; + int i, ret; derived_refclk = devm_kzalloc(dev, sizeof(*derived_refclk), GFP_KERNEL); if (!derived_refclk) @@ -1706,11 +1708,12 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph derived_refclk->hw.init = init; - clk = devm_clk_register(dev, &derived_refclk->hw); - if (IS_ERR(clk)) - return PTR_ERR(clk); + hw = &derived_refclk->hw; + ret = devm_clk_hw_register(dev, hw); + if (ret) + return ret; - cdns_phy->clks[CDNS_TORRENT_REFCLK_DRIVER] = clk; + 
cdns_phy->clk_hw_data->hws[CDNS_TORRENT_REFCLK_DRIVER] = hw; return 0; } @@ -2188,18 +2191,23 @@ static int cdns_torrent_clk_register(struct cdns_torrent_phy *cdns_phy) { struct device *dev = cdns_phy->dev; struct device_node *node = dev->of_node; + struct clk_hw_onecell_data *data; int ret; + data = devm_kzalloc(dev, struct_size(data, hws, CDNS_TORRENT_OUTPUT_CLOCKS), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->num = CDNS_TORRENT_OUTPUT_CLOCKS; + cdns_phy->clk_hw_data = data; + ret = cdns_torrent_derived_refclk_register(cdns_phy); if (ret) { dev_err(dev, "failed to register derived refclk\n"); return ret; } - cdns_phy->clk_data.clks = cdns_phy->clks; - cdns_phy->clk_data.clk_num = CDNS_TORRENT_REFCLK_DRIVER + 1; - - ret = of_clk_add_provider(node, of_clk_src_onecell_get, &cdns_phy->clk_data); + ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, data); if (ret) { dev_err(dev, "Failed to add clock provider: %s\n", node->name); return ret; From a6b3293b79a59796e52c9f13ade77a854cef872d Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Wed, 22 Sep 2021 14:37:33 +0200 Subject: [PATCH 0442/1202] dt-bindings: phy: cadence-torrent: Add clock IDs for derived and received refclk Add clock IDs for derived and received reference clock output. Signed-off-by: Swapnil Jakhade Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210922123735.21927-3-sjakhade@cadence.com Signed-off-by: Vinod Koul --- include/dt-bindings/phy/phy-cadence.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/dt-bindings/phy/phy-cadence.h b/include/dt-bindings/phy/phy-cadence.h index 4652bcb862652..24fdc9e11bd6c 100644 --- a/include/dt-bindings/phy/phy-cadence.h +++ b/include/dt-bindings/phy/phy-cadence.h @@ -12,6 +12,8 @@ #define TORRENT_SERDES_INTERNAL_SSC 2 #define CDNS_TORRENT_REFCLK_DRIVER 0 +#define CDNS_TORRENT_DERIVED_REFCLK 1 +#define CDNS_TORRENT_RECEIVED_REFCLK 2 /* Sierra */ #define CDNS_SIERRA_PLL_CMNLC 0 From 1aa4e4072b994879d52ed258e309c921c90f6f10 Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Wed, 22 Sep 2021 14:37:34 +0200 Subject: [PATCH 0443/1202] phy: cadence-torrent: Model reference clock driver as a clock to enable derived refclk When reference clock driver is enabled, either derived or received refclk is output on cmn_refclk_p/m. Update the reference clock driver implementation by modelling reference clock driver as a "clock" with derived reference clock set as its default parent. The support for received reference clock will be added in a separate patch. 
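As background for the hunks that follow: a clock output controlled by a register bit is modelled by pairing a clk_hw with clk_ops that poke a regmap field, then handing the hw to the clock core. A minimal sketch, where my_refclk, gate_field and the op names are illustrative placeholders rather than the driver's identifiers:

	struct my_refclk {
		struct clk_hw hw;
		struct regmap_field *gate_field;
		struct clk_init_data clk_data;
	};

	static int my_refclk_enable(struct clk_hw *hw)
	{
		struct my_refclk *c = container_of(hw, struct my_refclk, hw);

		return regmap_field_write(c->gate_field, 1);
	}

	static const struct clk_ops my_refclk_ops = {
		.enable = my_refclk_enable,
		/* .disable and .is_enabled work the same way */
	};

	/* registration: fill clk_init_data, then register the clk_hw */
	c->clk_data.name = clk_name;
	c->clk_data.ops = &my_refclk_ops;
	c->hw.init = &c->clk_data;
	ret = devm_clk_hw_register(dev, &c->hw);

Consumers then resolve the clock through the of_clk_hw_onecell_get provider that the driver registers, as set up in the earlier patch in this series.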
Signed-off-by: Swapnil Jakhade Link: https://lore.kernel.org/r/20210922123735.21927-4-sjakhade@cadence.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-torrent.c | 157 ++++++++++++++++++---- 1 file changed, 132 insertions(+), 25 deletions(-) diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index ecb1aa883c05c..615aca6bd52bd 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -235,10 +235,11 @@ #define PHY_PMA_CMN_CTRL2 0x0001U #define PHY_PMA_PLL_RAW_CTRL 0x0003U -#define CDNS_TORRENT_OUTPUT_CLOCKS 1 +#define CDNS_TORRENT_OUTPUT_CLOCKS 2 static const char * const clk_names[] = { [CDNS_TORRENT_REFCLK_DRIVER] = "refclk-driver", + [CDNS_TORRENT_DERIVED_REFCLK] = "refclk-der", }; static const struct reg_field phy_pll_cfg = @@ -261,10 +262,12 @@ static const struct reg_field phy_pcs_iso_link_ctrl_1 = static const struct reg_field phy_pipe_cmn_ctrl1_0 = REG_FIELD(PHY_PIPE_CMN_CTRL1, 0, 0); -#define REFCLK_OUT_NUM_CMN_CONFIG 5 +static const struct reg_field cmn_cdiag_refclk_ovrd_4 = + REG_FIELD(CMN_CDIAG_REFCLK_OVRD, 4, 4); + +#define REFCLK_OUT_NUM_CMN_CONFIG 4 enum cdns_torrent_refclk_out_cmn { - CMN_CDIAG_REFCLK_OVRD_4, CMN_CDIAG_REFCLK_DRV0_CTRL_1, CMN_CDIAG_REFCLK_DRV0_CTRL_4, CMN_CDIAG_REFCLK_DRV0_CTRL_5, @@ -272,7 +275,6 @@ enum cdns_torrent_refclk_out_cmn { }; static const struct reg_field refclk_out_cmn_cfg[] = { - [CMN_CDIAG_REFCLK_OVRD_4] = REG_FIELD(CMN_CDIAG_REFCLK_OVRD, 4, 4), [CMN_CDIAG_REFCLK_DRV0_CTRL_1] = REG_FIELD(CMN_CDIAG_REFCLK_DRV0_CTRL, 1, 1), [CMN_CDIAG_REFCLK_DRV0_CTRL_4] = REG_FIELD(CMN_CDIAG_REFCLK_DRV0_CTRL, 4, 4), [CMN_CDIAG_REFCLK_DRV0_CTRL_5] = REG_FIELD(CMN_CDIAG_REFCLK_DRV0_CTRL, 5, 5), @@ -330,6 +332,8 @@ struct cdns_torrent_phy { struct regmap *regmap_phy_pcs_lane_cdb[MAX_NUM_LANES]; struct regmap *regmap_dptx_phy_reg; struct regmap_field *phy_pll_cfg; + struct regmap_field *phy_pipe_cmn_ctrl1_0; + struct regmap_field *cmn_cdiag_refclk_ovrd_4; struct regmap_field *phy_pma_cmn_ctrl_1; struct regmap_field *phy_pma_cmn_ctrl_2; struct regmap_field *phy_pma_pll_raw_ctrl; @@ -345,10 +349,19 @@ enum phy_powerstate { POWERSTATE_A3 = 3, }; +struct cdns_torrent_refclk_driver { + struct clk_hw hw; + struct regmap_field *cmn_fields[REFCLK_OUT_NUM_CMN_CONFIG]; + struct clk_init_data clk_data; +}; + +#define to_cdns_torrent_refclk_driver(_hw) \ + container_of(_hw, struct cdns_torrent_refclk_driver, hw) + struct cdns_torrent_derived_refclk { struct clk_hw hw; struct regmap_field *phy_pipe_cmn_ctrl1_0; - struct regmap_field *cmn_fields[REFCLK_OUT_NUM_CMN_CONFIG]; + struct regmap_field *cmn_cdiag_refclk_ovrd_4; struct clk_init_data clk_data; }; @@ -1618,11 +1631,7 @@ static int cdns_torrent_derived_refclk_enable(struct clk_hw *hw) { struct cdns_torrent_derived_refclk *derived_refclk = to_cdns_torrent_derived_refclk(hw); - regmap_field_write(derived_refclk->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_6], 0); - regmap_field_write(derived_refclk->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], 1); - regmap_field_write(derived_refclk->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_5], 1); - regmap_field_write(derived_refclk->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_1], 0); - regmap_field_write(derived_refclk->cmn_fields[CMN_CDIAG_REFCLK_OVRD_4], 1); + regmap_field_write(derived_refclk->cmn_cdiag_refclk_ovrd_4, 1); regmap_field_write(derived_refclk->phy_pipe_cmn_ctrl1_0, 1); return 0; @@ -1633,6 +1642,7 @@ static void cdns_torrent_derived_refclk_disable(struct clk_hw *hw) struct 
cdns_torrent_derived_refclk *derived_refclk = to_cdns_torrent_derived_refclk(hw); regmap_field_write(derived_refclk->phy_pipe_cmn_ctrl1_0, 0); + regmap_field_write(derived_refclk->cmn_cdiag_refclk_ovrd_4, 0); } static int cdns_torrent_derived_refclk_is_enabled(struct clk_hw *hw) @@ -1640,7 +1650,7 @@ static int cdns_torrent_derived_refclk_is_enabled(struct clk_hw *hw) struct cdns_torrent_derived_refclk *derived_refclk = to_cdns_torrent_derived_refclk(hw); int val; - regmap_field_read(derived_refclk->phy_pipe_cmn_ctrl1_0, &val); + regmap_field_read(derived_refclk->cmn_cdiag_refclk_ovrd_4, &val); return !!val; } @@ -1655,21 +1665,19 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph { struct cdns_torrent_derived_refclk *derived_refclk; struct device *dev = cdns_phy->dev; - struct regmap_field *field; struct clk_init_data *init; const char *parent_name; - struct regmap *regmap; char clk_name[100]; struct clk_hw *hw; struct clk *clk; - int i, ret; + int ret; derived_refclk = devm_kzalloc(dev, sizeof(*derived_refclk), GFP_KERNEL); if (!derived_refclk) return -ENOMEM; snprintf(clk_name, sizeof(clk_name), "%s_%s", dev_name(dev), - clk_names[CDNS_TORRENT_REFCLK_DRIVER]); + clk_names[CDNS_TORRENT_DERIVED_REFCLK]); clk = devm_clk_get_optional(dev, "phy_en_refclk"); if (IS_ERR(clk)) { @@ -1688,27 +1696,104 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph init->flags = 0; init->name = clk_name; - regmap = cdns_phy->regmap_phy_pcs_common_cdb; - field = devm_regmap_field_alloc(dev, regmap, phy_pipe_cmn_ctrl1_0); - if (IS_ERR(field)) { - dev_err(dev, "phy_pipe_cmn_ctrl1_0 reg field init failed\n"); - return PTR_ERR(field); + derived_refclk->phy_pipe_cmn_ctrl1_0 = cdns_phy->phy_pipe_cmn_ctrl1_0; + derived_refclk->cmn_cdiag_refclk_ovrd_4 = cdns_phy->cmn_cdiag_refclk_ovrd_4; + + derived_refclk->hw.init = init; + + hw = &derived_refclk->hw; + ret = devm_clk_hw_register(dev, hw); + if (ret) + return ret; + + cdns_phy->clk_hw_data->hws[CDNS_TORRENT_DERIVED_REFCLK] = hw; + + return 0; +} + +static int cdns_torrent_refclk_driver_enable(struct clk_hw *hw) +{ + struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); + + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_6], 0); + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], 1); + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_5], 1); + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_1], 0); + + return 0; +} + +static void cdns_torrent_refclk_driver_disable(struct clk_hw *hw) +{ + struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); + + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_1], 1); +} + +static int cdns_torrent_refclk_driver_is_enabled(struct clk_hw *hw) +{ + struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); + int val; + + regmap_field_read(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_1], &val); + + return !val; +} + +static const struct clk_ops cdns_torrent_refclk_driver_ops = { + .enable = cdns_torrent_refclk_driver_enable, + .disable = cdns_torrent_refclk_driver_disable, + .is_enabled = cdns_torrent_refclk_driver_is_enabled, +}; + +static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy) +{ + struct cdns_torrent_refclk_driver *refclk_driver; + struct device *dev = cdns_phy->dev; + struct regmap_field *field; + struct clk_init_data 
*init; + const char *parent_name; + struct regmap *regmap; + char clk_name[100]; + struct clk_hw *hw; + int i, ret; + + refclk_driver = devm_kzalloc(dev, sizeof(*refclk_driver), GFP_KERNEL); + if (!refclk_driver) + return -ENOMEM; + + hw = cdns_phy->clk_hw_data->hws[CDNS_TORRENT_DERIVED_REFCLK]; + if (IS_ERR_OR_NULL(hw)) { + dev_err(dev, "No parent clock for refclk driver clock\n"); + return IS_ERR(hw) ? PTR_ERR(hw) : -ENOENT; } - derived_refclk->phy_pipe_cmn_ctrl1_0 = field; + parent_name = clk_hw_get_name(hw); + + snprintf(clk_name, sizeof(clk_name), "%s_%s", dev_name(dev), + clk_names[CDNS_TORRENT_REFCLK_DRIVER]); + + init = &refclk_driver->clk_data; + + init->ops = &cdns_torrent_refclk_driver_ops; + init->flags = 0; + init->parent_names = &parent_name; + init->num_parents = 1; + init->name = clk_name; regmap = cdns_phy->regmap_common_cdb; + for (i = 0; i < REFCLK_OUT_NUM_CMN_CONFIG; i++) { field = devm_regmap_field_alloc(dev, regmap, refclk_out_cmn_cfg[i]); if (IS_ERR(field)) { - dev_err(dev, "CMN reg field init failed\n"); + dev_err(dev, "Refclk driver CMN reg field init failed\n"); return PTR_ERR(field); } - derived_refclk->cmn_fields[i] = field; + refclk_driver->cmn_fields[i] = field; } - derived_refclk->hw.init = init; + refclk_driver->hw.init = init; - hw = &derived_refclk->hw; + hw = &refclk_driver->hw; ret = devm_clk_hw_register(dev, hw); if (ret) return ret; @@ -1768,6 +1853,22 @@ static int cdns_torrent_regfield_init(struct cdns_torrent_phy *cdns_phy) } cdns_phy->phy_pll_cfg = field; + regmap = cdns_phy->regmap_phy_pcs_common_cdb; + field = devm_regmap_field_alloc(dev, regmap, phy_pipe_cmn_ctrl1_0); + if (IS_ERR(field)) { + dev_err(dev, "phy_pipe_cmn_ctrl1_0 reg field init failed\n"); + return PTR_ERR(field); + } + cdns_phy->phy_pipe_cmn_ctrl1_0 = field; + + regmap = cdns_phy->regmap_common_cdb; + field = devm_regmap_field_alloc(dev, regmap, cmn_cdiag_refclk_ovrd_4); + if (IS_ERR(field)) { + dev_err(dev, "cmn_cdiag_refclk_ovrd_4 reg field init failed\n"); + return PTR_ERR(field); + } + cdns_phy->cmn_cdiag_refclk_ovrd_4 = field; + regmap = cdns_phy->regmap_phy_pma_common_cdb; field = devm_regmap_field_alloc(dev, regmap, phy_pma_cmn_ctrl_1); if (IS_ERR(field)) { @@ -2207,6 +2308,12 @@ static int cdns_torrent_clk_register(struct cdns_torrent_phy *cdns_phy) return ret; } + ret = cdns_torrent_refclk_driver_register(cdns_phy); + if (ret) { + dev_err(dev, "failed to register refclk driver\n"); + return ret; + } + ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, data); if (ret) { dev_err(dev, "Failed to add clock provider: %s\n", node->name); From fc2a3a3641af90aeb0c5aaa17449108aa8a4afde Mon Sep 17 00:00:00 2001 From: Swapnil Jakhade Date: Wed, 22 Sep 2021 14:37:35 +0200 Subject: [PATCH 0444/1202] phy: cadence-torrent: Add support to output received reference clock Add support to output received reference clock. Model the received reference clock as an alternate parent of reference clock driver clock. When received refclk is selected to output on cmn_refclk_p/m, this is the internal reference clock driven on the pma_cmn_refclk_int. 
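The parent selection itself is a one-bit mux. Condensed from the hunk below, with ctrl4_field standing in for the CMN_CDIAG_REFCLK_DRV0_CTRL_4 regmap field and cdns_torrent_refclk_driver_mux_table = { 1, 0 } mapping parent index to register value:

	static u8 cdns_torrent_refclk_driver_get_parent(struct clk_hw *hw)
	{
		unsigned int val;

		regmap_field_read(ctrl4_field, &val);
		return clk_mux_val_to_index(hw,
				cdns_torrent_refclk_driver_mux_table, 0, val);
	}

	static int cdns_torrent_refclk_driver_set_parent(struct clk_hw *hw,
							 u8 index)
	{
		u32 val = cdns_torrent_refclk_driver_mux_table[index];

		return regmap_field_write(ctrl4_field, val);
	}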
Signed-off-by: Swapnil Jakhade Link: https://lore.kernel.org/r/20210922123735.21927-5-sjakhade@cadence.com Signed-off-by: Vinod Koul --- drivers/phy/cadence/phy-cadence-torrent.c | 159 ++++++++++++++++++++-- 1 file changed, 148 insertions(+), 11 deletions(-) diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index 615aca6bd52bd..5786166133d30 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -235,11 +235,12 @@ #define PHY_PMA_CMN_CTRL2 0x0001U #define PHY_PMA_PLL_RAW_CTRL 0x0003U -#define CDNS_TORRENT_OUTPUT_CLOCKS 2 +#define CDNS_TORRENT_OUTPUT_CLOCKS 3 static const char * const clk_names[] = { [CDNS_TORRENT_REFCLK_DRIVER] = "refclk-driver", [CDNS_TORRENT_DERIVED_REFCLK] = "refclk-der", + [CDNS_TORRENT_RECEIVED_REFCLK] = "refclk-rec", }; static const struct reg_field phy_pll_cfg = @@ -281,6 +282,13 @@ static const struct reg_field refclk_out_cmn_cfg[] = { [CMN_CDIAG_REFCLK_DRV0_CTRL_6] = REG_FIELD(CMN_CDIAG_REFCLK_DRV0_CTRL, 6, 6), }; +static const int refclk_driver_parent_index[] = { + CDNS_TORRENT_DERIVED_REFCLK, + CDNS_TORRENT_RECEIVED_REFCLK +}; + +static u32 cdns_torrent_refclk_driver_mux_table[] = { 1, 0 }; + enum cdns_torrent_phy_type { TYPE_NONE, TYPE_DP, @@ -368,6 +376,16 @@ struct cdns_torrent_derived_refclk { #define to_cdns_torrent_derived_refclk(_hw) \ container_of(_hw, struct cdns_torrent_derived_refclk, hw) +struct cdns_torrent_received_refclk { + struct clk_hw hw; + struct regmap_field *phy_pipe_cmn_ctrl1_0; + struct regmap_field *cmn_cdiag_refclk_ovrd_4; + struct clk_init_data clk_data; +}; + +#define to_cdns_torrent_received_refclk(_hw) \ + container_of(_hw, struct cdns_torrent_received_refclk, hw) + struct cdns_reg_pairs { u32 val; u32 off; @@ -1711,12 +1729,94 @@ static int cdns_torrent_derived_refclk_register(struct cdns_torrent_phy *cdns_ph return 0; } +static int cdns_torrent_received_refclk_enable(struct clk_hw *hw) +{ + struct cdns_torrent_received_refclk *received_refclk = to_cdns_torrent_received_refclk(hw); + + regmap_field_write(received_refclk->phy_pipe_cmn_ctrl1_0, 1); + + return 0; +} + +static void cdns_torrent_received_refclk_disable(struct clk_hw *hw) +{ + struct cdns_torrent_received_refclk *received_refclk = to_cdns_torrent_received_refclk(hw); + + regmap_field_write(received_refclk->phy_pipe_cmn_ctrl1_0, 0); +} + +static int cdns_torrent_received_refclk_is_enabled(struct clk_hw *hw) +{ + struct cdns_torrent_received_refclk *received_refclk = to_cdns_torrent_received_refclk(hw); + int val, cmn_val; + + regmap_field_read(received_refclk->phy_pipe_cmn_ctrl1_0, &val); + regmap_field_read(received_refclk->cmn_cdiag_refclk_ovrd_4, &cmn_val); + + return val && !cmn_val; +} + +static const struct clk_ops cdns_torrent_received_refclk_ops = { + .enable = cdns_torrent_received_refclk_enable, + .disable = cdns_torrent_received_refclk_disable, + .is_enabled = cdns_torrent_received_refclk_is_enabled, +}; + +static int cdns_torrent_received_refclk_register(struct cdns_torrent_phy *cdns_phy) +{ + struct cdns_torrent_received_refclk *received_refclk; + struct device *dev = cdns_phy->dev; + struct clk_init_data *init; + const char *parent_name; + char clk_name[100]; + struct clk_hw *hw; + struct clk *clk; + int ret; + + received_refclk = devm_kzalloc(dev, sizeof(*received_refclk), GFP_KERNEL); + if (!received_refclk) + return -ENOMEM; + + snprintf(clk_name, sizeof(clk_name), "%s_%s", dev_name(dev), + clk_names[CDNS_TORRENT_RECEIVED_REFCLK]); + + clk = 
devm_clk_get_optional(dev, "phy_en_refclk"); + if (IS_ERR(clk)) { + dev_err(dev, "No parent clock for received_refclk\n"); + return PTR_ERR(clk); + } + + init = &received_refclk->clk_data; + + if (clk) { + parent_name = __clk_get_name(clk); + init->parent_names = &parent_name; + init->num_parents = 1; + } + init->ops = &cdns_torrent_received_refclk_ops; + init->flags = 0; + init->name = clk_name; + + received_refclk->phy_pipe_cmn_ctrl1_0 = cdns_phy->phy_pipe_cmn_ctrl1_0; + received_refclk->cmn_cdiag_refclk_ovrd_4 = cdns_phy->cmn_cdiag_refclk_ovrd_4; + + received_refclk->hw.init = init; + + hw = &received_refclk->hw; + ret = devm_clk_hw_register(dev, hw); + if (ret) + return ret; + + cdns_phy->clk_hw_data->hws[CDNS_TORRENT_RECEIVED_REFCLK] = hw; + + return 0; +} + static int cdns_torrent_refclk_driver_enable(struct clk_hw *hw) { struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_6], 0); - regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], 1); regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_5], 1); regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_1], 0); @@ -1740,10 +1840,30 @@ static int cdns_torrent_refclk_driver_is_enabled(struct clk_hw *hw) return !val; } +static u8 cdns_torrent_refclk_driver_get_parent(struct clk_hw *hw) +{ + struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); + unsigned int val; + + regmap_field_read(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], &val); + return clk_mux_val_to_index(hw, cdns_torrent_refclk_driver_mux_table, 0, val); +} + +static int cdns_torrent_refclk_driver_set_parent(struct clk_hw *hw, u8 index) +{ + struct cdns_torrent_refclk_driver *refclk_driver = to_cdns_torrent_refclk_driver(hw); + unsigned int val; + + val = cdns_torrent_refclk_driver_mux_table[index]; + return regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], val); +} + static const struct clk_ops cdns_torrent_refclk_driver_ops = { .enable = cdns_torrent_refclk_driver_enable, .disable = cdns_torrent_refclk_driver_disable, .is_enabled = cdns_torrent_refclk_driver_is_enabled, + .set_parent = cdns_torrent_refclk_driver_set_parent, + .get_parent = cdns_torrent_refclk_driver_get_parent, }; static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy) @@ -1752,7 +1872,8 @@ static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy struct device *dev = cdns_phy->dev; struct regmap_field *field; struct clk_init_data *init; - const char *parent_name; + const char **parent_names; + unsigned int num_parents; struct regmap *regmap; char clk_name[100]; struct clk_hw *hw; @@ -1762,12 +1883,19 @@ static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy if (!refclk_driver) return -ENOMEM; - hw = cdns_phy->clk_hw_data->hws[CDNS_TORRENT_DERIVED_REFCLK]; - if (IS_ERR_OR_NULL(hw)) { - dev_err(dev, "No parent clock for refclk driver clock\n"); - return IS_ERR(hw) ? PTR_ERR(hw) : -ENOENT; + num_parents = ARRAY_SIZE(refclk_driver_parent_index); + parent_names = devm_kzalloc(dev, (sizeof(char *) * num_parents), GFP_KERNEL); + if (!parent_names) + return -ENOMEM; + + for (i = 0; i < num_parents; i++) { + hw = cdns_phy->clk_hw_data->hws[refclk_driver_parent_index[i]]; + if (IS_ERR_OR_NULL(hw)) { + dev_err(dev, "No parent clock for refclk driver clock\n"); + return IS_ERR(hw) ? 
PTR_ERR(hw) : -ENOENT; + } + parent_names[i] = clk_hw_get_name(hw); } - parent_name = clk_hw_get_name(hw); snprintf(clk_name, sizeof(clk_name), "%s_%s", dev_name(dev), clk_names[CDNS_TORRENT_REFCLK_DRIVER]); @@ -1775,9 +1903,9 @@ static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy init = &refclk_driver->clk_data; init->ops = &cdns_torrent_refclk_driver_ops; - init->flags = 0; - init->parent_names = &parent_name; - init->num_parents = 1; + init->flags = CLK_SET_RATE_NO_REPARENT; + init->parent_names = parent_names; + init->num_parents = num_parents; init->name = clk_name; regmap = cdns_phy->regmap_common_cdb; @@ -1791,6 +1919,9 @@ static int cdns_torrent_refclk_driver_register(struct cdns_torrent_phy *cdns_phy refclk_driver->cmn_fields[i] = field; } + /* Enable Derived reference clock as default */ + regmap_field_write(refclk_driver->cmn_fields[CMN_CDIAG_REFCLK_DRV0_CTRL_4], 1); + refclk_driver->hw.init = init; hw = &refclk_driver->hw; @@ -2308,6 +2439,12 @@ static int cdns_torrent_clk_register(struct cdns_torrent_phy *cdns_phy) return ret; } + ret = cdns_torrent_received_refclk_register(cdns_phy); + if (ret) { + dev_err(dev, "failed to register received refclk\n"); + return ret; + } + ret = cdns_torrent_refclk_driver_register(cdns_phy); if (ret) { dev_err(dev, "failed to register refclk driver\n"); From c175536e7effbc0ca432a804bcbd043f276d6947 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Oct 2021 23:35:48 +0200 Subject: [PATCH 0445/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt index ac3e28e4b18f4..761cedbc1b0af 100644 --- a/arch/arm/arm-soc-for-next-contents.txt +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -3,6 +3,12 @@ arm/soc git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/soc-signed at91/soc git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux tags/at91-soc-5.16 + stm32/soc + git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32 tags/stm32-soc-for-v5.16-1 + imx/maintainer + git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-maintainers-5.16 + sunxi/soc + git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux tags/sunxi-core-for-5.16-1 arm/dt zynq/dt @@ -86,6 +92,10 @@ arm/dt git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts32-2 (bc22b6208f416ba702c17a93c9af6474f19e8212) https://github.com/Broadcom/stblinux tags/arm-soc/for-5.15/devicetree + mstar/dt + https://github.com/linux-chenxing/linux mstar-dt-next + omap/dt-gpmc + git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap tags/omap-for-v5.16/gpmc-signed arm/drivers renesas/drivers @@ -121,6 +131,8 @@ arm/drivers ep93xx: clock: convert in-place to COMMON_CLK drivers/optee git://git.linaro.org/people/jens.wiklander/linux-tee tags/optee-ffa-for-v5.16 + drivers/reset + git://git.pengutronix.de/pza/linux tags/reset-for-v5.16 arm/defconfigs defconfig/multiv7 @@ -131,6 +143,12 @@ arm/defconfigs arm64: defconfig: drop obsolete ARCH_* configs tegra/defconfig git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux tags/tegra-for-5.16-arm64-defconfig + mvebu/defconfig + git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu tags/mvebu-defconfig-5.16-1 + toshiba/defconfigs + git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti 
tags/visconti-arm-defconfig-for-v5.16 imx/defconfig git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-defconfig-5.16 arm/newsoc From 6f81b9945dccc0978b8bcb057ecefbc0da0435f2 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 21 Oct 2021 15:06:25 +1100 Subject: [PATCH 0446/1202] mm/vmalloc: fix numa spreading for large hash tables Eric Dumazet reported strange NUMA spreading info in [1], and found that commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") introduced this issue [2]. Digging into the difference before and after this patch, page allocation differs as follows: before: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_page /* because NUMA_NO_NODE, so choose alloc_page branch */ alloc_pages_current alloc_page_interleave /* can be proved by print policy mode */ after: alloc_large_system_hash __vmalloc __vmalloc_node(..., NUMA_NO_NODE, ...) __vmalloc_node_range __vmalloc_area_node alloc_pages_node /* choose nid by numa_mem_id() */ __alloc_pages_node(nid, ....) So after commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings"), it will allocate memory on the current node instead of interleaving allocations across nodes. [1] https://lore.kernel.org/linux-mm/CANn89iL6AAyWhfxdHO+jaT075iOa3XcYn9k6JJc7JR2XYn6k_Q@mail.gmail.com/ [2] https://lore.kernel.org/linux-mm/CANn89iLofTR=AK-QOZY87RdUZENCZUT4O6a0hvhu3_EwRMerOg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20210928121040.2547407-1-chenwandun@huawei.com Fixes: 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") Signed-off-by: Chen Wandun Reported-by: Eric Dumazet Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d77830ff604ca..9e37c828a1f0e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2816,6 +2816,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, unsigned int order, unsigned int nr_pages, struct page **pages) { unsigned int nr_allocated = 0; + struct page *page; + int i; /* * For order-0 pages we make use of bulk allocator, if @@ -2826,6 +2828,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, if (!order) { while (nr_allocated < nr_pages) { unsigned int nr, nr_pages_request; + page = NULL; /* * A maximum allowed request is hard-coded and is 100 @@ -2835,9 +2838,23 @@ vm_area_alloc_pages(gfp_t gfp, int nid, */ nr_pages_request = min(100U, nr_pages - nr_allocated); - nr = alloc_pages_bulk_array_node(gfp, nid, - nr_pages_request, pages + nr_allocated); - + if (nid == NUMA_NO_NODE) { + for (i = 0; i < nr_pages_request; i++) { + page = alloc_page(gfp); + if (page) + pages[nr_allocated + i] = page; + else { + nr = i; + break; + } + } + if (i >= nr_pages_request) + nr = nr_pages_request; + } else { + nr = alloc_pages_bulk_array_node(gfp, nid, + nr_pages_request, + pages + nr_allocated); + } nr_allocated += nr; cond_resched(); @@ -2856,11 +2873,13 @@ vm_area_alloc_pages(gfp_t gfp, int nid, gfp |= __GFP_COMP; /* High-order pages or fallback path if "bulk" fails.
*/ - while (nr_allocated < nr_pages) { - struct page *page; - int i; - page = alloc_pages_node(nid, gfp, order); + page = NULL; + while (nr_allocated < nr_pages) { + if (nid == NUMA_NO_NODE) + page = alloc_pages(gfp, order); + else + page = alloc_pages_node(nid, gfp, order); if (unlikely(!page)) break; From 9d7747e7d3efa86a4aee2625b5ef3005dc284098 Mon Sep 17 00:00:00 2001 From: David Yang Date: Thu, 21 Oct 2021 15:06:26 +1100 Subject: [PATCH 0447/1202] tools/testing/selftests/vm/split_huge_page_test.c: fix application of sizeof to pointer The coccinelle check report: "./tools/testing/selftests/vm/split_huge_page_test.c:344:36-42: ERROR: application of sizeof to pointer" Using the "strlen" to fix it. Link: https://lkml.kernel.org/r/20211012030116.184027-1-davidcomponentone@gmail.com Signed-off-by: David Yang Reported-by: Zeal Robot Cc: Zi Yan Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/split_huge_page_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 1af16d2c2a0ac..52497b7b9f1db 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -341,7 +341,7 @@ void split_file_backed_thp(void) } /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); close(fd); if (num_written < 1) { From a0682782ac117c6b53d35f329d226d40d64f5ced Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Oct 2021 15:06:26 +1100 Subject: [PATCH 0448/1202] lib/test_kasan.c: use underlying string helpers Calling memcmp() and memchr() with an intentional buffer overflow is now caught at compile time: In function 'memcmp', inlined from 'kasan_memcmp' at lib/test_kasan.c:897:2: include/linux/fortify-string.h:263:25: error: call to '__read_overflow' declared with attribute error: detected read beyond size of object (1st parameter) 263 | __read_overflow(); | ^~~~~~~~~~~~~~~~~ In function 'memchr', inlined from 'kasan_memchr' at lib/test_kasan.c:872:2: include/linux/fortify-string.h:277:17: error: call to '__read_overflow' declared with attribute error: detected read beyond size of object (1st parameter) 277 | __read_overflow(); | ^~~~~~~~~~~~~~~~~ Change the kasan tests to wrap those inside of a noinline function to prevent the compiler from noticing the bug and let kasan find it at runtime. Link: https://lkml.kernel.org/r/20211013150025.2875883-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Kees Cook Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Catalin Marinas Cc: Peter Collingbourne Cc: Patricia Alfonso Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_kasan.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 8835e07845785..c91b6181c7703 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -831,6 +831,21 @@ static void kmem_cache_invalid_free(struct kunit *test) kmem_cache_destroy(cache); } +/* + * noinline wrappers to prevent the compiler from noticing the overflow + * at compile time rather than having kasan catch it. 
+ * */ +static noinline void *__kasan_memchr(const void *s, int c, size_t n) +{ + return memchr(s, c, n); +} + +static noinline int __kasan_memcmp(const void *s1, const void *s2, size_t n) +{ + return memcmp(s1, s2, n); +} + + static void kasan_memchr(struct kunit *test) { char *ptr; @@ -849,7 +864,7 @@ static void kasan_memchr(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); KUNIT_EXPECT_KASAN_FAIL(test, - kasan_ptr_result = memchr(ptr, '1', size + 1)); + kasan_ptr_result = __kasan_memchr(ptr, '1', size + 1)); kfree(ptr); } @@ -874,7 +889,7 @@ static void kasan_memcmp(struct kunit *test) memset(arr, 0, sizeof(arr)); KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = memcmp(ptr, arr, size+1)); + kasan_int_result = __kasan_memcmp(ptr, arr, size+1)); kfree(ptr); } From 7d57474130ad68b3334b3fc57ec5711119f87934 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:06:27 +1100 Subject: [PATCH 0449/1202] kasan-test-use-underlying-string-helpers-checkpatch-fixes WARNING: Block comments use a trailing */ on a separate line #49: FILE: lib/test_kasan.c:837: + * */ total: 0 errors, 1 warnings, 37 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. ./patches/kasan-test-use-underlying-string-helpers.patch has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS. Please run checkpatch prior to sending patches Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_kasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index c91b6181c7703..decd765152d62 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -834,7 +834,7 @@ static void kmem_cache_invalid_free(struct kunit *test) /* * noinline wrappers to prevent the compiler from noticing the overflow * at compile time rather than having kasan catch it. - * */ + */ static noinline void *__kasan_memchr(const void *s, int c, size_t n) { return memchr(s, c, n); From 51d4531e2f257ff88ac53cd1d3800f601539f34a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 21 Oct 2021 15:06:28 +1100 Subject: [PATCH 0450/1202] memcg: page_alloc: skip bulk allocator for __GFP_ACCOUNT commit 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") switched to bulk page allocator for order 0 allocation backing vmalloc. However bulk page allocator does not support __GFP_ACCOUNT allocations and there are several users of kvmalloc(__GFP_ACCOUNT). For now make __GFP_ACCOUNT allocations bypass bulk page allocator. In future if there is workload that can be significantly improved with the bulk page allocator with __GFP_ACCCOUNT support, we can revisit the decision. 
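As a usage illustration only (a hypothetical caller, not part of the patch): kvmalloc() with GFP_KERNEL_ACCOUNT asks for the allocation to be charged to the caller's memory cgroup, and before this fix the order-0 vmalloc fallback could hand such a request to the bulk allocator, which silently skips the charge.

    #include <linux/gfp.h>
    #include <linux/slab.h>

    /*
     * Hypothetical caller, for illustration only: GFP_KERNEL_ACCOUNT
     * requests memcg accounting, which the bulk page allocator does
     * not perform.  With this patch such a request takes the
     * single-page path and is charged as expected.
     */
    static void *alloc_charged_table(size_t nr_entries)
    {
            return kvmalloc_array(nr_entries, sizeof(void *),
                                  GFP_KERNEL_ACCOUNT | __GFP_ZERO);
    }
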
Link: https://lkml.kernel.org/r/20211014151607.2171970-1-shakeelb@google.com Fixes: 5c1f4e690eec ("mm/vmalloc: switch to bulk allocator in __vmalloc_area_node()") Signed-off-by: Shakeel Butt Reported-by: Vasily Averin Tested-by: Vasily Averin Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Roman Gushchin Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b37435c274cf1..3ec39552d00f6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5223,6 +5223,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, if (unlikely(page_array && nr_pages - nr_populated == 0)) goto out; + /* Bulk allocator does not support memcg accounting. */ + if (memcg_kmem_enabled() && (gfp & __GFP_ACCOUNT)) + goto failed; + /* Use the single page allocator for one page. */ if (nr_pages - nr_populated == 1) goto failed; From 34dd6e6a8f5c06bf56ae97cc774bd54bd5190046 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:06:28 +1100 Subject: [PATCH 0451/1202] mm: hwpoison: remove the unnecessary THP check When handling THP hwpoison checked if the THP is in allocation or free stage since hwpoison may mistreat it as hugetlb page. After commit 415c64c1453a ("mm/memory-failure: split thp earlier in memory error handling") the problem has been fixed, so this check is no longer needed. Remove it. The side effect of the removal is hwpoison may report unsplit THP instead of unknown error for shmem THP. It seems not like a big deal. The following patch "mm: filemap: check if THP has hwpoisoned subpage for PMD page fault" depends on this, which fixes shmem THP with hwpoisoned subpage(s) are mapped PMD wrongly. So this patch needs to be backported to -stable as well. Link: https://lkml.kernel.org/r/20211020210755.23964-2-shy828301@gmail.com Signed-off-by: Yang Shi Suggested-by: Naoya Horiguchi Acked-by: Naoya Horiguchi Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3e6449f2102a7..73f68699e7ab4 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1147,20 +1147,6 @@ static int __get_hwpoison_page(struct page *page) if (!HWPoisonHandlable(head)) return -EBUSY; - if (PageTransHuge(head)) { - /* - * Non anonymous thp exists only in allocation/free time. We - * can't handle such a case correctly, so let's give it up. - * This should be better than triggering BUG_ON when kernel - * tries to touch the "partially handled" page. - */ - if (!PageAnon(head)) { - pr_err("Memory failure: %#lx: non anonymous thp\n", - page_to_pfn(page)); - return 0; - } - } - if (get_page_unless_zero(head)) { if (head == compound_head(page)) return 1; From e95de3e8d81112614964daaf74e85b3d1d59ab3a Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:06:29 +1100 Subject: [PATCH 0452/1202] mm: filemap: check if THP has hwpoisoned subpage for PMD page fault When handling shmem page fault the THP with corrupted subpage could be PMD mapped if certain conditions are satisfied. But kernel is supposed to send SIGBUS when trying to map hwpoisoned page. There are two paths which may do PMD map: fault around and regular fault. 
Before commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") the thing was even worse in fault around path. The THP could be PMD mapped as long as the VMA fits regardless what subpage is accessed and corrupted. After this commit as long as head page is not corrupted the THP could be PMD mapped. In the regular fault path the THP could be PMD mapped as long as the corrupted page is not accessed and the VMA fits. This loophole could be fixed by iterating every subpage to check if any of them is hwpoisoned or not, but it is somewhat costly in page fault path. So introduce a new page flag called HasHWPoisoned on the first tail page. It indicates the THP has hwpoisoned subpage(s). It is set if any subpage of THP is found hwpoisoned by memory failure and after the refcount is bumped successfully, then cleared when the THP is freed or split. The soft offline path doesn't need this since soft offline handler just marks a subpage hwpoisoned when the subpage is migrated successfully. But shmem THP didn't get split then migrated at all. Link: https://lkml.kernel.org/r/20211020210755.23964-3-shy828301@gmail.com Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Yang Shi Reviewed-by: Naoya Horiguchi Suggested-by: Kirill A. Shutemov Cc: Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/page-flags.h | 23 +++++++++++++++++++++++ mm/huge_memory.c | 2 ++ mm/memory-failure.c | 14 ++++++++++++++ mm/memory.c | 9 +++++++++ mm/page_alloc.c | 4 +++- 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f5..fbfd3fad48f21 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,6 +171,15 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_workingset, +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_mappedtodisk, +#endif + /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap) TESTSCFLAG_FALSE(DoubleMap) #endif +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned) +#endif + /* * Check if a page is currently marked HWPoisoned. 
Note that this check is * best effort only and inherently racy: there is no way to synchronize with diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92192cb086c79..c5142d237e482 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ lruvec = lock_page_lruvec(head); + ClearPageHasHWPoisoned(head); + for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 73f68699e7ab4..bdbbb32211a58 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1694,6 +1694,20 @@ int memory_failure(unsigned long pfn, int flags) } if (PageTransHuge(hpage)) { + /* + * The flag must be set after the refcount is bumped + * otherwise it may race with THP split. + * And the flag can't be set in get_hwpoison_page() since + * it is called by soft offline too and it is just called + * for !MF_COUNT_INCREASE. So here seems to be the best + * place. + * + * Don't need care about the above error handling paths for + * get_hwpoison_page() since they handle either free page + * or unhandlable page. The refcount is bumped iff the + * page is a valid handlable page. + */ + SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); res = -EBUSY; diff --git a/mm/memory.c b/mm/memory.c index adf9b9ef8277d..c52be6d6b6055 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (compound_order(page) != HPAGE_PMD_ORDER) return ret; + /* + * Just backoff if any subpage of a THP is corrupted otherwise + * the corrupted page may mapped by PMD silently to escape the + * check. This kind of THP just can be PTE mapped. Access to + * the corrupted subpage should trigger SIGBUS as expected. + */ + if (unlikely(PageHasHWPoisoned(page))) + return ret; + /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3ec39552d00f6..23d3339ac4e8e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) + if (compound) { ClearPageDoubleMap(page); + ClearPageHasHWPoisoned(page); + } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i); From 1042bdf1731790c2fe5b3d9e82a53c492d02172b Mon Sep 17 00:00:00 2001 From: Toshiki Fukasawa Date: Thu, 21 Oct 2021 15:06:30 +1100 Subject: [PATCH 0453/1202] /proc/kpageflags: prevent an integer overflow in stable_page_flags() stable_page_flags() returns kpageflags info in u64, but it uses "1 << KPF_*" internally which is considered as int. This type mismatch causes no visible problem now, but it will if you set bit 32 or more as done in a subsequent patch. So use BIT_ULL in order to avoid future overflow issues. 
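To make the overflow concrete, here is a minimal userspace demonstration (illustrative only; BIT_ULL here mirrors the kernel macro). The literal 1 is a 32-bit int, so shifts at bit 31 and above misbehave, while the ULL form stays well-defined:

    #include <stdint.h>
    #include <stdio.h>

    #define BIT_ULL(nr) (1ULL << (nr))   /* mirrors the kernel macro */

    int main(void)
    {
            uint64_t bad  = 1 << 31;     /* int shift: sign bit set, then
                                          * sign-extended to 64 bits */
            uint64_t good = BIT_ULL(32); /* clean 64-bit shift */

            printf("1 << 31     -> %#llx\n", (unsigned long long)bad);
            printf("BIT_ULL(32) -> %#llx\n", (unsigned long long)good);
            return 0;
    }

On a typical build this prints 0xffffffff80000000 for the int shift versus 0x100000000 for BIT_ULL(32), and "1 << 32" would not even be well-defined.
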
Link: http://lkml.kernel.org/r/20190725023100.31141-2-t-fukasawa@vx.jp.nec.com Signed-off-by: Toshiki Fukasawa Cc: Michal Hocko Cc: Dan Williams Cc: Alexey Dobriyan Cc: Christoph Hellwig Cc: Naoya Horiguchi Cc: Junichi Nomura Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/page.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/proc/page.c b/fs/proc/page.c index 9f1077d94cde1..265f4fca15e29 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,7 @@ u64 stable_page_flags(struct page *page) * it differentiates a memory hole from a page with no flags */ if (!page) - return 1 << KPF_NOPAGE; + return BIT_ULL(KPF_NOPAGE); k = page->flags; u = 0; @@ -127,22 +127,22 @@ u64 stable_page_flags(struct page *page) * simple test in page_mapped() is not enough. */ if (!PageSlab(page) && page_mapped(page)) - u |= 1 << KPF_MMAP; + u |= BIT_ULL(KPF_MMAP); if (PageAnon(page)) - u |= 1 << KPF_ANON; + u |= BIT_ULL(KPF_ANON); if (PageKsm(page)) - u |= 1 << KPF_KSM; + u |= BIT_ULL(KPF_KSM); /* * compound pages: export both head/tail info * they together define a compound page's start/end pos and order */ if (PageHead(page)) - u |= 1 << KPF_COMPOUND_HEAD; + u |= BIT_ULL(KPF_COMPOUND_HEAD); if (PageTail(page)) - u |= 1 << KPF_COMPOUND_TAIL; + u |= BIT_ULL(KPF_COMPOUND_TAIL); if (PageHuge(page)) - u |= 1 << KPF_HUGE; + u |= BIT_ULL(KPF_HUGE); /* * PageTransCompound can be true for non-huge compound pages (slab * pages or pages allocated by drivers with __GFP_COMP) because it @@ -153,14 +153,13 @@ u64 stable_page_flags(struct page *page) struct page *head = compound_head(page); if (PageLRU(head) || PageAnon(head)) - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_THP); else if (is_huge_zero_page(head)) { - u |= 1 << KPF_ZERO_PAGE; - u |= 1 << KPF_THP; + u |= BIT_ULL(KPF_ZERO_PAGE); + u |= BIT_ULL(KPF_THP); } } else if (is_zero_pfn(page_to_pfn(page))) - u |= 1 << KPF_ZERO_PAGE; - + u |= BIT_ULL(KPF_ZERO_PAGE); /* * Caveats on high order pages: page->_refcount will only be set @@ -168,23 +167,23 @@ u64 stable_page_flags(struct page *page) * SLOB won't set PG_slab at all on compound pages. 
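As a hypothetical usage example (not part of the patch), a userspace checker in the spirit of the page-types tool can now read the flag word for such a pfn and see KPF_RESERVED (bit 32, which is also why the previous patch switched to BIT_ULL) instead of oopsing the kernel:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define KPF_RESERVED 32  /* bit index documented for /proc/kpageflags */

    int main(int argc, char **argv)
    {
            uint64_t flags;
            unsigned long pfn;
            int fd;

            if (argc != 2)
                    return 1;
            pfn = strtoul(argv[1], NULL, 0);

            fd = open("/proc/kpageflags", O_RDONLY);  /* needs root */
            if (fd < 0)
                    return 1;
            /* one little-endian u64 of flags per pfn */
            if (pread(fd, &flags, sizeof(flags),
                      pfn * sizeof(flags)) != sizeof(flags)) {
                    close(fd);
                    return 1;
            }
            close(fd);

            printf("pfn %lu reserved: %llu\n", pfn,
                   (unsigned long long)((flags >> KPF_RESERVED) & 1));
            return 0;
    }
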
*/ if (PageBuddy(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); else if (page_count(page) == 0 && is_free_buddy_page(page)) - u |= 1 << KPF_BUDDY; + u |= BIT_ULL(KPF_BUDDY); if (PageOffline(page)) - u |= 1 << KPF_OFFLINE; + u |= BIT_ULL(KPF_OFFLINE); if (PageTable(page)) - u |= 1 << KPF_PGTABLE; + u |= BIT_ULL(KPF_PGTABLE); if (page_is_idle(page)) - u |= 1 << KPF_IDLE; + u |= BIT_ULL(KPF_IDLE); u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); if (PageTail(page) && PageSlab(compound_head(page))) - u |= 1 << KPF_SLAB; + u |= BIT_ULL(KPF_SLAB); u |= kpf_copy_bit(k, KPF_ERROR, PG_error); u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); @@ -197,7 +196,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim); if (PageSwapCache(page)) - u |= 1 << KPF_SWAPCACHE; + u |= BIT_ULL(KPF_SWAPCACHE); u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked); u |= kpf_copy_bit(k, KPF_UNEVICTABLE, PG_unevictable); From 1fc85b1a193b8ab42b3910ee3d1cf23e5af1231f Mon Sep 17 00:00:00 2001 From: Toshiki Fukasawa Date: Thu, 21 Oct 2021 15:06:31 +1100 Subject: [PATCH 0454/1202] /proc/kpageflags: do not use uninitialized struct pages A kernel panic was observed during reading /proc/kpageflags for first few pfns allocated by pmem namespace: BUG: unable to handle page fault for address: fffffffffffffffe [ 114.495280] #PF: supervisor read access in kernel mode [ 114.495738] #PF: error_code(0x0000) - not-present page [ 114.496203] PGD 17120e067 P4D 17120e067 PUD 171210067 PMD 0 [ 114.496713] Oops: 0000 [#1] SMP PTI [ 114.497037] CPU: 9 PID: 1202 Comm: page-types Not tainted 5.3.0-rc1 #1 [ 114.497621] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 [ 114.498706] RIP: 0010:stable_page_flags+0x27/0x3f0 [ 114.499142] Code: 82 66 90 66 66 66 66 90 48 85 ff 0f 84 d1 03 00 00 41 54 55 48 89 fd 53 48 8b 57 08 48 8b 1f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 02 0f 84 57 03 00 00 45 31 e4 48 8b 55 08 48 89 ef [ 114.500788] RSP: 0018:ffffa5e601a0fe60 EFLAGS: 00010202 [ 114.501373] RAX: fffffffffffffffe RBX: ffffffffffffffff RCX: 0000000000000000 [ 114.502009] RDX: 0000000000000001 RSI: 00007ffca13a7310 RDI: ffffd07489000000 [ 114.502637] RBP: ffffd07489000000 R08: 0000000000000001 R09: 0000000000000000 [ 114.503270] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000240000 [ 114.503896] R13: 0000000000080000 R14: 00007ffca13a7310 R15: ffffa5e601a0ff08 [ 114.504530] FS: 00007f0266c7f540(0000) GS:ffff962dbbac0000(0000) knlGS:0000000000000000 [ 114.505245] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 114.505754] CR2: fffffffffffffffe CR3: 000000023a204000 CR4: 00000000000006e0 [ 114.506401] Call Trace: [ 114.506660] kpageflags_read+0xb1/0x130 [ 114.507051] proc_reg_read+0x39/0x60 [ 114.507387] vfs_read+0x8a/0x140 [ 114.507686] ksys_pread64+0x61/0xa0 [ 114.508021] do_syscall_64+0x5f/0x1a0 [ 114.508372] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 114.508844] RIP: 0033:0x7f0266ba426b The reason for the panic is that stable_page_flags() which parses the page flags uses uninitialized struct pages reserved by the ZONE_DEVICE driver. Earlier approach to fix this was discussed here: https://marc.info/?l=linux-mm&m=152964770000672&w=2 This is another approach. To avoid using the uninitialized struct page, immediately return with KPF_RESERVED at the beginning of stable_page_flags() if the page is reserved by ZONE_DEVICE driver. 
Dan said: : The nvdimm implementation uses vmem_altmap to arrange for the 'struct : page' array to be allocated from a reservation of a pmem namespace. A : namespace in this mode contains an info-block that consumes the first : 8K of the namespace capacity, capacity designated for page mapping, : capacity for padding the start of data to optionally 4K, 2MB, or 1GB : (on x86), and then the namespace data itself. The implementation : specifies a section aligned (now sub-section aligned) address to : arch_add_memory() to establish the linear mapping to map the metadata, : and then vmem_altmap indicates to memmap_init_zone() which pfns : represent data. The implementation only specifies enough 'struct page' : capacity for pfn_to_page() to operate on the data space, not the : namespace metadata space. : : The proposal to validate ZONE_DEVICE pfns against the altmap seems the : right approach to me. Link: http://lkml.kernel.org/r/20190725023100.31141-3-t-fukasawa@vx.jp.nec.com Signed-off-by: Toshiki Fukasawa Cc: Alexey Dobriyan Cc: Christoph Hellwig Cc: Dan Williams Cc: Junichi Nomura Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/page.c | 3 +++ include/linux/memremap.h | 6 ++++++ mm/memremap.c | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/fs/proc/page.c b/fs/proc/page.c index 265f4fca15e29..4dcbcd506cb6e 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -117,6 +117,9 @@ u64 stable_page_flags(struct page *page) if (!page) return BIT_ULL(KPF_NOPAGE); + if (pfn_zone_device_reserved(page_to_pfn(page))) + return BIT_ULL(KPF_RESERVED); + k = page->flags; u = 0; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index c0e9d35889e8d..119f130ef8f10 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -131,6 +131,7 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) } #ifdef CONFIG_ZONE_DEVICE +bool pfn_zone_device_reserved(unsigned long pfn); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); @@ -143,6 +144,11 @@ unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); unsigned long memremap_compat_align(void); #else +static inline bool pfn_zone_device_reserved(unsigned long pfn) +{ + return false; +} + static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { diff --git a/mm/memremap.c b/mm/memremap.c index ed593bf87109a..e99944ab8eb3c 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -109,6 +109,26 @@ static unsigned long pfn_next(unsigned long pfn) return pfn + 1; } +/* + * This returns true if the page is reserved by ZONE_DEVICE driver. 
+ */ +bool pfn_zone_device_reserved(unsigned long pfn) +{ + struct dev_pagemap *pgmap; + struct vmem_altmap *altmap; + bool ret = false; + + pgmap = get_dev_pagemap(pfn, NULL); + if (!pgmap) + return ret; + altmap = pgmap_altmap(pgmap); + if (altmap && pfn < (altmap->base_pfn + altmap->reserve)) + ret = true; + put_dev_pagemap(pgmap); + + return ret; +} + #define for_each_device_pfn(pfn, map, i) \ for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn)) From d542855e028789446a5362ecc5ceebcc1fddb8dd Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Thu, 21 Oct 2021 15:06:32 +1100 Subject: [PATCH 0455/1202] procfs: prevent unpriveleged processes accessing fdinfo dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file permissions on the fdinfo dir from were changed from S_IRUSR|S_IXUSR to S_IRUGO|S_IXUGO, and a PTRACE_MODE_READ check was added for opening the fdinfo files [1]. However, the ptrace permission check was not added to the directory, allowing anyone to get the open FD numbers by reading the fdinfo directory. Add the missing ptrace permission check for opening the fdinfo directory. [1] https://lkml.kernel.org/r/20210308170651.919148-1-kaleshsingh@google.com Link: https://lkml.kernel.org/r/20210713162008.1056986-1-kaleshsingh@google.com Fixes: 7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ") Signed-off-by: Kalesh Singh Cc: Kees Cook Cc: Eric W. Biederman Cc: Christian Brauner Cc: Christian König Cc: Suren Baghdasaryan Cc: Hridya Valsaraju Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/fd.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 172c86270b312..913bef0d2a36c 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -72,7 +72,7 @@ static int seq_show(struct seq_file *m, void *v) return 0; } -static int seq_fdinfo_open(struct inode *inode, struct file *file) +static int proc_fdinfo_access_allowed(struct inode *inode) { bool allowed = false; struct task_struct *task = get_proc_task(inode); @@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) if (!allowed) return -EACCES; + return 0; +} + +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + return single_open(file, seq_show, inode); } @@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) proc_fdinfo_instantiate); } +static int proc_open_fdinfo(struct inode *inode, struct file *file) +{ + int ret = proc_fdinfo_access_allowed(inode); + + if (ret) + return ret; + + return 0; +} + const struct inode_operations proc_fdinfo_inode_operations = { .lookup = proc_lookupfdinfo, .setattr = proc_setattr, }; const struct file_operations proc_fdinfo_operations = { + .open = proc_open_fdinfo, .read = generic_read_dir, .iterate_shared = proc_readfdinfo, .llseek = generic_file_llseek, From 1799063d1370c71cacbb36db1347b393b862b688 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Oct 2021 15:06:33 +1100 Subject: [PATCH 0456/1202] scripts/spelling.txt: add more spellings to spelling.txt Some of the more common spelling mistakes and typos that I've found while fixing up spelling mistakes in the kernel in the past few months. 
Link: https://lkml.kernel.org/r/20210907072941.7033-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/spelling.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 17fdc620d5488..fd8f07317b8e1 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -178,6 +178,7 @@ assum||assume assumtpion||assumption asuming||assuming asycronous||asynchronous +asychronous||asynchronous asynchnous||asynchronous asynchromous||asynchronous asymetric||asymmetric @@ -241,6 +242,7 @@ beter||better betweeen||between bianries||binaries bitmast||bitmask +bitwiedh||bitwidth boardcast||broadcast borad||board boundry||boundary @@ -265,7 +267,10 @@ calucate||calculate calulate||calculate cancelation||cancellation cancle||cancel +cant||can't +cant'||can't canot||cannot +cann't||can't capabilites||capabilities capabilties||capabilities capabilty||capability @@ -501,6 +506,7 @@ disble||disable disgest||digest disired||desired dispalying||displaying +dissable||disable diplay||display directon||direction direcly||directly @@ -595,6 +601,7 @@ exceded||exceeded exceds||exceeds exceeed||exceed excellant||excellent +exchnage||exchange execeeded||exceeded execeeds||exceeds exeed||exceed @@ -938,6 +945,7 @@ migrateable||migratable milliseonds||milliseconds minium||minimum minimam||minimum +minimun||minimum miniumum||minimum minumum||minimum misalinged||misaligned @@ -956,6 +964,7 @@ mmnemonic||mnemonic mnay||many modfiy||modify modifer||modifier +modul||module modulues||modules momery||memory memomry||memory @@ -1154,6 +1163,7 @@ programable||programmable programers||programmers programm||program programms||programs +progres||progress progresss||progress prohibitted||prohibited prohibitting||prohibiting @@ -1328,6 +1338,7 @@ servive||service setts||sets settting||setting shapshot||snapshot +shoft||shift shotdown||shutdown shoud||should shouldnt||shouldn't @@ -1439,6 +1450,7 @@ syfs||sysfs symetric||symmetric synax||syntax synchonized||synchronized +synchronization||synchronization synchronuously||synchronously syncronize||synchronize syncronized||synchronized @@ -1521,6 +1533,7 @@ unexpexted||unexpected unfortunatelly||unfortunately unifiy||unify uniterrupted||uninterrupted +uninterruptable||uninterruptible unintialized||uninitialized unitialized||uninitialized unkmown||unknown @@ -1553,6 +1566,7 @@ unuseful||useless unvalid||invalid upate||update upsupported||unsupported +useable||usable usefule||useful usefull||useful usege||usage @@ -1574,6 +1588,7 @@ varient||variant vaule||value verbse||verbose veify||verify +verfication||verification veriosn||version verisons||versions verison||version @@ -1586,6 +1601,7 @@ visiters||visitors vitual||virtual vunerable||vulnerable wakeus||wakeups +was't||wasn't wathdog||watchdog wating||waiting wiat||wait From ec06309d354d26663ccd2b2c7fdfd89b913bbc95 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 21 Oct 2021 15:06:33 +1100 Subject: [PATCH 0457/1202] scripts/spelling.txt: fix "mistake" version of "synchronization" If both "mistake" version and "correction" version are the same, a warning message is created by checkpatch which is impossible to fix. But it was noticed that Colan Ian King created a commit e6c0a0889b80 ("ALSA: aloop: Fix spelling mistake "synchronization" -> "synchronization"") which suggests that this spelling mistake was fixed by replacing the word "synchronization" with itself. 
But the actual diff shows that the mistake in the code was "sychronization". It is rather likely that the "mistake" in spelling.txt should have been the latter. Link: https://lkml.kernel.org/r/20210926065529.6880-1-sven@narfation.org Fixes: 2e74c9433ba8 ("scripts/spelling.txt: add more spellings to spelling.txt") Signed-off-by: Sven Eckelmann Reviewed-by: Colin Ian King Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/spelling.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index fd8f07317b8e1..acf6ea7112992 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1450,7 +1450,7 @@ syfs||sysfs symetric||symmetric synax||syntax synchonized||synchronized -synchronization||synchronization +sychronization||synchronization synchronuously||synchronously syncronize||synchronize syncronized||synchronized From 76dc0f16eeec25edeec4dcec6c7e41f6574d0621 Mon Sep 17 00:00:00 2001 From: weidonghui Date: Thu, 21 Oct 2021 15:06:34 +1100 Subject: [PATCH 0458/1202] scripts/decodecode: fix faulting instruction no print when opps.file is DOS format If opps.file is in DOS format, faulting instruction cannot be printed: / # ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- / # ./scripts/decodecode < oops.file [ 0.734345] Code: d0002881 912f9c21 94067e68 d2800001 (b900003f) aarch64-linux-gnu-strip: '/tmp/tmp.5Y9eybnnSi.o': No such file aarch64-linux-gnu-objdump: '/tmp/tmp.5Y9eybnnSi.o': No such file All code ======== 0: d0002881 adrp x1, 0x512000 4: 912f9c21 add x1, x1, #0xbe7 8: 94067e68 bl 0x19f9a8 c: d2800001 mov x1, #0x0 // #0 10: b900003f str wzr, [x1] Code starting with the faulting instruction =========================================== Background: The compilation environment is Ubuntu, and the test environment is Windows. Most logs are generated in the Windows environment. In this way, CR (carriage return) will inevitably appear, which will affect the use of decodecode in the Ubuntu environment. 
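The fix is a single extra "s/\r//" at the front of the script's sed expression; the following standalone helper (an illustrative C sketch, not from the patch) performs the same normalization, deleting every CR so a DOS-format "Code:" line parses like a plain LF-terminated one.

    #include <stdio.h>

    /* Illustrative sketch only: drop every '\r' in place. */
    static void strip_cr(char *line)
    {
            char *src = line, *dst = line;

            while (*src) {
                    if (*src != '\r')
                            *dst++ = *src;
                    src++;
            }
            *dst = '\0';
    }

    int main(void)
    {
            char line[] = "Code: d0002881 912f9c21 (b900003f)\r\n";

            strip_cr(line);
            printf("%s", line);  /* now ends in a bare newline */
            return 0;
    }
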
The repaired effect is as follows: / # ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- / # ./scripts/decodecode < oops.file [ 0.734345] Code: d0002881 912f9c21 94067e68 d2800001 (b900003f) All code ======== 0: d0002881 adrp x1, 0x512000 4: 912f9c21 add x1, x1, #0xbe7 8: 94067e68 bl 0x19f9a8 c: d2800001 mov x1, #0x0 // #0 10:* b900003f str wzr, [x1] <-- trapping instruction Code starting with the faulting instruction =========================================== 0: b900003f str wzr, [x1] Link: https://lkml.kernel.org/r/20211008064712.926-1-weidonghui@allwinnertech.com Signed-off-by: weidonghui Acked-by: Borislav Petkov Cc: Marc Zyngier Cc: Will Deacon Cc: Rabin Vincent Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/decodecode | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decodecode b/scripts/decodecode index 31d884e35f2f1..c711a196511c6 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -126,7 +126,7 @@ if [ $marker -ne 0 ]; then fi echo Code starting with the faulting instruction > $T.aa echo =========================================== >> $T.aa -code=`echo $code | sed -e 's/ [<(]/ /;s/[>)] / /;s/ /,0x/g; s/[>)]$//'` +code=`echo $code | sed -e 's/\r//;s/ [<(]/ /;s/[>)] / /;s/ /,0x/g; s/[>)]$//'` echo -n " .$type 0x" > $T.s echo $code >> $T.s disas $T 0 From 7746a03c64ad0f4555ff21931381392d46451b97 Mon Sep 17 00:00:00 2001 From: Chenyuan Mi Date: Thu, 21 Oct 2021 15:06:34 +1100 Subject: [PATCH 0459/1202] ocfs2: Fix handle refcount leak in two exception handling paths The reference counting issue happens in two exception handling paths of ocfs2_replay_truncate_records(). When executing these two exception handling paths, the function forgets to decrease the refcount of handle increased by ocfs2_start_trans(), causing a refcount leak. Fix this issue by using ocfs2_commit_trans() to decrease the refcount of handle in two handling paths. Link: https://lkml.kernel.org/r/20210908102055.10168-1-cymi20@fudan.edu.cn Signed-off-by: Chenyuan Mi Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Reviewed-by: Joseph Qi Cc: Wengang Wang Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5d9ae17bd443f..1550f18be4511 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5940,6 +5940,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { + ocfs2_commit_trans(osb, handle); mlog_errno(status); goto bail; } @@ -5964,6 +5965,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, data_alloc_bh, start_blk, num_clusters); if (status < 0) { + ocfs2_commit_trans(osb, handle); mlog_errno(status); goto bail; } From 9a8421029201ae0d273e5bcfc0a13c9c8ae144b8 Mon Sep 17 00:00:00 2001 From: Valentin Vidic Date: Thu, 21 Oct 2021 15:06:35 +1100 Subject: [PATCH 0460/1202] ocfs2: cleanup journal init and shutdown Allocate and free struct ocfs2_journal in ocfs2_journal_init and ocfs2_journal_shutdown. Init and release of system inodes references the journal so reorder calls to make sure they work correctly. 
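The ordering rule being enforced can be shown with a generic, self-contained analogy (assumed for illustration; this is not ocfs2 code): the journal must be created before, and destroyed after, the system inodes that hold a reference to it.

    #include <stdlib.h>

    /* Assumed analogy only: the "journal" must outlive the "system
     * inode" that points at it, mirroring how osb->journal now
     * brackets ocfs2_release_system_inodes() in the patch. */
    struct journal { int state; };
    struct sys_inode { struct journal *j; };

    int main(void)
    {
            struct journal *j = calloc(1, sizeof(*j));    /* init first */
            struct sys_inode *si = calloc(1, sizeof(*si));

            si->j = j;        /* the inode references the journal */
            si->j->state = 1; /* ...and may use it until released */

            free(si);         /* release the referencing object first */
            free(j);          /* shut the journal down last */
            return 0;
    }
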
Link: https://lkml.kernel.org/r/20211009145006.3478-1-vvidic@valentin-vidic.from.hr Signed-off-by: Valentin Vidic Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/inode.c | 4 ++-- fs/ocfs2/journal.c | 26 +++++++++++++++++++++----- fs/ocfs2/journal.h | 3 +-- fs/ocfs2/super.c | 40 +++------------------------------------- 4 files changed, 27 insertions(+), 46 deletions(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index bc8f32fab964c..6c2411c2afcf1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -125,7 +125,6 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, struct inode *inode = NULL; struct super_block *sb = osb->sb; struct ocfs2_find_inode_args args; - journal_t *journal = OCFS2_SB(sb)->journal->j_journal; trace_ocfs2_iget_begin((unsigned long long)blkno, flags, sysfile_type); @@ -172,10 +171,11 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, * part of the transaction - the inode could have been reclaimed and * now it is reread from disk. */ - if (journal) { + if (osb->journal) { transaction_t *transaction; tid_t tid; struct ocfs2_inode_info *oi = OCFS2_I(inode); + journal_t *journal = osb->journal->j_journal; read_lock(&journal->j_state_lock); if (journal->j_running_transaction) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f15750aac5d5..b9c339335a53d 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -810,19 +810,34 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) write_unlock(&journal->j_state_lock); } -int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) +int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty) { int status = -1; struct inode *inode = NULL; /* the journal inode */ journal_t *j_journal = NULL; + struct ocfs2_journal *journal = NULL; struct ocfs2_dinode *di = NULL; struct buffer_head *bh = NULL; - struct ocfs2_super *osb; int inode_lock = 0; - BUG_ON(!journal); + /* initialize our journal structure */ + journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); + if (!journal) { + mlog(ML_ERROR, "unable to alloc journal\n"); + status = -ENOMEM; + goto done; + } + osb->journal = journal; + journal->j_osb = osb; - osb = journal->j_osb; + atomic_set(&journal->j_num_trans, 0); + init_rwsem(&journal->j_trans_barrier); + init_waitqueue_head(&journal->j_checkpointed); + spin_lock_init(&journal->j_lock); + journal->j_trans_id = 1UL; + INIT_LIST_HEAD(&journal->j_la_cleanups); + INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); + journal->j_state = OCFS2_JOURNAL_FREE; /* already have the inode for our journal */ inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, @@ -1028,9 +1043,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) journal->j_state = OCFS2_JOURNAL_FREE; -// up_write(&journal->j_trans_barrier); done: iput(inode); + kfree(journal); + osb->journal = NULL; } static void ocfs2_clear_journal_error(struct super_block *sb, diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index d158acb8b38a8..8dcb2f2cadbc5 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -167,8 +167,7 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb); * ocfs2_start_checkpoint - Kick the commit thread to do a checkpoint. 
*/ void ocfs2_set_journal_params(struct ocfs2_super *osb); -int ocfs2_journal_init(struct ocfs2_journal *journal, - int *dirty); +int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty); void ocfs2_journal_shutdown(struct ocfs2_super *osb); int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5c914ce9b3ac9..1286b88b6fa17 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1894,8 +1894,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); - ocfs2_journal_shutdown(osb); - ocfs2_sync_blockdev(sb); ocfs2_purge_refcount_trees(osb); @@ -1918,6 +1916,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_release_system_inodes(osb); + ocfs2_journal_shutdown(osb); + /* * If we're dismounting due to mount error, mount.ocfs2 will clean * up heartbeat. If we're a local mount, there is no heartbeat. @@ -2016,7 +2016,6 @@ static int ocfs2_initialize_super(struct super_block *sb, int i, cbits, bbits; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; struct inode *inode = NULL; - struct ocfs2_journal *journal; struct ocfs2_super *osb; u64 total_blocks; @@ -2197,33 +2196,6 @@ static int ocfs2_initialize_super(struct super_block *sb, get_random_bytes(&osb->s_next_generation, sizeof(u32)); - /* FIXME - * This should be done in ocfs2_journal_init(), but unknown - * ordering issues will cause the filesystem to crash. - * If anyone wants to figure out what part of the code - * refers to osb->journal before ocfs2_journal_init() is run, - * be my guest. - */ - /* initialize our journal structure */ - - journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL); - if (!journal) { - mlog(ML_ERROR, "unable to alloc journal\n"); - status = -ENOMEM; - goto bail; - } - osb->journal = journal; - journal->j_osb = osb; - - atomic_set(&journal->j_num_trans, 0); - init_rwsem(&journal->j_trans_barrier); - init_waitqueue_head(&journal->j_checkpointed); - spin_lock_init(&journal->j_lock); - journal->j_trans_id = (unsigned long) 1; - INIT_LIST_HEAD(&journal->j_la_cleanups); - INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); - journal->j_state = OCFS2_JOURNAL_FREE; - INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); init_llist_head(&osb->dquot_drop_list); @@ -2404,7 +2376,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) * ourselves. */ /* Init our journal object. */ - status = ocfs2_journal_init(osb->journal, &dirty); + status = ocfs2_journal_init(osb, &dirty); if (status < 0) { mlog(ML_ERROR, "Could not initialize journal!\n"); goto finally; @@ -2513,12 +2485,6 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) kfree(osb->osb_orphan_wipes); kfree(osb->slot_recovery_generations); - /* FIXME - * This belongs in journal shutdown, but because we have to - * allocate osb->journal at the start of ocfs2_initialize_osb(), - * we free it here. - */ - kfree(osb->journal); kfree(osb->local_alloc_copy); kfree(osb->uuid_str); kfree(osb->vol_label); From d1c72d109177b77644faf4fd873b6c1c1e4d5311 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Oct 2021 15:06:36 +1100 Subject: [PATCH 0461/1202] ocfs2/dlm: remove redundant assignment of variable ret The variable ret is being assigned a value that is never read, it is updated later on with a different value. The assignment is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Link: https://lkml.kernel.org/r/20211007233452.30815-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/dlm/dlmrecovery.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 0e7aad1b11cc4..5cd5f7511dacd 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2698,7 +2698,6 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node) continue; } retry: - ret = -EINVAL; mlog(0, "attempting to send begin reco msg to %d\n", nodenum); ret = o2net_send_message(DLM_BEGIN_RECO_MSG, dlm->key, From 436ef47e0692c3224bb3ec12bccb86c8f81d52d3 Mon Sep 17 00:00:00 2001 From: Gang He Date: Thu, 21 Oct 2021 15:06:36 +1100 Subject: [PATCH 0462/1202] ocfs2: reflink deadlock when clone file to the same directory simultaneously Running reflink from multiple nodes simultaneously to clone a file to the same directory probably triggers a deadlock issue. For example, there is a three node ocfs2 cluster, each node mounts the ocfs2 file system to /mnt/shared, and run the reflink command from each node repeatedly, like reflink "/mnt/shared/test" \ "/mnt/shared/.snapshots/test.`date +%m%d%H%M%S`.`hostname`" then, reflink command process will be hung on each node, and you can't list this file system directory. The problematic reflink command process is blocked at one node, task:reflink state:D stack: 0 pid: 1283 ppid: 4154 Call Trace: __schedule+0x2fd/0x750 schedule+0x2f/0xa0 schedule_timeout+0x1cc/0x310 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0e3e000 wait_for_completion+0xba/0x140 ? wake_up_q+0xa0/0xa0 __ocfs2_cluster_lock.isra.41+0x3b5/0x820 [ocfs2] ? ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_init_security_and_acl+0xbe/0x1d0 [ocfs2] ocfs2_reflink+0x436/0x4c0 [ocfs2] ? ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_ioctl+0x25e/0x670 [ocfs2] do_vfs_ioctl+0xa0/0x680 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x1e0 The other reflink command processes are blocked at other nodes, task:reflink state:D stack: 0 pid:29759 ppid: 4088 Call Trace: __schedule+0x2fd/0x750 schedule+0x2f/0xa0 schedule_timeout+0x1cc/0x310 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0b19000 wait_for_completion+0xba/0x140 ? wake_up_q+0xa0/0xa0 __ocfs2_cluster_lock.isra.41+0x3b5/0x820 [ocfs2] ? ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_inode_lock_full_nested+0x1fc/0x960 [ocfs2] ocfs2_mv_orphaned_inode_to_new+0x87/0x7e0 [ocfs2] ocfs2_reflink+0x335/0x4c0 [ocfs2] ? ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_reflink_ioctl+0x2ca/0x360 [ocfs2] ocfs2_ioctl+0x25e/0x670 [ocfs2] do_vfs_ioctl+0xa0/0x680 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5b/0x1e0 or task:reflink state:D stack: 0 pid:18465 ppid: 4156 Call Trace: __schedule+0x302/0x940 ? usleep_range+0x80/0x80 schedule+0x46/0xb0 schedule_timeout+0xff/0x140 ? ocfs2_control_cfu+0x50/0x50 [ocfs2_stack_user] ? 0xffffffffc0c3b000 __wait_for_common+0xb9/0x170 __ocfs2_cluster_lock.constprop.0+0x1d6/0x860 [ocfs2] ? ocfs2_wait_for_recovery+0x49/0xd0 [ocfs2] ? 
ocfs2_inode_lock_full_nested+0x30f/0xa50 [ocfs2] ocfs2_inode_lock_full_nested+0x30f/0xa50 [ocfs2] ocfs2_inode_lock_tracker+0xf2/0x2b0 [ocfs2] ? dput+0x32/0x2f0 ocfs2_permission+0x45/0xe0 [ocfs2] inode_permission+0xcc/0x170 link_path_walk.part.0.constprop.0+0x2a2/0x380 ? path_init+0x2c1/0x3f0 path_parentat+0x3c/0x90 filename_parentat+0xc1/0x1d0 ? filename_lookup+0x138/0x1c0 filename_create+0x43/0x160 ocfs2_reflink_ioctl+0xe6/0x380 [ocfs2] ocfs2_ioctl+0x1ea/0x2c0 [ocfs2] ? do_sys_openat2+0x81/0x150 __x64_sys_ioctl+0x82/0xb0 do_syscall_64+0x61/0xb0 The deadlock is caused by multiple acquiring the destination directory inode dlm lock in ocfs2_reflink function, we should acquire this directory inode dlm lock at the beginning, and hold this dlm lock until end of the function. Link: https://lkml.kernel.org/r/20210729110230.18983-1-ghe@suse.com Signed-off-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/namei.c | 32 +++++++++++++------------------- fs/ocfs2/namei.h | 2 ++ fs/ocfs2/refcounttree.c | 15 +++++++++++---- fs/ocfs2/xattr.c | 12 +----------- fs/ocfs2/xattr.h | 1 + 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2c46ff6ba4ea2..f8bbb22cc60ba 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2489,6 +2489,7 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir, } int ocfs2_create_inode_in_orphan(struct inode *dir, + struct buffer_head **dir_bh, int mode, struct inode **new_inode) { @@ -2597,13 +2598,16 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, brelse(new_di_bh); - if (!status) - *new_inode = inode; - ocfs2_free_dir_lookup_result(&orphan_insert); - ocfs2_inode_unlock(dir, 1); - brelse(parent_di_bh); + if (!status) { + *new_inode = inode; + *dir_bh = parent_di_bh; + } else { + ocfs2_inode_unlock(dir, 1); + brelse(parent_di_bh); + } + return status; } @@ -2760,11 +2764,11 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, } int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, + struct buffer_head *dir_bh, struct inode *inode, struct dentry *dentry) { int status = 0; - struct buffer_head *parent_di_bh = NULL; handle_t *handle = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_dinode *dir_di, *di; @@ -2778,14 +2782,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, (unsigned long long)OCFS2_I(dir)->ip_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); - status = ocfs2_inode_lock(dir, &parent_di_bh, 1); - if (status < 0) { - if (status != -ENOENT) - mlog_errno(status); - return status; - } - - dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data; + dir_di = (struct ocfs2_dinode *) dir_bh->b_data; if (!dir_di->i_links_count) { /* can't make a file in a deleted directory. */ status = -ENOENT; @@ -2798,7 +2795,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, goto leave; /* get a spot inside the dir. 
*/ - status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh, + status = ocfs2_prepare_dir_for_insert(osb, dir, dir_bh, dentry->d_name.name, dentry->d_name.len, &lookup); if (status < 0) { @@ -2862,7 +2859,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, ocfs2_journal_dirty(handle, di_bh); status = ocfs2_add_entry(handle, dentry, inode, - OCFS2_I(inode)->ip_blkno, parent_di_bh, + OCFS2_I(inode)->ip_blkno, dir_bh, &lookup); if (status < 0) { mlog_errno(status); @@ -2886,10 +2883,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, iput(orphan_dir_inode); leave: - ocfs2_inode_unlock(dir, 1); - brelse(di_bh); - brelse(parent_di_bh); brelse(orphan_dir_bh); ocfs2_free_dir_lookup_result(&lookup); diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h index 9cc891eb874e0..03a2c526e2c1b 100644 --- a/fs/ocfs2/namei.h +++ b/fs/ocfs2/namei.h @@ -24,6 +24,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, struct buffer_head *orphan_dir_bh, bool dio); int ocfs2_create_inode_in_orphan(struct inode *dir, + struct buffer_head **dir_bh, int mode, struct inode **new_inode); int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, @@ -32,6 +33,7 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, struct inode *inode, struct buffer_head *di_bh, int update_isize, loff_t end); int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, + struct buffer_head *dir_bh, struct inode *new_inode, struct dentry *new_dentry); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 7f6355cbb5875..a9a0c7c37e8ed 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4250,7 +4250,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, { int error, had_lock; struct inode *inode = d_inode(old_dentry); - struct buffer_head *old_bh = NULL; + struct buffer_head *old_bh = NULL, *dir_bh = NULL; struct inode *new_orphan_inode = NULL; struct ocfs2_lock_holder oh; @@ -4258,7 +4258,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, return -EOPNOTSUPP; - error = ocfs2_create_inode_in_orphan(dir, inode->i_mode, + error = ocfs2_create_inode_in_orphan(dir, &dir_bh, inode->i_mode, &new_orphan_inode); if (error) { mlog_errno(error); @@ -4304,13 +4304,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { - error = ocfs2_init_security_and_acl(dir, new_orphan_inode, + error = ocfs2_init_security_and_acl(dir, dir_bh, + new_orphan_inode, &new_dentry->d_name); if (error) mlog_errno(error); } if (!error) { - error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, + error = ocfs2_mv_orphaned_inode_to_new(dir, dir_bh, + new_orphan_inode, new_dentry); if (error) mlog_errno(error); @@ -4328,6 +4330,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, iput(new_orphan_inode); } + if (dir_bh) { + ocfs2_inode_unlock(dir, 1); + brelse(dir_bh); + } + return error; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index dd784eb0cd7c4..3f23e3a5018ce 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7203,16 +7203,13 @@ int ocfs2_reflink_xattrs(struct inode *old_inode, /* * Initialize security and acl for a already created inode. * Used for reflink a non-preserve-security file. - * - * It uses common api like ocfs2_xattr_set, so the caller - * must not hold any lock expect i_mutex. 
*/ int ocfs2_init_security_and_acl(struct inode *dir, + struct buffer_head *dir_bh, struct inode *inode, const struct qstr *qstr) { int ret = 0; - struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); if (ret) { @@ -7220,17 +7217,10 @@ int ocfs2_init_security_and_acl(struct inode *dir, goto leave; } - ret = ocfs2_inode_lock(dir, &dir_bh, 0); - if (ret) { - mlog_errno(ret); - goto leave; - } ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL); if (ret) mlog_errno(ret); - ocfs2_inode_unlock(dir, 0); - brelse(dir_bh); leave: return ret; } diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 00308b57f64f1..b27fd8ba00196 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -83,6 +83,7 @@ int ocfs2_reflink_xattrs(struct inode *old_inode, struct buffer_head *new_bh, bool preserve_security); int ocfs2_init_security_and_acl(struct inode *dir, + struct buffer_head *dir_bh, struct inode *inode, const struct qstr *qstr); #endif /* OCFS2_XATTR_H */ From cf863bde01f1aef3d72bc56a2618564c39b5ee98 Mon Sep 17 00:00:00 2001 From: Wangyan Date: Thu, 21 Oct 2021 15:06:37 +1100 Subject: [PATCH 0463/1202] ocfs2: clear links count in ocfs2_mknod() if an error occurs In this condition, the inode can not be wiped when error happened. ocfs2_mkdir() ->ocfs2_mknod() ->ocfs2_mknod_locked() ->__ocfs2_mknod_locked() ->ocfs2_set_links_count() // i_links_count is 2 -> ... // an error accrue, goto roll_back or leave. ->ocfs2_commit_trans() ->iput(inode) ->evict() ->ocfs2_evict_inode() ->ocfs2_delete_inode() ->ocfs2_inode_lock() ->ocfs2_inode_lock_update() ->ocfs2_refresh_inode() ->set_nlink(); // inode->i_nlink is 2 now. /* if wipe is 0, it will goto bail_unlock_inode */ ->ocfs2_query_inode_wipe() ->if (inode->i_nlink) return; // wipe is 0. /* inode can not be wiped */ ->ocfs2_wipe_inode() So, we need clear links before the transaction committed. 
Link: http://lkml.kernel.org/r/d8147c41-fb2b-bdf7-b660-1f3c8448c33f@huawei.com Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/namei.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f8bbb22cc60ba..a5c975715aa05 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -453,8 +453,12 @@ static int ocfs2_mknod(struct user_namespace *mnt_userns, leave: if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && new_fe_bh != NULL) + ocfs2_set_links_count((struct ocfs2_dinode *) + new_fe_bh->b_data, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) @@ -598,6 +602,8 @@ static int __ocfs2_mknod_locked(struct inode *dir, leave: if (status < 0) { if (*new_fe_bh) { + if (fe) + ocfs2_set_links_count(fe, 0); brelse(*new_fe_bh); *new_fe_bh = NULL; } @@ -2027,8 +2033,12 @@ static int ocfs2_symlink(struct user_namespace *mnt_userns, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status < 0 && did_quota_inode) dquot_free_inode(inode); - if (handle) + if (handle) { + if (status < 0 && new_fe_bh != NULL) + ocfs2_set_links_count((struct ocfs2_dinode *) + new_fe_bh->b_data, 0); ocfs2_commit_trans(osb, handle); + } ocfs2_inode_unlock(dir, 1); if (did_block_signals) From 4b34ee77e167b984b309d33695ec7dd71533431e Mon Sep 17 00:00:00 2001 From: Wangyan Date: Thu, 21 Oct 2021 15:06:38 +1100 Subject: [PATCH 0464/1202] ocfs2: fix ocfs2 corrupt when iputting an inode In this condition, it will cause an bug on error. ocfs2_mkdir() ->ocfs2_mknod() ->ocfs2_mknod_locked() ->__ocfs2_mknod_locked() //Assume inode->i_generation is genN. ->inode->i_generation = osb->s_next_generation++; // The inode lockres has been initialized. ->ocfs2_populate_inode() ->ocfs2_create_new_inode_locks() ->An error happened, returned value is non-zero // free the start_bit x in bg_blkno ->ocfs2_free_suballoc_bits() ->... /* Another process execute mkdir success in this place, and it occupied the start_bit x in bg_blkno which has been freed before. Its inode->i_generation is genN + 1 */ ->iput(inode) ->evict() ->ocfs2_evict_inode() ->ocfs2_delete_inode() ->ocfs2_inode_lock() ->ocfs2_inode_lock_update() /* Bug on here, genN != genN + 1 */ ->mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation)) So, we need not to reclaim the inode when the inode->ip_inode_lockres has been initialized. It will be freed in iput(). 
Link: http://lkml.kernel.org/r/ef080ca3-5d74-e276-17a1-d9e7c7e662c9@huawei.com Fixes: b1529a41f777 ("ocfs2: should reclaim the inode if '__ocfs2_mknod_locked' returns an error") Signed-off-by: Yan Wang Reviewed-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ocfs2/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a5c975715aa05..0939186f010e3 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -640,7 +640,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, status = __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, parent_fe_bh, handle, inode_ac, fe_blkno, suballoc_loc, suballoc_bit); - if (status < 0) { + if (status < 0 && !(OCFS2_I(inode)->ip_inode_lockres.l_flags & + OCFS2_LOCK_INITIALIZED)) { u64 bg_blkno = ocfs2_which_suballoc_group(fe_blkno, suballoc_bit); int tmp = ocfs2_free_suballoc_bits(handle, inode_ac->ac_inode, inode_ac->ac_bh, suballoc_bit, bg_blkno, 1); From ae4b7b84f5249262d353f98715be4a39df5fbfb8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Oct 2021 15:06:39 +1100 Subject: [PATCH 0465/1202] fs/posix_acl.c: avoid -Wempty-body warning The fallthrough comment for an ignored cmpxchg() return value produces a harmless warning with 'make W=1': fs/posix_acl.c: In function 'get_acl': fs/posix_acl.c:127:36: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 127 | /* fall through */ ; | ^ Simplify it as a step towards a clean W=1 build. As all architectures define cmpxchg() as a statement expression these days, it is no longer necessary to evaluate its return code, and the if() can just be droped. Link: https://lkml.kernel.org/r/20210927102410.1863853-1-arnd@kernel.org Link: https://lore.kernel.org/all/20210322132103.qiun2rjilnlgztxe@wittgenstein/ Signed-off-by: Arnd Bergmann Reviewed-by: Christian Brauner Cc: Alexander Viro Cc: James Morris Cc: Serge Hallyn Cc: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/posix_acl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f5c25f580dd92..9323a854a60ae 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -134,8 +134,7 @@ struct posix_acl *get_acl(struct inode *inode, int type) * to just call ->get_acl to fetch the ACL ourself. (This is going to * be an unlikely race.) */ - if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) - /* fall through */ ; + cmpxchg(p, ACL_NOT_CACHED, sentinel); /* * Normally, the ACL returned by ->get_acl will be cached. From 9192e3be4cc2c700e259c2a1e2a5471abcdc5963 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Oct 2021 15:06:39 +1100 Subject: [PATCH 0466/1202] mm: move kvmalloc-related functions to slab.h Not all files in the kernel should include mm.h. Migrating callers from kmalloc to kvmalloc is easier if the kvmalloc functions are in slab.h. 
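As a usage illustration (a hypothetical caller, not from the patch): after the move, a file that already includes slab.h for kmalloc() can migrate to the kvmalloc() family without also including mm.h.

    #include <linux/slab.h>
    #include <linux/types.h>

    /* Hypothetical caller, illustration only. */
    static u32 *alloc_index_table(size_t nr)
    {
            return kvmalloc_array(nr, sizeof(u32),
                                  GFP_KERNEL | __GFP_ZERO);
    }

    static void free_index_table(u32 *table)
    {
            kvfree(table);  /* likewise now declared in slab.h */
    }
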
[akpm@linux-foundation.org: move the new kvrealloc() also] Link: https://lkml.kernel.org/r/20210622215757.3525604-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Pekka Enberg Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/of/kexec.c | 1 + include/linux/mm.h | 34 ---------------------------------- include/linux/slab.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 761fd870d1db2..053e241f593c7 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #define RNG_SEED_SIZE 128 diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f9..32b2ecc474087 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -799,40 +799,6 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kvmalloc(size_t size, gfp_t flags) -{ - return kvmalloc_node(size, flags, NUMA_NO_NODE); -} -static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) -{ - return kvmalloc_node(size, flags | __GFP_ZERO, node); -} -static inline void *kvzalloc(size_t size, gfp_t flags) -{ - return kvmalloc(size, flags | __GFP_ZERO); -} - -static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(n, size, &bytes))) - return NULL; - - return kvmalloc(bytes, flags); -} - -static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) -{ - return kvmalloc_array(n, size, flags | __GFP_ZERO); -} - -extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, - gfp_t flags); -extern void kvfree(const void *addr); -extern void kvfree_sensitive(const void *addr, size_t len); - static inline int head_compound_mapcount(struct page *head) { return atomic_read(compound_mapcount_ptr(head)) + 1; diff --git a/include/linux/slab.h b/include/linux/slab.h index 083f3ce550bca..c0d46b6fa12a2 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -732,6 +732,40 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node) return kmalloc_node(size, flags | __GFP_ZERO, node); } +extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return kvmalloc(bytes, flags); +} + +static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) +{ + return kvmalloc_array(n, size, flags | __GFP_ZERO); +} + +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, + gfp_t flags); +extern void kvfree(const void *addr); +extern void kvfree_sensitive(const void *addr, size_t len); + unsigned int kmem_cache_size(struct kmem_cache *s); void __init kmem_cache_init_late(void); From 13d964e1b65ab1912af06f83cfe7966638742021 Mon Sep 17 00:00:00 2001 From: Shi Lei Date: Thu, 21 Oct 2021 15:06:40 +1100 Subject: [PATCH 0467/1202] mm/slab.c: remove 
useless lines in enable_cpucache() These lines are useless, so remove them. Link: https://lkml.kernel.org/r/20210930034845.2539-1-shi_lei@massclouds.com Fixes: 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations") Signed-off-by: Shi Lei Reviewed-by: Vlastimil Babka Acked-by: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/slab.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 874b3f8fe80da..64ec17a3bc2ba 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3900,8 +3900,6 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) if (err) goto end; - if (limit && shared && batchcount) - goto skip_setup; /* * The head array serves three purposes: * - create a LIFO ordering, i.e. return objects that are cache-warm @@ -3944,7 +3942,6 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) limit = 32; #endif batchcount = (limit + 1) / 2; -skip_setup: err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp); end: if (err) From a82934015d249dc72e01f6b1df13718ccec6f186 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:06:40 +1100 Subject: [PATCH 0468/1202] slub: add back check for free nonslab objects After commit f227f0faf63b ("slub: fix unreclaimable slab stat for bulk free"), the check for freeing a nonslab page was replaced by VM_BUG_ON_PAGE, which is only checked with CONFIG_DEBUG_VM enabled; since that config may impact performance, it is meant for debugging only. Commit 0937502af7c9 ("slub: Add check for kfree() of non slab objects.") added this check, which should be available in all configs to catch invalid frees; such frees can point at real issues, e.g. memory corruption, use-after-free and double free. So replace VM_BUG_ON_PAGE with WARN_ON_ONCE, and print the object address to help users debug the issue. Link: https://lkml.kernel.org/r/20210930070214.61499-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Matthew Wilcox Cc: Shakeel Butt Cc: Vlastimil Babka Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/slub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index d8f77346376d8..b6a1790812f77 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3522,7 +3522,9 @@ static inline void free_nonslab_page(struct page *page, void *object) { unsigned int order = compound_order(page); - VM_BUG_ON_PAGE(!PageCompound(page), page); + if (WARN_ON_ONCE(!PageCompound(page))) + pr_warn_once("object pointer: 0x%p\n", object); + kfree_hook(object); mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); __free_pages(page, order); From e2a0f3ed207f1e7c7ed0d1681a0d7257b2f155a2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Oct 2021 15:06:41 +1100 Subject: [PATCH 0469/1202] mm, slub: change percpu partial accounting from objects to pages With CONFIG_SLUB_CPU_PARTIAL enabled, SLUB keeps a percpu list of partial slabs that can be promoted to cpu slab when the previous one is depleted, without accessing the shared partial list.
A slab can be added to this list by 1) refill of an empty list from get_partial_node() - once we really have to access the shared partial list, we acquire multiple slabs to amortize the cost of locking, and 2) first free to a previously full slab - instead of putting the slab on a shared partial list, we can more cheaply freeze it and put it on the per-cpu list. To control how large a percpu partial list can grow for a kmem cache, set_cpu_partial() calculates a target number of free objects on each cpu's percpu partial list, and this can also be set via the sysfs file cpu_partial. However, the tracking of the actual number of objects is imprecise, in order to limit overhead from cpu X freeing objects to a slab on the percpu partial list of cpu Y. Basically, the percpu partial slabs form a single linked list, and when we add a new slab to the list with current head "oldpage", we set in the struct page of the slab we're adding: page->pages = oldpage->pages + 1; // this is precise page->pobjects = oldpage->pobjects + (page->objects - page->inuse); page->next = oldpage; Thus the real number of free objects in the slab (objects - inuse) is only determined at the moment of adding the slab to the percpu partial list, and further freeing doesn't update the pobjects counter nor propagate it to the current list head. As Jann reports [1], this can easily lead to large inaccuracies, where the target number of objects (up to 30 by default) can translate to the same number of (empty) slab pages on the list. In case 2) above, we put a slab with 1 free object on the list, thus only increase page->pobjects by 1, even if there are subsequent frees on the same slab. Jann noticed this in practice and so did we [2] when investigating a significant increase of kmemcg usage after switching from SLAB to SLUB. While this is no longer a problem in the kmemcg context thanks to the accounting rewrite in 5.9, the memory waste is still not ideal, and it's questionable whether it makes sense to perform control based on free object counts when those counts can so easily become inaccurate. So this patch converts the accounting to be based on number of pages only (which is precise) and removes the page->pobjects field completely. This is also ultimately simpler. To retain the existing set_cpu_partial() heuristic, first calculate the target number of objects as previously, but then convert it to a target number of pages by assuming the pages will be half-filled on average. This assumption might obviously also be inaccurate in practice, but cannot degrade to the actual number of pages being equal to the target number of objects. We could also skip the intermediate step with target number of objects and rewrite the heuristic in terms of pages. However, we still have the sysfs file cpu_partial which uses number of objects, and it could break existing users if it suddenly became a number of pages, so this patch doesn't do that. In practice, after this patch the heuristics limit the size of the percpu partial list to at most 2 pages; a worked example follows below. In case of a reported regression (which would mean some workload has benefited from the previous imprecise object based counting), we can tune the heuristics to get a better compromise within the new scheme, while still avoiding unexpectedly long percpu partial lists.
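As a worked example of the conversion (assuming a hypothetical cache with 32 objects per order-0 slab; only the 30-object target comes from the patch):

	/* slub_set_cpu_partial() converts the object target to pages: */
	nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
	/* = DIV_ROUND_UP(30 * 2, 32) = DIV_ROUND_UP(60, 32) = 2 pages */

This is where the "at most 2 pages" figure above comes from.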
[1] https://lore.kernel.org/linux-mm/CAG48ez2Qx5K1Cab-m8BdSibp6wLTip6ro4=-umR7BLsEgjEYzA@mail.gmail.com/ [2] https://lore.kernel.org/all/2f0f46e8-2535-410a-1859-e9cfa4e57c18@suse.cz/ ========== Evaluation ========== Mel was kind enough to run v1 through the mmtests machinery for netperf (localhost) and hackbench; the most significant results are below. There are some apparent regressions, especially with hackbench, which I think ultimately boils down to having shorter percpu partial lists on average and some benchmarks benefiting from longer ones. Monitoring slab usage also indicated less memory usage by slab. Based on that, the following patch will bump the defaults to allow longer percpu partial lists than after this patch. However, the goal is certainly not to limit the percpu partial lists to 30 pages just because, previously, a specific alloc/free pattern could lead to the limit of 30 objects translating into a limit of 30 pages - that would make little sense. This is a correctness patch, and if a workload benefits from larger lists, the sysfs tuning knobs are still there to allow that. Netperf 2-socket Intel(R) Xeon(R) Gold 5218R CPU @ 2.10GHz (20 cores, 40 threads per socket), 384GB RAM TCP-RR: hmean before 127045.79 after 121092.94 (-4.69%, worse) stddev before 2634.37 after 1254.08 UDP-RR: hmean before 166985.45 after 160668.94 ( -3.78%, worse) stddev before 4059.69 after 1943.63 2-socket Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz (20 cores, 40 threads per socket), 512GB RAM TCP-RR: hmean before 84173.25 after 76914.72 ( -8.62%, worse) UDP-RR: hmean before 93571.12 after 96428.69 ( 3.05%, better) stddev before 23118.54 after 16828.14 2-socket Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz (12 cores, 24 threads per socket), 64GB RAM TCP-RR: hmean before 49984.92 after 48922.27 ( -2.13%, worse) stddev before 6248.15 after 4740.51 UDP-RR: hmean before 61854.31 after 68761.81 ( 11.17%, better) stddev before 4093.54 after 5898.91 other machines - within 2% Hackbench (results before and after the patch, negative % means worse) 2-socket AMD EPYC 7713 (64 cores, 128 threads per core), 256GB RAM hackbench-process-sockets Amean 1 0.5380 0.5583 ( -3.78%) Amean 4 0.7510 0.8150 ( -8.52%) Amean 7 0.7930 0.9533 ( -20.22%) Amean 12 0.7853 1.1313 ( -44.06%) Amean 21 1.1520 1.4993 ( -30.15%) Amean 30 1.6223 1.9237 ( -18.57%) Amean 48 2.6767 2.9903 ( -11.72%) Amean 79 4.0257 5.1150 ( -27.06%) Amean 110 5.5193 7.4720 ( -35.38%) Amean 141 7.2207 9.9840 ( -38.27%) Amean 172 8.4770 12.1963 ( -43.88%) Amean 203 9.6473 14.3137 ( -48.37%) Amean 234 11.3960 18.7917 ( -64.90%) Amean 265 13.9627 22.4607 ( -60.86%) Amean 296 14.9163 26.0483 ( -74.63%) hackbench-thread-sockets Amean 1 0.5597 0.5877 ( -5.00%) Amean 4 0.7913 0.8960 ( -13.23%) Amean 7 0.8190 1.0017 ( -22.30%) Amean 12 0.9560 1.1727 ( -22.66%) Amean 21 1.7587 1.5660 ( 10.96%) Amean 30 2.4477 1.9807 ( 19.08%) Amean 48 3.4573 3.0630 ( 11.41%) Amean 79 4.7903 5.1733 ( -8.00%) Amean 110 6.1370 7.4220 ( -20.94%) Amean 141 7.5777 9.2617 ( -22.22%) Amean 172 9.2280 11.0907 ( -20.18%) Amean 203 10.2793 13.3470 ( -29.84%) Amean 234 11.2410 17.1070 ( -52.18%) Amean 265 12.5970 23.3323 ( -85.22%) Amean 296 17.1540 24.2857 ( -41.57%) 2-socket Intel(R) Xeon(R) Gold 5218R CPU @ 2.10GHz (20 cores, 40 threads per socket), 384GB RAM hackbench-process-sockets Amean 1 0.5760 0.4793 ( 16.78%) Amean 4 0.9430 0.9707 ( -2.93%) Amean 7 1.5517 1.8843 ( -21.44%) Amean 12 2.4903 2.7267 ( -9.49%) Amean 21 3.9560 4.2877 ( -8.38%) Amean 30 5.4613 5.8343 ( -6.83%) Amean 48
8.5337 9.2937 ( -8.91%) Amean 79 14.0670 15.2630 ( -8.50%) Amean 110 19.2253 21.2467 ( -10.51%) Amean 141 23.7557 25.8550 ( -8.84%) Amean 172 28.4407 29.7603 ( -4.64%) Amean 203 33.3407 33.9927 ( -1.96%) Amean 234 38.3633 39.1150 ( -1.96%) Amean 265 43.4420 43.8470 ( -0.93%) Amean 296 48.3680 48.9300 ( -1.16%) hackbench-thread-sockets Amean 1 0.6080 0.6493 ( -6.80%) Amean 4 1.0000 1.0513 ( -5.13%) Amean 7 1.6607 2.0260 ( -22.00%) Amean 12 2.7637 2.9273 ( -5.92%) Amean 21 5.0613 4.5153 ( 10.79%) Amean 30 6.3340 6.1140 ( 3.47%) Amean 48 9.0567 9.5577 ( -5.53%) Amean 79 14.5657 15.7983 ( -8.46%) Amean 110 19.6213 21.6333 ( -10.25%) Amean 141 24.1563 26.2697 ( -8.75%) Amean 172 28.9687 30.2187 ( -4.32%) Amean 203 33.9763 34.6970 ( -2.12%) Amean 234 38.8647 39.3207 ( -1.17%) Amean 265 44.0813 44.1507 ( -0.16%) Amean 296 49.2040 49.4330 ( -0.47%) 2-socket Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz (20 cores, 40 threads per socket), 512GB RAM hackbench-process-sockets Amean 1 0.5027 0.5017 ( 0.20%) Amean 4 1.1053 1.2033 ( -8.87%) Amean 7 1.8760 2.1820 ( -16.31%) Amean 12 2.9053 3.1810 ( -9.49%) Amean 21 4.6777 4.9920 ( -6.72%) Amean 30 6.5180 6.7827 ( -4.06%) Amean 48 10.0710 10.5227 ( -4.48%) Amean 79 16.4250 17.5053 ( -6.58%) Amean 110 22.6203 24.4617 ( -8.14%) Amean 141 28.0967 31.0363 ( -10.46%) Amean 172 34.4030 36.9233 ( -7.33%) Amean 203 40.5933 43.0850 ( -6.14%) Amean 234 46.6477 48.7220 ( -4.45%) Amean 265 53.0530 53.9597 ( -1.71%) Amean 296 59.2760 59.9213 ( -1.09%) hackbench-thread-sockets Amean 1 0.5363 0.5330 ( 0.62%) Amean 4 1.1647 1.2157 ( -4.38%) Amean 7 1.9237 2.2833 ( -18.70%) Amean 12 2.9943 3.3110 ( -10.58%) Amean 21 4.9987 5.1880 ( -3.79%) Amean 30 6.7583 7.0043 ( -3.64%) Amean 48 10.4547 10.8353 ( -3.64%) Amean 79 16.6707 17.6790 ( -6.05%) Amean 110 22.8207 24.4403 ( -7.10%) Amean 141 28.7090 31.0533 ( -8.17%) Amean 172 34.9387 36.8260 ( -5.40%) Amean 203 41.1567 43.0450 ( -4.59%) Amean 234 47.3790 48.5307 ( -2.43%) Amean 265 53.9543 54.6987 ( -1.38%) Amean 296 60.0820 60.2163 ( -0.22%) 1-socket Intel(R) Xeon(R) CPU E3-1240 v5 @ 3.50GHz (4 cores, 8 threads), 32 GB RAM hackbench-process-sockets Amean 1 1.4760 1.5773 ( -6.87%) Amean 3 3.9370 4.0910 ( -3.91%) Amean 5 6.6797 6.9357 ( -3.83%) Amean 7 9.3367 9.7150 ( -4.05%) Amean 12 15.7627 16.1400 ( -2.39%) Amean 18 23.5360 23.6890 ( -0.65%) Amean 24 31.0663 31.3137 ( -0.80%) Amean 30 38.7283 39.0037 ( -0.71%) Amean 32 41.3417 41.6097 ( -0.65%) hackbench-thread-sockets Amean 1 1.5250 1.6043 ( -5.20%) Amean 3 4.0897 4.2603 ( -4.17%) Amean 5 6.7760 7.0933 ( -4.68%) Amean 7 9.4817 9.9157 ( -4.58%) Amean 12 15.9610 16.3937 ( -2.71%) Amean 18 23.9543 24.3417 ( -1.62%) Amean 24 31.4400 31.7217 ( -0.90%) Amean 30 39.2457 39.5467 ( -0.77%) Amean 32 41.8267 42.1230 ( -0.71%) 2-socket Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz (12 cores, 24 threads per socket), 64GB RAM hackbench-process-sockets Amean 1 1.0347 1.0880 ( -5.15%) Amean 4 1.7267 1.8527 ( -7.30%) Amean 7 2.6707 2.8110 ( -5.25%) Amean 12 4.1617 4.3383 ( -4.25%) Amean 21 7.0070 7.2600 ( -3.61%) Amean 30 9.9187 10.2397 ( -3.24%) Amean 48 15.6710 16.3923 ( -4.60%) Amean 79 24.7743 26.1247 ( -5.45%) Amean 110 34.3000 35.9307 ( -4.75%) Amean 141 44.2043 44.8010 ( -1.35%) Amean 172 54.2430 54.7260 ( -0.89%) Amean 192 60.6557 60.9777 ( -0.53%) hackbench-thread-sockets Amean 1 1.0610 1.1353 ( -7.01%) Amean 4 1.7543 1.9140 ( -9.10%) Amean 7 2.7840 2.9573 ( -6.23%) Amean 12 4.3813 4.4937 ( -2.56%) Amean 21 7.3460 7.5350 ( -2.57%) Amean 30 10.2313 10.5190 ( -2.81%) Amean 48 15.9700 16.5940 
( -3.91%) Amean 79 25.3973 26.6637 ( -4.99%) Amean 110 35.1087 36.4797 ( -3.91%) Amean 141 45.8220 46.3053 ( -1.05%) Amean 172 55.4917 55.7320 ( -0.43%) Amean 192 62.7490 62.5410 ( 0.33%) Link: https://lkml.kernel.org/r/20211012134651.11258-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Jann Horn Cc: Roman Gushchin Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mm_types.h | 2 - include/linux/slub_def.h | 13 +----- mm/slub.c | 89 ++++++++++++++++++++++++++-------------- 3 files changed, 61 insertions(+), 43 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f8ee09c711f4..68ffa064b7a81 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -124,10 +124,8 @@ struct page { struct page *next; #ifdef CONFIG_64BIT int pages; /* Nr of pages left */ - int pobjects; /* Approximate count */ #else short int pages; - short int pobjects; #endif }; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 85499f0586b06..0fa751b946fa0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -99,6 +99,8 @@ struct kmem_cache { #ifdef CONFIG_SLUB_CPU_PARTIAL /* Number of per cpu partial objects to keep around */ unsigned int cpu_partial; + /* Number of per cpu partial pages to keep around */ + unsigned int cpu_partial_pages; #endif struct kmem_cache_order_objects oo; @@ -141,17 +143,6 @@ struct kmem_cache { struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_SLUB_CPU_PARTIAL -#define slub_cpu_partial(s) ((s)->cpu_partial) -#define slub_set_cpu_partial(s, n) \ -({ \ - slub_cpu_partial(s) = (n); \ -}) -#else -#define slub_cpu_partial(s) (0) -#define slub_set_cpu_partial(s, n) -#endif /* CONFIG_SLUB_CPU_PARTIAL */ - #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS void sysfs_slab_unlink(struct kmem_cache *); diff --git a/mm/slub.c b/mm/slub.c index b6a1790812f77..0df81ea24b915 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -414,6 +414,29 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x) return x.x & OO_MASK; } +#ifdef CONFIG_SLUB_CPU_PARTIAL +static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) +{ + unsigned int nr_pages; + + s->cpu_partial = nr_objects; + + /* + * We take the number of objects but actually limit the number of + * pages on the per cpu partial list, in order to limit excessive + * growth of the list. For simplicity we assume that the pages will + * be half-full. 
+ */ + nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); + s->cpu_partial_pages = nr_pages; +} +#else +static inline void +slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) +{ +} +#endif /* CONFIG_SLUB_CPU_PARTIAL */ + /* * Per slab locking using the pagelock */ @@ -2052,7 +2075,7 @@ static inline void remove_partial(struct kmem_cache_node *n, */ static inline void *acquire_slab(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page, - int mode, int *objects) + int mode) { void *freelist; unsigned long counters; @@ -2068,7 +2091,6 @@ static inline void *acquire_slab(struct kmem_cache *s, freelist = page->freelist; counters = page->counters; new.counters = counters; - *objects = new.objects - new.inuse; if (mode) { new.inuse = page->objects; new.freelist = NULL; @@ -2106,9 +2128,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, { struct page *page, *page2; void *object = NULL; - unsigned int available = 0; unsigned long flags; - int objects; + unsigned int partial_pages = 0; /* * Racy check. If we mistakenly see no partial slabs then we @@ -2126,11 +2147,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!pfmemalloc_match(page, gfpflags)) continue; - t = acquire_slab(s, n, page, object == NULL, &objects); + t = acquire_slab(s, n, page, object == NULL); if (!t) break; - available += objects; if (!object) { *ret_page = page; stat(s, ALLOC_FROM_PARTIAL); @@ -2138,10 +2158,15 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, } else { put_cpu_partial(s, page, 0); stat(s, CPU_PARTIAL_NODE); + partial_pages++; } +#ifdef CONFIG_SLUB_CPU_PARTIAL if (!kmem_cache_has_cpu_partial(s) - || available > slub_cpu_partial(s) / 2) + || partial_pages > s->cpu_partial_pages / 2) break; +#else + break; +#endif } spin_unlock_irqrestore(&n->list_lock, flags); @@ -2546,14 +2571,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) struct page *page_to_unfreeze = NULL; unsigned long flags; int pages = 0; - int pobjects = 0; local_lock_irqsave(&s->cpu_slab->lock, flags); oldpage = this_cpu_read(s->cpu_slab->partial); if (oldpage) { - if (drain && oldpage->pobjects > slub_cpu_partial(s)) { + if (drain && oldpage->pages >= s->cpu_partial_pages) { /* * Partial array is full. Move the existing set to the * per node partial list. Postpone the actual unfreezing @@ -2562,16 +2586,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) page_to_unfreeze = oldpage; oldpage = NULL; } else { - pobjects = oldpage->pobjects; pages = oldpage->pages; } } pages++; - pobjects += page->objects - page->inuse; page->pages = pages; - page->pobjects = pobjects; page->next = oldpage; this_cpu_write(s->cpu_slab->partial, page); @@ -3991,6 +4012,8 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min) static void set_cpu_partial(struct kmem_cache *s) { #ifdef CONFIG_SLUB_CPU_PARTIAL + unsigned int nr_objects; + /* * cpu_partial determined the maximum number of objects kept in the * per cpu partial lists of a processor. @@ -4000,24 +4023,22 @@ static void set_cpu_partial(struct kmem_cache *s) * filled up again with minimal effort. The slab will never hit the * per node partial lists and therefore no locking will be required. * - * This setting also determines - * - * A) The number of objects from per cpu partial slabs dumped to the - * per node list when we reach the limit. 
- * B) The number of objects in cpu partial slabs to extract from the - * per node list when we run out of per cpu objects. We only fetch - * 50% to keep some capacity around for frees. + * For backwards compatibility reasons, this is determined as number + * of objects, even though we now limit maximum number of pages, see + * slub_set_cpu_partial() */ if (!kmem_cache_has_cpu_partial(s)) - slub_set_cpu_partial(s, 0); + nr_objects = 0; else if (s->size >= PAGE_SIZE) - slub_set_cpu_partial(s, 2); + nr_objects = 2; else if (s->size >= 1024) - slub_set_cpu_partial(s, 6); + nr_objects = 6; else if (s->size >= 256) - slub_set_cpu_partial(s, 13); + nr_objects = 13; else - slub_set_cpu_partial(s, 30); + nr_objects = 30; + + slub_set_cpu_partial(s, nr_objects); #endif } @@ -5392,7 +5413,12 @@ SLAB_ATTR(min_partial); static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf) { - return sysfs_emit(buf, "%u\n", slub_cpu_partial(s)); + unsigned int nr_partial = 0; +#ifdef CONFIG_SLUB_CPU_PARTIAL + nr_partial = s->cpu_partial; +#endif + + return sysfs_emit(buf, "%u\n", nr_partial); } static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf, @@ -5463,12 +5489,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (page) { + if (page) pages += page->pages; - objects += page->pobjects; - } } + /* Approximate half-full pages, see slub_set_cpu_partial() */ + objects = (pages * oo_objects(s->oo)) / 2; len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages); #ifdef CONFIG_SMP @@ -5476,9 +5502,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) struct page *page; page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (page) + if (page) { + pages = READ_ONCE(page->pages); + objects = (pages * oo_objects(s->oo)) / 2; len += sysfs_emit_at(buf, len, " C%d=%d(%d)", - cpu, page->pobjects, page->pages); + cpu, objects, pages); + } } #endif len += sysfs_emit_at(buf, len, "\n"); From ff82d2329f243c0600ea4299f23d94f6d49acafd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Oct 2021 15:06:42 +1100 Subject: [PATCH 0470/1202] mm/slub: increase default cpu partial list sizes The defaults are determined based on object size and can go up to 30 for objects smaller than 256 bytes. Before the previous patch changed the accounting, this could have made the cpu partial list contain up to 30 pages. After that patch, only up to 2 pages with default allocation order. Very short lists limit the usefulness of the whole concept of cpu partial lists, so this patch aims at a more reasonable default under the new accounting. The defaults are quadrupled, except for object size >= PAGE_SIZE where it's doubled. This makes the lists grow up to 10 pages in practice. A quick test of booting a kernel under virtme with 4GB RAM and 8 vcpus shows the following slab memory usage after boot: Before previous patch (using page->pobjects): Slab: 36732 kB SReclaimable: 14836 kB SUnreclaim: 21896 kB After previous patch (using page->pages): Slab: 34720 kB SReclaimable: 13716 kB SUnreclaim: 21004 kB After this patch (using page->pages, higher defaults): Slab: 35252 kB SReclaimable: 13944 kB SUnreclaim: 21308 kB In the same setup, I also ran 5 times: hackbench -l 16000 -g 16 Differences in time were in the noise; we can compare slub stats as given by slabinfo -r skbuff_head_cache (the other cache heavily used by hackbench, kmalloc-cg-512 looks similar). Negligible stats left out for brevity.
Before previous patch (using page->pobjects): Objects: 1408, Memory Total: 401408 Used : 304128 Slab Perf Counter Alloc Free %Al %Fr -------------------------------------------------- Fastpath 469952498 5946606 91 1 Slowpath 42053573 506059465 8 98 Page Alloc 41093 41044 0 0 Add partial 18 21229327 0 4 Remove partial 20039522 36051 3 0 Cpu partial list 4686640 24767229 0 4 RemoteObj/SlabFrozen 16 124027841 0 24 Total 512006071 512006071 Flushes 18 Slab Deactivation Occurrences % ------------------------------------------------- Slab empty 4993 0% Deactivation bypass 24767229 99% Refilled from foreign frees 21972674 88% After previous patch (using page->pages): Objects: 480, Memory Total: 131072 Used : 103680 Slab Perf Counter Alloc Free %Al %Fr -------------------------------------------------- Fastpath 473016294 5405653 92 1 Slowpath 38989777 506600418 7 98 Page Alloc 32717 32701 0 0 Add partial 3 22749164 0 4 Remove partial 11371127 32474 2 0 Cpu partial list 11686226 23090059 2 4 RemoteObj/SlabFrozen 2 67541803 0 13 Total 512006071 512006071 Flushes 3 Slab Deactivation Occurrences % ------------------------------------------------- Slab empty 227 0% Deactivation bypass 23090059 99% Refilled from foreign frees 27585695 119% After this patch (using page->pages, higher defaults): Objects: 896, Memory Total: 229376 Used : 193536 Slab Perf Counter Alloc Free %Al %Fr -------------------------------------------------- Fastpath 473799295 4980278 92 0 Slowpath 38206776 507025793 7 99 Page Alloc 32295 32267 0 0 Add partial 11 23291143 0 4 Remove partial 5815764 31278 1 0 Cpu partial list 18119280 23967320 3 4 RemoteObj/SlabFrozen 10 76974794 0 15 Total 512006071 512006071 Flushes 11 Slab Deactivation Occurrences % ------------------------------------------------- Slab empty 989 0% Deactivation bypass 23967320 99% Refilled from foreign frees 32358473 135% As expected, memory usage dropped significantly with the change of accounting; increasing the defaults increased it again, but not as much. The number of page allocations/frees dropped significantly with the new accounting, but didn't increase with the higher defaults. Interestingly, the number of fastpath allocations increased, as well as allocations from the cpu partial list, even though it's shorter.
Link: https://lkml.kernel.org/r/20211012134651.11258-2-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Jann Horn Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/slub.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0df81ea24b915..f282329489bf9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4030,13 +4030,13 @@ static void set_cpu_partial(struct kmem_cache *s) if (!kmem_cache_has_cpu_partial(s)) nr_objects = 0; else if (s->size >= PAGE_SIZE) - nr_objects = 2; - else if (s->size >= 1024) nr_objects = 6; + else if (s->size >= 1024) + nr_objects = 24; else if (s->size >= 256) - nr_objects = 13; + nr_objects = 52; else - nr_objects = 30; + nr_objects = 120; slub_set_cpu_partial(s, nr_objects); #endif From dd4b499af21bacd2ad4124b23a59b70b39924bca Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Thu, 21 Oct 2021 15:06:42 +1100 Subject: [PATCH 0471/1202] mm, slub: use prefetchw instead of prefetch commit 0ad9500e16fe ("slub: prefetch next freelist pointer in slab_alloc()") introduced prefetch_freepointer() because when other cpu(s) freed objects into a page that current cpu owns, the freelist link is hot on cpu(s) which freed objects and possibly very cold on current cpu. But if freelist link chain is hot on cpu(s) which freed objects, it's better to invalidate that chain because they're not going to access again within a short time. So use prefetchw instead of prefetch. On supported architectures like x86 and arm, it invalidates other copied instances of a cache line when prefetching it. Before: Time: 91.677 Performance counter stats for 'hackbench -g 100 -l 10000': 1462938.07 msec cpu-clock # 15.908 CPUs utilized 18072550 context-switches # 12.354 K/sec 1018814 cpu-migrations # 696.416 /sec 104558 page-faults # 71.471 /sec 1580035699271 cycles # 1.080 GHz (54.51%) 2003670016013 instructions # 1.27 insn per cycle (54.31%) 5702204863 branch-misses (54.28%) 643368500985 cache-references # 439.778 M/sec (54.26%) 18475582235 cache-misses # 2.872 % of all cache refs (54.28%) 642206796636 L1-dcache-loads # 438.984 M/sec (46.87%) 18215813147 L1-dcache-load-misses # 2.84% of all L1-dcache accesses (46.83%) 653842996501 dTLB-loads # 446.938 M/sec (46.63%) 3227179675 dTLB-load-misses # 0.49% of all dTLB cache accesses (46.85%) 537531951350 iTLB-loads # 367.433 M/sec (54.33%) 114750630 iTLB-load-misses # 0.02% of all iTLB cache accesses (54.37%) 630135543177 L1-icache-loads # 430.733 M/sec (46.80%) 22923237620 L1-icache-load-misses # 3.64% of all L1-icache accesses (46.76%) 91.964452802 seconds time elapsed 43.416742000 seconds user 1422.441123000 seconds sys After: Time: 90.220 Performance counter stats for 'hackbench -g 100 -l 10000': 1437418.48 msec cpu-clock # 15.880 CPUs utilized 17694068 context-switches # 12.310 K/sec 958257 cpu-migrations # 666.651 /sec 100604 page-faults # 69.989 /sec 1583259429428 cycles # 1.101 GHz (54.57%) 2004002484935 instructions # 1.27 insn per cycle (54.37%) 5594202389 branch-misses (54.36%) 643113574524 cache-references # 447.409 M/sec (54.39%) 18233791870 cache-misses # 2.835 % of all cache refs (54.37%) 640205852062 L1-dcache-loads # 445.386 M/sec (46.75%) 17968160377 L1-dcache-load-misses # 2.81% of all L1-dcache accesses (46.79%) 651747432274 dTLB-loads # 453.415 M/sec (46.59%) 3127124271 dTLB-load-misses # 0.48% of all dTLB cache accesses (46.75%) 535395273064 iTLB-loads # 372.470 
M/sec (54.38%) 113500056 iTLB-load-misses # 0.02% of all iTLB cache accesses (54.35%) 628871845924 L1-icache-loads # 437.501 M/sec (46.80%) 22585641203 L1-icache-load-misses # 3.59% of all L1-icache accesses (46.79%) 90.514819303 seconds time elapsed 43.877656000 seconds user 1397.176001000 seconds sys Link: https://lkml.org/lkml/2021/10/8/598 Link: https://lkml.kernel.org/r/20211011144331.70084-1-42.hyeyoo@gmail.com Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f282329489bf9..e9a51dcf8bf9a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -354,7 +354,7 @@ static inline void *get_freepointer(struct kmem_cache *s, void *object) static void prefetch_freepointer(const struct kmem_cache *s, void *object) { - prefetch(object + s->offset); + prefetchw(object + s->offset); } static inline void *get_freepointer_safe(struct kmem_cache *s, void *object) From 4dd71465b7d503ec374f4188e626eda37c0bf6d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Oct 2021 15:06:43 +1100 Subject: [PATCH 0472/1202] mm: don't include <linux/dax.h> in <linux/mempolicy.h> Not required at all, and having this causes a huge kernel rebuild as soon as something in dax.h changes. Link: https://lkml.kernel.org/r/20210921082253.1859794-1-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Naoya Horiguchi Reviewed-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mempolicy.h | 1 - mm/memory-failure.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4091692bed8c0..097bbbb374936 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -8,7 +8,6 @@ #include #include -#include <linux/dax.h> #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bdbbb32211a58..8376cfe0efce7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -39,6 +39,7 @@ #include #include #include +#include <linux/dax.h> #include #include #include From a150ad3e6fc63558623f1d306b9d28a01b8d81bd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:44 +1100 Subject: [PATCH 0473/1202] lib/stackdepot: include gfp.h Patch series "stackdepot, kasan, workqueue: Avoid expanding stackdepot slabs when holding raw_spin_lock", v2. Shuah Khan reported [1]: | When CONFIG_PROVE_RAW_LOCK_NESTING=y and CONFIG_KASAN are enabled, | kasan_record_aux_stack() runs into "BUG: Invalid wait context" when | it tries to allocate memory attempting to acquire spinlock in page | allocation code while holding workqueue pool raw_spinlock. | | There are several instances of this problem when block layer tries | to __queue_work(). Call trace from one of these instances is below: | | kblockd_mod_delayed_work_on() | mod_delayed_work_on() | __queue_delayed_work() | __queue_work() (rcu_read_lock, raw_spin_lock pool->lock held) | insert_work() | kasan_record_aux_stack() | kasan_save_stack() | stack_depot_save() | alloc_pages() | __alloc_pages() | get_page_from_freelist() | rm_queue() | rm_queue_pcplist() | local_lock_irqsave(&pagesets.lock, flags); | [ BUG: Invalid wait context triggered ] PROVE_RAW_LOCK_NESTING is pointing out that (on RT kernels) the locking rules are being violated.
More generally, memory is being allocated from a non-preemptive context (raw_spin_lock'd critical section) where it is not allowed. To properly fix this, we must prevent stackdepot from replenishing its "stack slab" pool if memory allocations cannot be done in the current context: it's a bug to use either GFP_ATOMIC or GFP_NOWAIT in certain non-preemptive contexts, including raw_spin_locks (see gfp.h and ab00db216c9c7). The only downside is that saving a stack trace may fail if: stackdepot runs out of space AND the same stack trace has not been recorded before. I expect this to be unlikely, and a simple experiment (boot the kernel) didn't result in any failure to record a stack trace from insert_work(). The series includes a few minor fixes to stackdepot that I noticed in preparing the series. It then introduces __stack_depot_save(), which exposes the option to force stackdepot to not allocate any memory. Finally, KASAN is changed to use the new stackdepot interface and provide kasan_record_aux_stack_noalloc(), which is then used by workqueue code. [1] https://lkml.kernel.org/r/20210902200134.25603-1-skhan@linuxfoundation.org This patch (of 6): <linux/stackdepot.h> refers to gfp_t, but doesn't include gfp.h. Fix it by including <linux/gfp.h>. Link: https://lkml.kernel.org/r/20210913112609.2651084-1-elver@google.com Link: https://lkml.kernel.org/r/20210913112609.2651084-2-elver@google.com Signed-off-by: Marco Elver Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Andrey Konovalov Cc: Tejun Heo Cc: Lai Jiangshan Cc: Walter Wu Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: "Gustavo A. R. Silva" Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/stackdepot.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 6bb4bc1a5f545..97b36dc53301d 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -11,6 +11,8 @@ #ifndef _LINUX_STACKDEPOT_H #define _LINUX_STACKDEPOT_H +#include <linux/gfp.h> + typedef u32 depot_stack_handle_t; depot_stack_handle_t stack_depot_save(unsigned long *entries, From 29cfb2f197b7e64c356fbff671647db515a45433 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:44 +1100 Subject: [PATCH 0474/1202] lib/stackdepot: remove unused function argument alloc_flags in depot_alloc_stack() is no longer used; remove it. Link: https://lkml.kernel.org/r/20210913112609.2651084-3-elver@google.com Signed-off-by: Marco Elver Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: "Gustavo A. R.
Silva" Cc: Lai Jiangshan Cc: Taras Madan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/stackdepot.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 0a2e417f83cba..c80a9f7342537 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -102,8 +102,8 @@ static bool init_stack_slab(void **prealloc) } /* Allocation of a new stack in raw storage */ -static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, - u32 hash, void **prealloc, gfp_t alloc_flags) +static struct stack_record * +depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; size_t required_size = struct_size(stack, entries, size); @@ -309,9 +309,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { - struct stack_record *new = - depot_alloc_stack(entries, nr_entries, - hash, &prealloc, alloc_flags); + struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); + if (new) { new->next = *bucket; /* From a312176d57b730a8832cd7f3bfbd9c9ede2b8925 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:45 +1100 Subject: [PATCH 0475/1202] lib/stackdepot: introduce __stack_depot_save() Add __stack_depot_save(), which provides more fine-grained control over stackdepot's memory allocation behaviour, in case stackdepot runs out of "stack slabs". Normally stackdepot uses alloc_pages() in case it runs out of space; passing can_alloc==false to __stack_depot_save() prohibits this, at the cost of more likely failure to record a stack trace. Link: https://lkml.kernel.org/r/20210913112609.2651084-4-elver@google.com Signed-off-by: Marco Elver Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: "Gustavo A. R. Silva" Cc: Lai Jiangshan Cc: Taras Madan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/stackdepot.h | 4 ++++ lib/stackdepot.c | 43 ++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 97b36dc53301d..b2f7e7c6ba54d 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -15,6 +15,10 @@ typedef u32 depot_stack_handle_t; +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index c80a9f7342537..bda58597e3756 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -248,17 +248,28 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * stack_depot_save - Save a stack trace from an array + * __stack_depot_save - Save a stack trace from an array * * @entries: Pointer to storage array * @nr_entries: Size of the storage array * @alloc_flags: Allocation gfp flags + * @can_alloc: Allocate stack slabs (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. 
If @can_alloc is + * %true, is allowed to replenish the stack slab pool in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and will fail if no space is left to store the stack trace. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case from contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * - * Return: The handle of the stack struct stored in depot + * Return: The handle of the stack struct stored in depot, 0 on failure. */ -depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t alloc_flags) +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; depot_stack_handle_t retval = 0; @@ -291,7 +302,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, * The smp_load_acquire() here pairs with smp_store_release() to * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). */ - if (unlikely(!smp_load_acquire(&next_slab_inited))) { + if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -339,6 +350,26 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, fast_exit: return retval; } +EXPORT_SYMBOL_GPL(__stack_depot_save); + +/** + * stack_depot_save - Save a stack trace from an array + * + * @entries: Pointer to storage array + * @nr_entries: Size of the storage array + * @alloc_flags: Allocation gfp flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. + * + * Return: The handle of the stack struct stored in depot, 0 on failure. + */ +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags) +{ + return __stack_depot_save(entries, nr_entries, alloc_flags, true); +} EXPORT_SYMBOL_GPL(stack_depot_save); static inline int in_irqentry_text(unsigned long ptr) From 390c165c97de88c722efb65dc7f662f3cdb22ef3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:46 +1100 Subject: [PATCH 0476/1202] kasan: common: provide can_alloc in kasan_save_stack() Add another argument, can_alloc, to kasan_save_stack() which is passed as-is to __stack_depot_save(). No functional change intended. Link: https://lkml.kernel.org/r/20210913112609.2651084-5-elver@google.com Signed-off-by: Marco Elver Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: "Gustavo A. R. 
Silva" Cc: Lai Jiangshan Cc: Taras Madan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kasan/common.c | 6 +++--- mm/kasan/generic.c | 2 +- mm/kasan/kasan.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 2baf121fb8c50..3e0999892c368 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -30,20 +30,20 @@ #include "kasan.h" #include "../slab.h" -depot_stack_handle_t kasan_save_stack(gfp_t flags) +depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) { unsigned long entries[KASAN_STACK_DEPTH]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); nr_entries = filter_irq_stacks(entries, nr_entries); - return stack_depot_save(entries, nr_entries, flags); + return __stack_depot_save(entries, nr_entries, flags, can_alloc); } void kasan_set_track(struct kasan_track *track, gfp_t flags) { track->pid = current->pid; - track->stack = kasan_save_stack(flags); + track->stack = kasan_save_stack(flags, true); } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index c3f5ba7a294a8..2a8e59e6326d2 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -345,7 +345,7 @@ void kasan_record_aux_stack(void *addr) return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); + alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, true); } void kasan_set_free_info(struct kmem_cache *cache, diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8bf568a80eb85..fa6b48d085134 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -251,7 +251,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip); struct page *kasan_addr_to_page(const void *addr); -depot_stack_handle_t kasan_save_stack(gfp_t flags); +depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc); void kasan_set_track(struct kasan_track *track, gfp_t flags); void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag); struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, From 6607d81dbf6188c468f0323097907b37a2236b08 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:46 +1100 Subject: [PATCH 0477/1202] kasan: generic: introduce kasan_record_aux_stack_noalloc() Introduce a variant of kasan_record_aux_stack() that does not do any memory allocation through stackdepot. This will permit using it in contexts that cannot allocate any memory. Link: https://lkml.kernel.org/r/20210913112609.2651084-6-elver@google.com Signed-off-by: Marco Elver Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: "Gustavo A. R. 
Silva" Cc: Lai Jiangshan Cc: Taras Madan Cc: Tejun Heo Cc: Thomas Gleixner Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/kasan.h | 2 ++ mm/kasan/generic.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd874a1ee862a..736d7b458996d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -370,12 +370,14 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); +void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} +static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 2a8e59e6326d2..84a038b07c6fe 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -328,7 +328,7 @@ DEFINE_ASAN_SET_SHADOW(f3); DEFINE_ASAN_SET_SHADOW(f5); DEFINE_ASAN_SET_SHADOW(f8); -void kasan_record_aux_stack(void *addr) +static void __kasan_record_aux_stack(void *addr, bool can_alloc) { struct page *page = kasan_addr_to_page(addr); struct kmem_cache *cache; @@ -345,7 +345,17 @@ void kasan_record_aux_stack(void *addr) return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, true); + alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc); +} + +void kasan_record_aux_stack(void *addr) +{ + return __kasan_record_aux_stack(addr, true); +} + +void kasan_record_aux_stack_noalloc(void *addr) +{ + return __kasan_record_aux_stack(addr, false); } void kasan_set_free_info(struct kmem_cache *cache, From 970d902e27265031decc89e0368346229d2ab44b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:06:47 +1100 Subject: [PATCH 0478/1202] workqueue, kasan: avoid alloc_pages() when recording stack Shuah Khan reported: | When CONFIG_PROVE_RAW_LOCK_NESTING=y and CONFIG_KASAN are enabled, | kasan_record_aux_stack() runs into "BUG: Invalid wait context" when | it tries to allocate memory attempting to acquire spinlock in page | allocation code while holding workqueue pool raw_spinlock. | | There are several instances of this problem when block layer tries | to __queue_work(). Call trace from one of these instances is below: | | kblockd_mod_delayed_work_on() | mod_delayed_work_on() | __queue_delayed_work() | __queue_work() (rcu_read_lock, raw_spin_lock pool->lock held) | insert_work() | kasan_record_aux_stack() | kasan_save_stack() | stack_depot_save() | alloc_pages() | __alloc_pages() | get_page_from_freelist() | rm_queue() | rm_queue_pcplist() | local_lock_irqsave(&pagesets.lock, flags); | [ BUG: Invalid wait context triggered ] The default kasan_record_aux_stack() calls stack_depot_save() with GFP_NOWAIT, which in turn can then call alloc_pages(GFP_NOWAIT, ...). In general, however, it is not even possible to use either GFP_ATOMIC nor GFP_NOWAIT in certain non-preemptive contexts, including raw_spin_locks (see gfp.h and ab00db216c9c7). Fix it by instructing stackdepot to not expand stack storage via alloc_pages() in case it runs out by using kasan_record_aux_stack_noalloc(). 
While there is an increased risk of failing to insert the stack trace, this is typically unlikely, especially if the same insertion had already succeeded previously (stack depot hit). For frequent calls from the same location, it therefore becomes extremely unlikely that kasan_record_aux_stack_noalloc() fails. Link: https://lkml.kernel.org/r/20210902200134.25603-1-skhan@linuxfoundation.org Link: https://lkml.kernel.org/r/20210913112609.2651084-7-elver@google.com Signed-off-by: Marco Elver Reported-by: Shuah Khan Tested-by: Shuah Khan Acked-by: Sebastian Andrzej Siewior Acked-by: Tejun Heo Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: "Gustavo A. R. Silva" Cc: Lai Jiangshan Cc: Taras Madan Cc: Thomas Gleixner Cc: Vijayanand Jitta Cc: Vinayak Menon Cc: Walter Wu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1b3eb1e9531f4..12e0d9cb4ac7c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1350,7 +1350,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct worker_pool *pool = pwq->pool; /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack(work); + kasan_record_aux_stack_noalloc(work); /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); From 2aef77f0cc7bc8d02bdaab0c2135486741d60052 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Oct 2021 15:06:48 +1100 Subject: [PATCH 0479/1202] kasan: fix tag for large allocations when using CONFIG_SLAB If an object is allocated on a tail page of a multi-page slab, kasan will get the wrong tag because page->s_mem is NULL for tail pages. I'm not quite sure what the user-visible effect of this might be. Link: https://lkml.kernel.org/r/20211001024105.3217339-1-willy@infradead.org Fixes: 7f94ffbc4c6a ("kasan: add hooks implementation for tag-based mode") Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kasan/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 3e0999892c368..8428da2aaf173 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -298,7 +298,7 @@ static inline u8 assign_tag(struct kmem_cache *cache, /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */ #ifdef CONFIG_SLAB /* For SLAB assign tags based on the object index in the freelist. */ - return (u8)obj_to_index(cache, virt_to_page(object), (void *)object); + return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object); #else /* * For SLUB assign a random tag during slab creation, otherwise reuse From 2abddd5c68c249b7946e7811e44aa08b638deb89 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 21 Oct 2021 15:06:49 +1100 Subject: [PATCH 0480/1202] kasan: test: add memcpy test that avoids out-of-bounds write With HW tag-based KASAN, error checks are performed implicitly by the load and store instructions in the memcpy implementation. A failed check results in tag checks being disabled and execution will keep going. 
As a result, under HW tag-based KASAN, prior to commit 1b0668be62cf ("kasan: test: disable kmalloc_memmove_invalid_size for HW_TAGS"), this memcpy would end up corrupting memory until it hits an inaccessible page and causes a kernel panic. This is a pre-existing issue that was revealed by commit 285133040e6c ("arm64: Import latest memcpy()/memmove() implementation") which changed the memcpy implementation from using signed comparisons (incorrectly, resulting in the memcpy being terminated early for negative sizes) to using unsigned comparisons. It is unclear how this could be handled by memcpy itself in a reasonable way. One possibility would be to add an exception handler that would force memcpy to return if a tag check fault is detected -- this would make the behavior roughly similar to generic and SW tag-based KASAN. However, this wouldn't solve the problem for asynchronous mode and also makes memcpy behavior inconsistent with manually copying data. This test was added as a part of a series that taught KASAN to detect negative sizes in memory operations, see commit 8cceeff48f23 ("kasan: detect negative size in memory operation function"). Therefore we should keep testing for negative sizes with generic and SW tag-based KASAN. But there is some value in testing small memcpy overflows, so let's add another test with memcpy that does not destabilize the kernel by performing out-of-bounds writes, and run it in all modes. Link: https://linux-review.googlesource.com/id/I048d1e6a9aff766c4a53f989fb0c83de68923882 Link: https://lkml.kernel.org/r/20210910211356.3603758-1-pcc@google.com Signed-off-by: Peter Collingbourne Reviewed-by: Andrey Konovalov Acked-by: Marco Elver Cc: Robin Murphy Cc: Will Deacon Cc: Catalin Marinas Cc: Mark Rutland Cc: Evgenii Stepanov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_kasan.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index decd765152d62..ec1e437b1fdab 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -493,7 +493,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) kfree(ptr); } -static void kmalloc_memmove_invalid_size(struct kunit *test) +static void kmalloc_memmove_negative_size(struct kunit *test) { char *ptr; size_t size = 64; @@ -515,6 +515,21 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) kfree(ptr); } +static void kmalloc_memmove_invalid_size(struct kunit *test) +{ + char *ptr; + size_t size = 64; + volatile size_t invalid_size = size; + + ptr = kmalloc(size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + memset((char *)ptr, 0, 64); + KUNIT_EXPECT_KASAN_FAIL(test, + memmove((char *)ptr, (char *)ptr + 4, invalid_size)); + kfree(ptr); +} + static void kmalloc_uaf(struct kunit *test) { char *ptr; @@ -1144,6 +1159,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_memset_4), KUNIT_CASE(kmalloc_oob_memset_8), KUNIT_CASE(kmalloc_oob_memset_16), + KUNIT_CASE(kmalloc_memmove_negative_size), KUNIT_CASE(kmalloc_memmove_invalid_size), KUNIT_CASE(kmalloc_uaf), KUNIT_CASE(kmalloc_uaf_memset), From 664230b497eeb1122c786c9677b6e69a07f36cf7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:06:49 +1100 Subject: [PATCH 0481/1202] mm/smaps: fix shmem pte hole swap calculation Patch series "mm/smaps: Fixes and optimizations on shmem swap handling". 
This patch (of 3): The shmem swap calculation for privately writable mappings is using the wrong parameters, as spotted by Vlastimil. Fix them. The bug was introduced in commit 48131e03ca4e, when shmem_swap_usage was reworked into shmem_partial_swap_usage. Test program (with the includes and the SIZE_2M definition filled in so it builds standalone):

==================
#define _GNU_SOURCE
#include <stdio.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>

#define SIZE_2M (2UL << 20)

int main(void)
{
	char *buffer, *p;
	int i, fd;

	fd = memfd_create("test", 0);
	assert(fd > 0);

	/* isize==2M*3, fill in pages, swap them out */
	ftruncate(fd, SIZE_2M * 3);
	buffer = mmap(NULL, SIZE_2M * 3, PROT_READ | PROT_WRITE,
		      MAP_SHARED, fd, 0);
	assert(buffer != MAP_FAILED);
	for (i = 0, p = buffer; i < SIZE_2M * 3 / 4096; i++) {
		*p = 1;
		p += 4096;
	}
	madvise(buffer, SIZE_2M * 3, MADV_PAGEOUT);
	munmap(buffer, SIZE_2M * 3);

	/*
	 * Remap with private+writable mappings over part of the inode
	 * (<= 2M*3), while the size must also be >= 2M*2 to make sure
	 * there's a none pmd so smaps_pte_hole will be triggered.
	 */
	buffer = mmap(NULL, SIZE_2M * 2, PROT_READ | PROT_WRITE,
		      MAP_PRIVATE, fd, 0);
	printf("pid=%d, buffer=%p\n", getpid(), (void *)buffer);

	/* Check /proc/$PID/smaps_rollup, should see 4MB swap */
	sleep(1000000);
	return 0;
}
==================

Before the patch, smaps_rollup shows <4MB of swap, and the number is random depending on the alignment of the buffer that mmap() returns. After this patch, it'll show 4MB. Link: https://lkml.kernel.org/r/20210917164756.8586-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210917164756.8586-2-peterx@redhat.com Fixes: 48131e03ca4e ("mm, proc: reduce cost of /proc/pid/smaps for unpopulated shmem mappings") Signed-off-by: Peter Xu Reported-by: Vlastimil Babka Reviewed-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/task_mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index cf25be3e03212..2197f669e17bc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -478,9 +478,11 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, __always_unused int depth, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = walk->vma; - mss->swap += shmem_partial_swap_usage( - walk->vma->vm_file->f_mapping, addr, end); + mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping, + linear_page_index(vma, addr), + linear_page_index(vma, end)); return 0; } From fcbc012e3f6452985591f1929cce4a7b947a269c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:06:50 +1100 Subject: [PATCH 0482/1202] mm/smaps: use vma->vm_pgoff directly when counting partial swap As it's trying to cover the whole vma anyway, use the vm_pgoff value directly together with vma_pages() rather than linear_page_index().
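For reference, a rough sketch (not part of the patch) of why the two forms agree over a whole vma, leaving aside the hugetlb special case inside linear_page_index():

	/*
	 * linear_page_index(vma, addr) is essentially:
	 *
	 *	((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff
	 *
	 * so for the whole-vma bounds used by shmem_swap_usage():
	 *
	 *	linear_page_index(vma, vma->vm_start) == vma->vm_pgoff
	 *	linear_page_index(vma, vma->vm_end)
	 *		== vma->vm_pgoff + vma_pages(vma)
	 *
	 * because vma_pages(vma) is
	 * (vma->vm_end - vma->vm_start) >> PAGE_SHIFT.
	 */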
Link: https://lkml.kernel.org/r/20210917164756.8586-3-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Vlastimil Babka Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/shmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b5860f4a2738e..7cd976b588ff1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -856,9 +856,8 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma) return swapped << PAGE_SHIFT; /* Here comes the more involved part */ - return shmem_partial_swap_usage(mapping, - linear_page_index(vma, vma->vm_start), - linear_page_index(vma, vma->vm_end)); + return shmem_partial_swap_usage(mapping, vma->vm_pgoff, + vma->vm_pgoff + vma_pages(vma)); } /* From 260bb86c8ce32418ed7302fdf8666bc10668c031 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:06:50 +1100 Subject: [PATCH 0483/1202] mm/smaps: simplify shmem handling of pte holes Firstly, the check_shmem_swap variable is not actually necessary, because it is only ever set together with the pte_hole hook; checking for the hook instead works just as well. Meanwhile, the check within smaps_pte_entry is not easy to follow. E.g., the pte_none() check is not needed, as "!pte_present && !is_swap_pte" covers the same case. While at it, use the pte_hole() helper rather than duplicating the page cache lookup. Still keep the CONFIG_SHMEM part so the code can be optimized to a nop for !SHMEM. There will be a very slight functional change in smaps_pte_entry(): for !SHMEM we'll now return early on pte_none (before checking page == NULL), but that's even nicer. Link: https://lkml.kernel.org/r/20210917164756.8586-4-peterx@redhat.com Signed-off-by: Peter Xu Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Vlastimil Babka Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/task_mmu.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2197f669e17bc..ad667dbc96f5c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -397,7 +397,6 @@ struct mem_size_stats { u64 pss_shmem; u64 pss_locked; u64 swap_pss; - bool check_shmem_swap; }; static void smaps_page_accumulate(struct mem_size_stats *mss, @@ -490,6 +489,16 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end, #define smaps_pte_hole NULL #endif /* CONFIG_SHMEM */ +static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk) +{ +#ifdef CONFIG_SHMEM + if (walk->ops->pte_hole) { + /* depth is not used */ + smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk); + } +#endif +} + static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { @@ -518,12 +527,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } } else if (is_pfn_swap_entry(swpent)) page = pfn_swap_entry_to_page(swpent); - } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap - && pte_none(*pte))) { - page = xa_load(&vma->vm_file->f_mapping->i_pages, - linear_page_index(vma, addr)); - if (xa_is_value(page)) - mss->swap += PAGE_SIZE; + } else { + smaps_pte_hole_lookup(addr, walk); return; } @@ -737,8 +742,6 @@ static void smap_gather_stats(struct vm_area_struct *vma, return; #ifdef CONFIG_SHMEM - /* In case of smaps_rollup, reset the value from previous vma */ - mss->check_shmem_swap = false; if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) { /* * For shared or readonly shmem mappings we know that all @@ -756,7 +759,6 @@ static void smap_gather_stats(struct
vm_area_struct *vma, !(vma->vm_flags & VM_WRITE))) { mss->swap += shmem_swapped; } else { - mss->check_shmem_swap = true; ops = &smaps_shmem_walk_ops; } } From 8fafcfa7b430a1dd2a9b00ab3b34fbfdcb3eb853 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 21 Oct 2021 15:06:51 +1100 Subject: [PATCH 0484/1202] mm: debug_vm_pgtable: don't use __P000 directly The __Pxxx/__Sxxx macros are only meant for initializing protection_map[]. All other usage in Linux should go through the protection_map[] array. Because many architectures re-initialize the protection_map[] array (e.g. x86-mem_encrypt, m68k-motorola, mips, arm, sparc), using __P000 directly is not rigorous. Link: https://lkml.kernel.org/r/20210924060821.1138281-1-guoren@kernel.org Signed-off-by: Guo Ren Reviewed-by: Andrew Morton Reviewed-by: Anshuman Khandual Cc: Gavin Shan Cc: Christophe Leroy Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/debug_vm_pgtable.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 1403639302e48..228e3954b90c1 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -1104,13 +1104,14 @@ static int __init init_args(struct pgtable_debug_args *args) /* * Initialize the debugging data. * - * __P000 (or even __S000) will help create page table entries with - * PROT_NONE permission as required for pxx_protnone_tests(). + * protection_map[0] (or even protection_map[8]) will help create + * page table entries with PROT_NONE permission as required for + * pxx_protnone_tests(). */ memset(args, 0, sizeof(*args)); args->vaddr = get_random_vaddr(); args->page_prot = vm_get_page_prot(VMFLAGS); - args->page_prot_none = __P000; + args->page_prot_none = protection_map[0]; args->is_contiguous_page = false; args->pud_pfn = ULONG_MAX; args->pmd_pfn = ULONG_MAX; From 7de92c66a32c8335f81aa08029cc41b0d8f78a56 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:52 +1100 Subject: [PATCH 0485/1202] kasan: test: bypass __alloc_size checks Intentional overflows, as performed by the KASAN tests, are detected at compile time[1] (instead of only at run-time) with the addition of __alloc_size. Fix this by forcing the compiler into not being able to trust the size used following the kmalloc()s.
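The mechanism used in the diff below is OPTIMIZER_HIDE_VAR() from include/linux/compiler.h, an empty asm statement that claims to rewrite the variable, so the optimizer can no longer track its value. A minimal sketch of the pattern (the kmalloc/memset pairing here is illustrative, not copied from the diff):

	#include <linux/compiler.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static void hide_size_example(void)
	{
		size_t size = 64;
		char *ptr = kmalloc(size, GFP_KERNEL);

		if (!ptr)
			return;
		/* After this, the compiler no longer knows size == 64, so
		 * __alloc_size/__builtin_object_size cannot prove the
		 * overflow at compile time... */
		OPTIMIZER_HIDE_VAR(size);
		/* ...and the out-of-bounds memset is left for KASAN to
		 * catch at run time, which is what the tests want. */
		memset(ptr, 0, size + 1);
		kfree(ptr);
	}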
[1] https://lore.kernel.org/lkml/20211005184717.65c6d8eb39350395e387b71f@linux-foundation.org Link: https://lkml.kernel.org/r/20211006181544.1670992-1-keescook@chromium.org Signed-off-by: Kees Cook Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_kasan.c | 8 +++++++- lib/test_kasan_module.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index ec1e437b1fdab..8b93c2541d921 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -440,6 +440,7 @@ static void kmalloc_oob_memset_2(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2)); kfree(ptr); } @@ -452,6 +453,7 @@ static void kmalloc_oob_memset_4(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4)); kfree(ptr); } @@ -464,6 +466,7 @@ static void kmalloc_oob_memset_8(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8)); kfree(ptr); } @@ -476,6 +479,7 @@ static void kmalloc_oob_memset_16(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16)); kfree(ptr); } @@ -488,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + KASAN_GRANULE_SIZE)); kfree(ptr); @@ -497,7 +502,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test) { char *ptr; size_t size = 64; - volatile size_t invalid_size = -2; + size_t invalid_size = -2; /* * Hardware tag-based mode doesn't check memmove for negative size. @@ -510,6 +515,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(invalid_size); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c index 7ebf433edef3b..b112cbc835e90 100644 --- a/lib/test_kasan_module.c +++ b/lib/test_kasan_module.c @@ -35,6 +35,8 @@ static noinline void __init copy_user_test(void) return; } + OPTIMIZER_HIDE_VAR(size); + pr_info("out-of-bounds in copy_from_user()\n"); unused = copy_from_user(kmem, usermem, size + 1); From f49ad45840e511fe890c8ecf5ff48d8d21d12847 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:52 +1100 Subject: [PATCH 0486/1202] rapidio: avoid bogus __alloc_size warning Patch series "Add __alloc_size()", v3. GCC and Clang both use the "alloc_size" attribute to assist with bounds checking around the use of allocation functions. Add the attribute, adjust the Makefile to silence needless warnings, and add the hints to the allocators where possible. These changes have been in use for a while now in GrapheneOS. This patch (of 8): After adding __alloc_size attributes to the allocators, GCC 9.3 (but not later) may incorrectly evaluate the arguments to check_copy_size(), getting seemingly confused by the size being returned from array_size(). 
Instead, perform the calculation once, which both makes the code more readable and avoids the bug in GCC. In file included from arch/x86/include/asm/preempt.h:7, from include/linux/preempt.h:78, from include/linux/spinlock.h:55, from include/linux/mm_types.h:9, from include/linux/buildid.h:5, from include/linux/module.h:14, from drivers/rapidio/devices/rio_mport_cdev.c:13: In function 'check_copy_size', inlined from 'copy_from_user' at include/linux/uaccess.h:191:6, inlined from 'rio_mport_transfer_ioctl' at drivers/rapidio/devices/rio_mport_cdev.c:983:6: include/linux/thread_info.h:213:4: error: call to '__bad_copy_to' declared with attribute error: copy destination size is too small 213 | __bad_copy_to(); | ^~~~~~~~~~~~~~~ But the allocation size and the copy size are identical: transfer = vmalloc(array_size(sizeof(*transfer), transaction.count)); if (!transfer) return -ENOMEM; if (unlikely(copy_from_user(transfer, (void __user *)(uintptr_t)transaction.block, array_size(sizeof(*transfer), transaction.count)))) { Link: https://lkml.kernel.org/r/20210930222704.2631604-1-keescook@chromium.org Link: https://lkml.kernel.org/r/20210930222704.2631604-2-keescook@chromium.org Link: https://lore.kernel.org/linux-mm/202109091134.FHnRmRxu-lkp@intel.com/ Signed-off-by: Kees Cook Reviewed-by: John Hubbard Reported-by: kernel test robot Cc: Matt Porter Cc: Alexandre Bounine Cc: Jing Xiangfeng Cc: Ira Weiny Cc: Souptick Joarder Cc: Gustavo A. R. Silva Cc: Andy Whitcroft Cc: Christoph Lameter Cc: Daniel Micay Cc: David Rientjes Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Pekka Enberg Cc: Randy Dunlap Cc: Tejun Heo Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/rapidio/devices/rio_mport_cdev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 94331d999d273..7df466e222820 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -965,6 +965,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) struct rio_transfer_io *transfer; enum dma_data_direction dir; int i, ret = 0; + size_t size; if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction)))) return -EFAULT; @@ -976,13 +977,14 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) priv->md->properties.transfer_mode) == 0) return -ENODEV; - transfer = vmalloc(array_size(sizeof(*transfer), transaction.count)); + size = array_size(sizeof(*transfer), transaction.count); + transfer = vmalloc(size); if (!transfer) return -ENOMEM; if (unlikely(copy_from_user(transfer, (void __user *)(uintptr_t)transaction.block, - array_size(sizeof(*transfer), transaction.count)))) { + size))) { ret = -EFAULT; goto out_free; } @@ -994,8 +996,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg) transaction.sync, dir, &transfer[i]); if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block, - transfer, - array_size(sizeof(*transfer), transaction.count)))) + transfer, size))) ret = -EFAULT; out_free: From 5a0480e9c465d50089e6f09f6d9ca2256f41faf6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:53 +1100 Subject: [PATCH 0487/1202] Compiler Attributes: add __alloc_size() for better bounds checking GCC and Clang can use the "alloc_size" attribute to better inform the 
results of __builtin_object_size() (for compile-time constant values). Clang can additionally use alloc_size to inform the results of __builtin_dynamic_object_size() (for run-time values). Because GCC sees the frequent use of struct_size() as an allocator size argument, and notices it can return SIZE_MAX (the overflow indication), it complains about these call sites overflowing (since SIZE_MAX is greater than the default -Walloc-size-larger-than=PTRDIFF_MAX). This isn't helpful since we already know a SIZE_MAX will be caught at run-time (this was an intentional design). To deal with this, we must disable this check as it is both a false positive and redundant. (Clang does not have this warning option.) Unfortunately, just checking the -Wno-alloc-size-larger-than is not sufficient to make the __alloc_size attribute behave correctly under older GCC versions. The attribute itself must be disabled in those situations too, as there appears to be no way to reliably silence the SIZE_MAX constant expression cases for GCC versions less than 9.1: In file included from ./include/linux/resource_ext.h:11, from ./include/linux/pci.h:40, from drivers/net/ethernet/intel/ixgbe/ixgbe.h:9, from drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c:4: In function 'kmalloc_node', inlined from 'ixgbe_alloc_q_vector' at ./include/linux/slab.h:743:9: ./include/linux/slab.h:618:9: error: argument 1 value '18446744073709551615' exceeds maximum object size 9223372036854775807 [-Werror=alloc-size-larger-than=] return __kmalloc_node(size, flags, node); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/slab.h: In function 'ixgbe_alloc_q_vector': ./include/linux/slab.h:455:7: note: in a call to allocation function '__kmalloc_node' declared here void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_slab_alignment __malloc; ^~~~~~~~~~~~~~ Specifically: -Wno-alloc-size-larger-than is not correctly handled by GCC < 9.1 https://godbolt.org/z/hqsfG7q84 (doesn't disable) https://godbolt.org/z/P9jdrPTYh (doesn't admit to not knowing about option) https://godbolt.org/z/465TPMWKb (only warns when other warnings appear) -Walloc-size-larger-than=18446744073709551615 is not handled by GCC < 8.2 https://godbolt.org/z/73hh1EPxz (ignores numeric value) Since anything marked with __alloc_size would also qualify for marking with __malloc, just include __malloc along with it to avoid redundant markings. (Suggested by Linus Torvalds.) Finally, make sure checkpatch.pl doesn't get confused about finding the __alloc_size attribute on functions. (Thanks to Joe Perches.) Link: https://lkml.kernel.org/r/20210930222704.2631604-3-keescook@chromium.org Signed-off-by: Kees Cook Tested-by: Randy Dunlap Cc: Andy Whitcroft Cc: Christoph Lameter Cc: Daniel Micay Cc: David Rientjes Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Pekka Enberg Cc: Tejun Heo Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Makefile | 15 +++++++++++++++ include/linux/compiler-gcc.h | 8 ++++++++ include/linux/compiler_attributes.h | 10 ++++++++++ include/linux/compiler_types.h | 12 ++++++++++++ scripts/checkpatch.pl | 3 ++- 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91297670da8e0..78b6b0f615414 100644 --- a/Makefile +++ b/Makefile @@ -1008,6 +1008,21 @@ ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif +ifdef CONFIG_CC_IS_GCC +# The allocators already balk at large sizes, so silence the compiler +# warnings for bounds checks involving those possible values. While +# -Wno-alloc-size-larger-than would normally be used here, earlier versions +# of gcc (<9.1) weirdly don't handle the option correctly when _other_ +# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX +# doesn't work (as it is documented to), silently resolving to "0" prior to +# version 9.1 (and producing an error more recently). Numeric values larger +# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently +# ignored, continuing to default to PTRDIFF_MAX. So, left with no other +# choice, we must perform a versioned check to disable this warning. +# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au +KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0901, -Wno-alloc-size-larger-than) +endif + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index bd2b881c6b63a..b9d5f9c373a09 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -144,3 +144,11 @@ #else #define __diag_GCC_8(s) #endif + +/* + * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" + * attribute) do not work, and must be disabled. + */ +#if GCC_VERSION < 90100 +#undef __alloc_size__ +#endif diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e6ec634039658..3de06a8fae73b 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -33,6 +33,15 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __aligned_largest __attribute__((__aligned__)) +/* + * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally + * available and includes other attributes. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size + */ +#define __alloc_size__(x, ...) 
__attribute__((__alloc_size__(x, ## __VA_ARGS__))) + /* * Note: users of __always_inline currently do not write "inline" themselves, * which seems to be required by gcc to apply the attribute according @@ -153,6 +162,7 @@ /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc */ #define __malloc __attribute__((__malloc__)) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index b6ff83a714ca9..4f2203c4a2574 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -250,6 +250,18 @@ struct ftrace_likely_data { # define __cficanonical #endif +/* + * Any place that could be marked with the "alloc_size" attribute is also + * a place to be marked with the "malloc" attribute. Do this as part of the + * __alloc_size macro to avoid redundant attributes and to avoid missing a + * __malloc marking. + */ +#ifdef __alloc_size__ +# define __alloc_size(x, ...) __alloc_size__(x, ## __VA_ARGS__) __malloc +#else +# define __alloc_size(x, ...) __malloc +#endif + #ifndef asm_volatile_goto #define asm_volatile_goto(x...) asm goto(x) #endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c27d2312cfc30..88cb294dc4472 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -489,7 +489,8 @@ sub hash_show_words { ____cacheline_aligned| ____cacheline_aligned_in_smp| ____cacheline_internodealigned_in_smp| - __weak + __weak| + __alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) }x; our $Modifier; our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__}; From 7b71d3b837b4be13035c97208935713ba2fc7546 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:54 +1100 Subject: [PATCH 0488/1202] slab: clean up function prototypes Based on feedback from Joe Perches and Linus Torvalds, regularize the slab function prototypes before making attribute changes. Link: https://lkml.kernel.org/r/20210930222704.2631604-4-keescook@chromium.org Signed-off-by: Kees Cook Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Andy Whitcroft Cc: Daniel Micay Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Gustavo A. R. Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: Joe Perches Cc: John Hubbard Cc: kernel test robot Cc: Lukas Bulwahn Cc: Matt Porter Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Randy Dunlap Cc: Souptick Joarder Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/slab.h | 68 ++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index c0d46b6fa12a2..d05de03bdcddb 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -152,8 +152,8 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, slab_flags_t flags, unsigned int useroffset, unsigned int usersize, void (*ctor)(void *)); -void kmem_cache_destroy(struct kmem_cache *); -int kmem_cache_shrink(struct kmem_cache *); +void kmem_cache_destroy(struct kmem_cache *s); +int kmem_cache_shrink(struct kmem_cache *s); /* * Please use this macro to create slab caches. 
Simply specify the @@ -181,11 +181,11 @@ int kmem_cache_shrink(struct kmem_cache *); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *, size_t, gfp_t); -void kfree(const void *); -void kfree_sensitive(const void *); -size_t __ksize(const void *); -size_t ksize(const void *); +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags); +void kfree(const void *objp); +void kfree_sensitive(const void *objp); +size_t __ksize(const void *objp); +size_t ksize(const void *objp); #ifdef CONFIG_PRINTK bool kmem_valid_obj(void *object); void kmem_dump_obj(void *object); @@ -426,8 +426,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size, #endif /* !CONFIG_SLOB */ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; -void kmem_cache_free(struct kmem_cache *, void *); +void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; +void kmem_cache_free(struct kmem_cache *s, void *objp); /* * Bulk allocation and freeing operations. These are accelerated in an @@ -436,8 +436,8 @@ void kmem_cache_free(struct kmem_cache *, void *); * * Note that interrupts must be enabled when calling these functions. */ -void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); +int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); /* * Caller must not use kfree_bulk() on memory not originally allocated @@ -450,7 +450,8 @@ static __always_inline void kfree_bulk(size_t size, void **p) #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; +void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment + __malloc; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -464,25 +465,24 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc; +extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_slab_alignment __malloc; #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment __malloc; +extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_slab_alignment __malloc; #else -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) +static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, int node, + size_t size) { return kmem_cache_alloc_trace(s, gfpflags, size); } #endif /* CONFIG_NUMA */ #else /* CONFIG_TRACING */ -static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, - gfp_t flags, size_t size) +static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, + size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -490,10 +490,8 @@ static __always_inline void 
*kmem_cache_alloc_trace(struct kmem_cache *s, return ret; } -static __always_inline void * -kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, - int node, size_t size) +static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = kmem_cache_alloc_node(s, gfpflags, node); @@ -502,13 +500,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment + __malloc; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc; +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) + __assume_page_alignment __malloc; #else -static __always_inline void * -kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) { return kmalloc_order(size, flags, order); } @@ -638,8 +637,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static __must_check inline void * -krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags) +static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size, + gfp_t flags) { size_t bytes; @@ -668,7 +667,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * allocator where we care about the real place the memory allocation * request comes from. */ -extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) @@ -691,7 +690,8 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) #ifdef CONFIG_NUMA -extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); +extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, + unsigned long caller); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ _RET_IP_) From c333eacfa102bf531ad4e202f93e04300e87339a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:55 +1100 Subject: [PATCH 0489/1202] slab: add __alloc_size attributes for better bounds checking As already done in GrapheneOS, add the __alloc_size attribute for regular kmalloc interfaces, to provide additional hinting for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler optimizations. Link: https://lkml.kernel.org/r/20210930222704.2631604-5-keescook@chromium.org Signed-off-by: Kees Cook Co-developed-by: Daniel Micay Signed-off-by: Daniel Micay Reviewed-by: Nick Desaulniers Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Andy Whitcroft Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Tejun Heo Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Randy Dunlap Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/slab.h | 61 ++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index d05de03bdcddb..b5bf0537975b5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -181,7 +181,7 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags); +void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2); void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); @@ -425,7 +425,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size, #define kmalloc_index(s) __kmalloc_index(s, true) #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); @@ -449,11 +449,12 @@ static __always_inline void kfree_bulk(size_t size, void **p) } #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment + __alloc_size(1); void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #else -static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node) { return __kmalloc(size, flags); } @@ -466,23 +467,23 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #ifdef CONFIG_TRACING extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_slab_alignment __malloc; + __assume_slab_alignment __alloc_size(3); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_slab_alignment __malloc; + int node, size_t size) __assume_slab_alignment + __alloc_size(4); #else -static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, - gfp_t gfpflags, int node, - size_t size) +static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s, + gfp_t gfpflags, int node, size_t size) { return kmem_cache_alloc_trace(s, gfpflags, size); } #endif /* CONFIG_NUMA */ #else /* CONFIG_TRACING */ -static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, - size_t size) +static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s, + gfp_t flags, size_t size) { void *ret = kmem_cache_alloc(s, flags); @@ -501,19 +502,20 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g #endif /* CONFIG_TRACING */ extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment - __malloc; + __alloc_size(1); #ifdef CONFIG_TRACING extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) - __assume_page_alignment __malloc; + __assume_page_alignment 
__alloc_size(1); #else -static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags, + unsigned int order) { return kmalloc_order(size, flags, order); } #endif -static __always_inline void *kmalloc_large(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags) { unsigned int order = get_order(size); return kmalloc_order_trace(size, flags, order); @@ -573,7 +575,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * Try really hard to succeed the allocation but fail * eventually. */ -static __always_inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { #ifndef CONFIG_SLOB @@ -595,7 +597,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && @@ -619,7 +621,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; @@ -637,8 +639,10 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size, - gfp_t flags) +static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; @@ -654,7 +658,7 @@ static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t n * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags) { return kmalloc_array(n, size, flags | __GFP_ZERO); } @@ -667,12 +671,13 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * allocator where we care about the real place the memory allocation * request comes from. 
*/ -extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller); +extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) + __alloc_size(1); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) -static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, - int node) +static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, + int node) { size_t bytes; @@ -683,7 +688,7 @@ static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, return __kmalloc_node(bytes, flags, node); } -static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) +static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) { return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); } @@ -691,7 +696,7 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) #ifdef CONFIG_NUMA extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, - unsigned long caller); + unsigned long caller) __alloc_size(1); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ _RET_IP_) @@ -716,7 +721,7 @@ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ -static inline void *kzalloc(size_t size, gfp_t flags) +static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags) { return kmalloc(size, flags | __GFP_ZERO); } @@ -727,7 +732,7 @@ static inline void *kzalloc(size_t size, gfp_t flags) * @flags: the type of memory to allocate (see kmalloc). * @node: memory node from which to allocate */ -static inline void *kzalloc_node(size_t size, gfp_t flags, int node) +static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node) { return kmalloc_node(size, flags | __GFP_ZERO, node); } From 4f3afad1c74d8b37086fbcb08480a88a0c9fd9b3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:55 +1100 Subject: [PATCH 0490/1202] mm/kvmalloc: add __alloc_size attributes for better bounds checking As already done in GrapheneOS, add the __alloc_size attribute for regular kvmalloc interfaces, to provide additional hinting for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler optimizations. Link: https://lkml.kernel.org/r/20210930222704.2631604-6-keescook@chromium.org Signed-off-by: Kees Cook Co-developed-by: Daniel Micay Signed-off-by: Daniel Micay Reviewed-by: Nick Desaulniers Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Andy Whitcroft Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Tejun Heo Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Randy Dunlap Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/slab.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index b5bf0537975b5..837cb16232ef4 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -737,21 +737,21 @@ static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int n return kmalloc_node(size, flags | __GFP_ZERO, node); } -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); -static inline void *kvmalloc(size_t size, gfp_t flags) +extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1); +static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags) { return kvmalloc_node(size, flags, NUMA_NO_NODE); } -static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node) { return kvmalloc_node(size, flags | __GFP_ZERO, node); } -static inline void *kvzalloc(size_t size, gfp_t flags) +static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags) { return kvmalloc(size, flags | __GFP_ZERO); } -static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags) { size_t bytes; @@ -761,13 +761,13 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) return kvmalloc(bytes, flags); } -static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags) { return kvmalloc_array(n, size, flags | __GFP_ZERO); } -extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, - gfp_t flags); +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) + __alloc_size(3); extern void kvfree(const void *addr); extern void kvfree_sensitive(const void *addr, size_t len); From f430da55e2b726a9874ac07b92e5856ff05eacc0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:56 +1100 Subject: [PATCH 0491/1202] mm/vmalloc: add __alloc_size attributes for better bounds checking As already done in GrapheneOS, add the __alloc_size attribute for appropriate vmalloc allocator interfaces, to provide additional hinting for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler optimizations. Link: https://lkml.kernel.org/r/20210930222704.2631604-7-keescook@chromium.org Signed-off-by: Kees Cook Co-developed-by: Daniel Micay Signed-off-by: Daniel Micay Cc: Andy Whitcroft Cc: Christoph Lameter Cc: David Rientjes Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Pekka Enberg Cc: Tejun Heo Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Randy Dunlap Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/vmalloc.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 671d402c3778f..0ed56fc10c11d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -136,21 +136,21 @@ static inline void vmalloc_init(void) static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size); -extern void *vzalloc(unsigned long size); -extern void *vmalloc_user(unsigned long size); -extern void *vmalloc_node(unsigned long size, int node); -extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_32(unsigned long size); -extern void *vmalloc_32_user(unsigned long size); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); +extern void *vmalloc(unsigned long size) __alloc_size(1); +extern void *vzalloc(unsigned long size) __alloc_size(1); +extern void *vmalloc_user(unsigned long size) __alloc_size(1); +extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1); +extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1); +extern void *vmalloc_32(unsigned long size) __alloc_size(1); +extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, - const void *caller); + const void *caller) __alloc_size(1); void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, - int node, const void *caller); -void *vmalloc_no_huge(unsigned long size); + int node, const void *caller) __alloc_size(1); +void *vmalloc_no_huge(unsigned long size) __alloc_size(1); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); From 1eb6b8ab93cbcaa7d55910db2b0e445679fa0933 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:57 +1100 Subject: [PATCH 0492/1202] mm/page_alloc: add __alloc_size attributes for better bounds checking As already done in GrapheneOS, add the __alloc_size attribute for appropriate page allocator interfaces, to provide additional hinting for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler optimizations. Link: https://lkml.kernel.org/r/20210930222704.2631604-8-keescook@chromium.org Signed-off-by: Kees Cook Co-developed-by: Daniel Micay Signed-off-by: Daniel Micay Cc: Andy Whitcroft Cc: Christoph Lameter Cc: David Rientjes Cc: Dennis Zhou Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Pekka Enberg Cc: Tejun Heo Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Randy Dunlap Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 55b2ec1f965a5..fbd4abc33f242 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -608,9 +608,9 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -void *alloc_pages_exact(size_t size, gfp_t gfp_mask); +void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) From d2763ca543d4d65b6d92f8e2af41fe91d87492e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:57 +1100 Subject: [PATCH 0493/1202] percpu: add __alloc_size attributes for better bounds checking As already done in GrapheneOS, add the __alloc_size attribute for appropriate percpu allocator interfaces, to provide additional hinting for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler optimizations. Note that due to the implementation of the percpu API, this is unlikely to ever actually provide compile-time checking beyond very simple non-SMP builds. But, since they are technically allocators, mark them as such. Link: https://lkml.kernel.org/r/20210930222704.2631604-9-keescook@chromium.org Signed-off-by: Kees Cook Co-developed-by: Daniel Micay Signed-off-by: Daniel Micay Acked-by: Dennis Zhou Cc: Tejun Heo Cc: Christoph Lameter Cc: Andy Whitcroft Cc: David Rientjes Cc: Dwaipayan Ray Cc: Joe Perches Cc: Joonsoo Kim Cc: Lukas Bulwahn Cc: Miguel Ojeda Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Pekka Enberg Cc: Vlastimil Babka Cc: Alexandre Bounine Cc: Gustavo A. R. 
Silva Cc: Ira Weiny Cc: Jing Xiangfeng Cc: John Hubbard Cc: kernel test robot Cc: Matt Porter Cc: Randy Dunlap Cc: Souptick Joarder Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/percpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 5e76af742c807..98a9371133f8f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1); extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); @@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); extern void __init setup_per_cpu_areas(void); #endif -extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); -extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); +extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); From cd38dec743ae53023e018684ac0dd62750fee980 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:06:58 +1100 Subject: [PATCH 0494/1202] kasan: test: consolidate workarounds for unwanted __alloc_size() protection This fixes "lib/test_kasan.c: use underlying string helpers" to avoid needing new helpers. As done in "kasan: test: bypass __alloc_size checks", just use OPTIMIZER_HIDE_VAR(). Additionally converts a use of "volatile", which was trying to work around similar detection. Link: https://lkml.kernel.org/r/20211020193807.40684-1-keescook@chromium.org Signed-off-by: Kees Cook Acked-by: Arnd Bergmann Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/test_kasan.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 8b93c2541d921..11443e9bfeafe 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -525,12 +525,13 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) { char *ptr; size_t size = 64; - volatile size_t invalid_size = size; + size_t invalid_size = size; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(invalid_size); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -852,21 +853,6 @@ static void kmem_cache_invalid_free(struct kunit *test) kmem_cache_destroy(cache); } -/* - * noinline wrappers to prevent the compiler from noticing the overflow - * at compile time rather than having kasan catch it. 
- */ -static noinline void *__kasan_memchr(const void *s, int c, size_t n) -{ - return memchr(s, c, n); -} - -static noinline int __kasan_memcmp(const void *s1, const void *s2, size_t n) -{ - return memcmp(s1, s2, n); -} - - static void kasan_memchr(struct kunit *test) { char *ptr; @@ -884,8 +870,9 @@ static void kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, - kasan_ptr_result = __kasan_memchr(ptr, '1', size + 1)); + kasan_ptr_result = memchr(ptr, '1', size + 1)); kfree(ptr); } @@ -909,8 +896,9 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = __kasan_memcmp(ptr, arr, size+1)); + kasan_int_result = memcmp(ptr, arr, size+1)); kfree(ptr); } From 018b3196d0fdfbd4392a843c2762adb2db7ed97e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 21 Oct 2021 15:06:58 +1100 Subject: [PATCH 0495/1202] mm/page_ext.c: fix a comment The preceding conditional is #ifndef CONFIG_SPARSEMEM, so the comment on the matching #else should say CONFIG_SPARSEMEM rather than CONFIG_FLATMEM. Link: https://lkml.kernel.org/r/20211008140312.6492-1-zhangyinan2019@email.szu.edu.cn Signed-off-by: Yinan Zhang Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_ext.c b/mm/page_ext.c index 2a52fd9ed464a..6242afb24d847 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -201,7 +201,7 @@ void __init page_ext_init_flatmem(void) panic("Out of memory"); } -#else /* CONFIG_FLATMEM */ +#else /* CONFIG_SPARSEMEM */ struct page_ext *lookup_page_ext(const struct page *page) { From a16c5e29c0c9603706e3fa6d45098e01e270433a Mon Sep 17 00:00:00 2001 From: Yixuan Cao Date: Thu, 21 Oct 2021 15:06:59 +1100 Subject: [PATCH 0496/1202] mm/page_owner.c: modify the type of argument "order" in some functions The type of "order" in struct page_owner is unsigned short. However, it is passed as unsigned int in the following three functions: __reset_page_owner, __set_page_owner_handle and __set_page_owner. Declaring the argument as unsigned int in their parameter lists is inconsistent with the field it is stored in.
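In reduced form, the mismatch being fixed looks like this (a sketch, not the actual declarations):

	/* The backing field is 16 bits wide... */
	struct page_owner {
		unsigned short order;
		/* ... */
	};

	/* ...but the setter took a wider parameter, which was implicitly
	 * narrowed on store (harmless for real page orders, but
	 * inconsistent): */
	void __set_page_owner(struct page *page, unsigned int order,
			      gfp_t gfp_mask);

	/* After the patch, the parameter and the field agree: */
	void __set_page_owner(struct page *page, unsigned short order,
			      gfp_t gfp_mask);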
Link: https://lkml.kernel.org/r/20211020125945.47792-1-caoyixuan2019@email.szu.edu.cn Signed-off-by: Yixuan Cao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_owner.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index 62402d22539b8..8eddd2225e330 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -125,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) return handle; } -void __reset_page_owner(struct page *page, unsigned int order) +void __reset_page_owner(struct page *page, unsigned short order) { int i; struct page_ext *page_ext; @@ -149,7 +149,7 @@ void __reset_page_owner(struct page *page, unsigned int order) static inline void __set_page_owner_handle(struct page_ext *page_ext, depot_stack_handle_t handle, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { struct page_owner *page_owner; int i; @@ -169,7 +169,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext, } } -noinline void __set_page_owner(struct page *page, unsigned int order, +noinline void __set_page_owner(struct page *page, unsigned short order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); From fcc6277eb22555b2ada868a7fca85d82f200ac80 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:07:00 +1100 Subject: [PATCH 0497/1202] mm-page_ownerc-modify-the-type-of-argument-order-in-some-functions-fix update include/linux/page_owner.h Cc: Yixuan Cao Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/page_owner.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index 719bfe5108c56..14f981c228fce 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -8,9 +8,9 @@ extern struct static_key_false page_owner_inited; extern struct page_ext_operations page_owner_ops; -extern void __reset_page_owner(struct page *page, unsigned int order); +extern void __reset_page_owner(struct page *page, unsigned short order); extern void __set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask); + unsigned short order, gfp_t gfp_mask); extern void __split_page_owner(struct page *page, unsigned int nr); extern void __copy_page_owner(struct page *oldpage, struct page *newpage); extern void __set_page_owner_migrate_reason(struct page *page, int reason); @@ -18,14 +18,14 @@ extern void __dump_page_owner(const struct page *page); extern void pagetypeinfo_showmixedcount_print(struct seq_file *m, pg_data_t *pgdat, struct zone *zone); -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void reset_page_owner(struct page *page, unsigned short order) { if (static_branch_unlikely(&page_owner_inited)) __reset_page_owner(page, order); } static inline void set_page_owner(struct page *page, - unsigned int order, gfp_t gfp_mask) + unsigned short order, gfp_t gfp_mask) { if (static_branch_unlikely(&page_owner_inited)) __set_page_owner(page, order, gfp_mask); @@ -52,7 +52,7 @@ static inline void dump_page_owner(const struct page *page) __dump_page_owner(page); } #else -static inline void reset_page_owner(struct page *page, unsigned int order) +static inline void reset_page_owner(struct page *page, unsigned short order) { } static inline void set_page_owner(struct page *page, @@ -60,7 +60,7 @@ static inline void set_page_owner(struct page *page, { } static inline void 
split_page_owner(struct page *page, - unsigned int order) + unsigned short order) { } static inline void copy_page_owner(struct page *oldpage, struct page *newpage) From bf7a1535a6f13da706db4f8e4a936bf37c341e70 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 21 Oct 2021 15:07:01 +1100 Subject: [PATCH 0498/1202] mm: stop filemap_read() from grabbing a superfluous page Under some circumstances, filemap_read() will allocate sufficient pages to read to the end of the file, call readahead/readpages on them and copy the data over - and then it will allocate another page at the EOF and call readpage on that and then ignore it. This is unnecessary and a waste of time and resources. filemap_read() *does* check for this, but only after it has already done the allocation and I/O. Fix this by also performing the check before calling filemap_get_pages(). Link: https://lkml.kernel.org/r/163472463105.3126792.7056099385135786492.stgit@warthog.procyon.org.uk Link: https://lore.kernel.org/r/160588481358.3465195.16552616179674485179.stgit@warthog.procyon.org.uk/ Link: https://lore.kernel.org/r/163456863216.2614702.6384850026368833133.stgit@warthog.procyon.org.uk/ Signed-off-by: David Howells Acked-by: Jeff Layton Reviewed-by: Matthew Wilcox (Oracle) Cc: Kent Overstreet Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/filemap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index dae481293b5d9..e50be519f6a48 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2625,6 +2625,9 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, if ((iocb->ki_flags & IOCB_WAITQ) && already_read) iocb->ki_flags |= IOCB_NOWAIT; + if (unlikely(iocb->ki_pos >= i_size_read(inode))) + break; + error = filemap_get_pages(iocb, iter, &pvec); if (error < 0) break; From ff8c7cfbeb106f7fab63bb02ef0e4cf1eb3ee48f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Oct 2021 15:07:01 +1100 Subject: [PATCH 0499/1202] mm/filemap.c: remove bogus VM_BUG_ON It is not safe to check page->index without holding the page lock. It can be changed if the page is moved between the swap cache and the page cache for a shmem file, for example. There is a VM_BUG_ON below which checks page->index is correct after taking the page lock. Link: https://lkml.kernel.org/r/20210818144932.940640-1-willy@infradead.org Fixes: 5c211ba29deb ("mm: add and use find_lock_entries") Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/filemap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index e50be519f6a48..ee2f21e229c06 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2093,7 +2093,6 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start, if (!xa_is_value(page)) { if (page->index < start) goto put; - VM_BUG_ON_PAGE(page->index != xas.xa_index, page); if (page->index + thp_nr_pages(page) - 1 > end) goto put; if (!trylock_page(page)) From e7bacad0223ee47b04f6b60b004bded206af3220 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 21 Oct 2021 15:07:02 +1100 Subject: [PATCH 0500/1202] vfs: keep inodes with page cache off the inode shrinker LRU Historically (pre-2.5), the inode shrinker used to reclaim only empty inodes and skip over those that still contained page cache. This caused problems on highmem hosts: struct inodes could fill up the lowmem zones before the page cache in the highmem zones had been reclaimed.
To address this, the inode shrinker started to strip page cache to facilitate reclaiming lowmem. However, this comes with its own set of problems: the shrinkers may drop actively used page cache just because the inodes are not currently open or dirty - think working with a large git tree. It further doesn't respect cgroup memory protection settings and can cause priority inversions between containers. Nowadays, the page cache also holds non-resident info for evicted cache pages in order to detect refaults. We've come to rely heavily on this data inside reclaim for protecting the cache workingset and driving swap behavior. We also use it to quantify and report workload health through psi. The latter in turn is used for fleet health monitoring, as well as driving automated memory sizing of workloads and containers, proactive reclaim and memory offloading schemes. The consequence of dropping page cache prematurely is that we're seeing subtle and not-so-subtle failures in all of the above-mentioned scenarios, with the workload generally entering unexpected thrashing states while losing the ability to reliably detect it. To fix this on non-highmem systems at least, going back to rotating inodes on the LRU isn't feasible. We've tried (commit a76cf1a474d7 ("mm: don't reclaim inodes with many attached pages")) and failed (commit 69056ee6a8a3 ("Revert "mm: don't reclaim inodes with many attached pages"")). The issue is mostly that shrinker pools attract pressure based on their size, and when objects get skipped the shrinkers remember this as deferred reclaim work. This accumulates excessive pressure on the remaining inodes, and we can quickly eat into heavily used ones, or dirty ones that require IO to reclaim, when there potentially is plenty of cold, clean cache around still. Instead, this patch keeps populated inodes off the inode LRU in the first place - just like an open file or dirty state would. An otherwise clean and unused inode then gets queued when the last cache entry disappears. This solves the problem without reintroducing the reclaim issues, and generally is a bit more scalable than having to wade through potentially hundreds of thousands of busy inodes. Locking is a bit tricky because the locks protecting the inode state (i_lock) and the inode LRU (lru_list.lock) don't nest inside the irq-safe page cache lock (i_pages.xa_lock). Page cache deletions are serialized through i_lock, taken before the i_pages lock, to make sure depopulated inodes are queued reliably. Additions may race with deletions, but we'll check again in the shrinker. If additions race with the shrinker itself, we're protected by the i_lock: if find_inode() or iput() win, the shrinker will bail on the elevated i_count or I_REFERENCED; if the shrinker wins and goes ahead with the inode, it will set I_FREEING and inhibit further igets(), which will cause the other side to create a new instance of the inode instead.
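To make that ordering concrete, the deletion-side sequence the patch establishes looks like this (condensed from the mm/filemap.c hunk below; the truncate, reclaim and workingset paths follow the same shape):

	/* i_lock nests outside the irq-safe i_pages lock. */
	spin_lock(&mapping->host->i_lock);
	xa_lock_irq(&mapping->i_pages);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irq(&mapping->i_pages);
	if (mapping_shrinkable(mapping))	/* did the cache just go empty? */
		inode_add_lru(mapping->host);	/* then the inode becomes reclaimable */
	spin_unlock(&mapping->host->i_lock);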
Link: https://lkml.kernel.org/r/20210614211904.14420-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Roman Gushchin Cc: Tejun Heo Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/inode.c | 46 +++++++++++++++++++++---------------- fs/internal.h | 1 - include/linux/fs.h | 1 + include/linux/pagemap.h | 50 +++++++++++++++++++++++++++++++++++++++++ mm/filemap.c | 8 +++++++ mm/truncate.c | 19 ++++++++++++++-- mm/vmscan.c | 7 ++++++ mm/workingset.c | 10 +++++++++ 8 files changed, 120 insertions(+), 22 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index ed0cab8a32db1..a49695f57e1ea 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -428,11 +428,20 @@ void ihold(struct inode *inode) } EXPORT_SYMBOL(ihold); -static void inode_lru_list_add(struct inode *inode) +static void __inode_add_lru(struct inode *inode, bool rotate) { + if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE)) + return; + if (atomic_read(&inode->i_count)) + return; + if (!(inode->i_sb->s_flags & SB_ACTIVE)) + return; + if (!mapping_shrinkable(&inode->i_data)) + return; + if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_inc(nr_unused); - else + else if (rotate) inode->i_state |= I_REFERENCED; } @@ -443,16 +452,11 @@ static void inode_lru_list_add(struct inode *inode) */ void inode_add_lru(struct inode *inode) { - if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | - I_FREEING | I_WILL_FREE)) && - !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE) - inode_lru_list_add(inode); + __inode_add_lru(inode, false); } - static void inode_lru_list_del(struct inode *inode) { - if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru)) this_cpu_dec(nr_unused); } @@ -728,10 +732,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) /* * Isolate the inode from the LRU in preparation for freeing it. * - * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. If the inode has metadata buffers attached to - * mapping->private_list then try to remove them. - * * If the inode has the I_REFERENCED flag set, then it means that it has been * used recently - the flag is set in iput_final(). When we encounter such an * inode, clear the flag and move it to the back of the LRU so it gets another @@ -747,31 +747,39 @@ static enum lru_status inode_lru_isolate(struct list_head *item, struct inode *inode = container_of(item, struct inode, i_lru); /* - * we are inverting the lru lock/inode->i_lock here, so use a trylock. - * If we fail to get the lock, just skip it. + * We are inverting the lru lock/inode->i_lock here, so use a + * trylock. If we fail to get the lock, just skip it. */ if (!spin_trylock(&inode->i_lock)) return LRU_SKIP; /* - * Referenced or dirty inodes are still in use. Give them another pass - * through the LRU as we canot reclaim them now. + * Inodes can get referenced, redirtied, or repopulated while + * they're already on the LRU, and this can make them + * unreclaimable for a while. Remove them lazily here; iput, + * sync, or the last page cache deletion will requeue them. 
*/ if (atomic_read(&inode->i_count) || - (inode->i_state & ~I_REFERENCED)) { + (inode->i_state & ~I_REFERENCED) || + !mapping_shrinkable(&inode->i_data)) { list_lru_isolate(lru, &inode->i_lru); spin_unlock(&inode->i_lock); this_cpu_dec(nr_unused); return LRU_REMOVED; } - /* recently referenced inodes get one more pass */ + /* Recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; } + /* + * On highmem systems, mapping_shrinkable() permits dropping + * page cache in order to free up struct inodes: lowmem might + * be under pressure before the cache inside the highmem zone. + */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { __iget(inode); spin_unlock(&inode->i_lock); @@ -1638,7 +1646,7 @@ static void iput_final(struct inode *inode) if (!drop && !(inode->i_state & I_DONTCACHE) && (sb->s_flags & SB_ACTIVE)) { - inode_add_lru(inode); + __inode_add_lru(inode, true); spin_unlock(&inode->i_lock); return; } diff --git a/fs/internal.h b/fs/internal.h index 3cd065c8a66b4..2854ff29f1167 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -149,7 +149,6 @@ extern int vfs_open(const struct path *, struct file *); * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); -extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd20..be0afaea2eae1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3192,6 +3192,7 @@ static inline void remove_inode_hash(struct inode *inode) } extern void inode_sb_list_add(struct inode *inode); +extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 62db6b0176b95..5c74a45ff97a0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -23,6 +23,56 @@ static inline bool mapping_empty(struct address_space *mapping) return xa_empty(&mapping->i_pages); } +/* + * mapping_shrinkable - test if page cache state allows inode reclaim + * @mapping: the page cache mapping + * + * This checks the mapping's cache state for the pupose of inode + * reclaim and LRU management. + * + * The caller is expected to hold the i_lock, but is not required to + * hold the i_pages lock, which usually protects cache state. That's + * because the i_lock and the list_lru lock that protect the inode and + * its LRU state don't nest inside the irq-safe i_pages lock. + * + * Cache deletions are performed under the i_lock, which ensures that + * when an inode goes empty, it will reliably get queued on the LRU. + * + * Cache additions do not acquire the i_lock and may race with this + * check, in which case we'll report the inode as shrinkable when it + * has cache pages. This is okay: the shrinker also checks the + * refcount and the referenced bit, which will be elevated or set in + * the process of adding new cache pages to an inode. + */ +static inline bool mapping_shrinkable(struct address_space *mapping) +{ + void *head; + + /* + * On highmem systems, there could be lowmem pressure from the + * inodes before there is highmem pressure from the page + * cache. Make inodes shrinkable regardless of cache state. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + return true; + + /* Cache completely empty? Shrink away. 
*/ + head = rcu_access_pointer(mapping->i_pages.xa_head); + if (!head) + return true; + + /* + * The xarray stores single offset-0 entries directly in the + * head pointer, which allows non-resident page cache entries + * to escape the shadow shrinker's list of xarray nodes. The + * inode shrinker needs to pick them up under memory pressure. + */ + if (!xa_is_node(head) && xa_is_value(head)) + return true; + + return false; +} + /* * Bits in mapping->flags. */ diff --git a/mm/filemap.c b/mm/filemap.c index ee2f21e229c06..0cbc24cc92921 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -262,9 +262,13 @@ void delete_from_page_cache(struct page *page) struct address_space *mapping = page_mapping(page); BUG_ON(!PageLocked(page)); + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); page_cache_free_page(mapping, page); } @@ -340,6 +344,7 @@ void delete_from_page_cache_batch(struct address_space *mapping, if (!pagevec_count(pvec)) return; + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); for (i = 0; i < pagevec_count(pvec); i++) { trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); @@ -348,6 +353,9 @@ void delete_from_page_cache_batch(struct address_space *mapping, } page_cache_delete_batch(mapping, pvec); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); for (i = 0; i < pagevec_count(pvec); i++) page_cache_free_page(mapping, pvec->pages[i]); diff --git a/mm/truncate.c b/mm/truncate.c index 714eaf19821d7..cc83a3f7c1ad3 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -45,9 +45,13 @@ static inline void __clear_shadow_entry(struct address_space *mapping, static void clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); __clear_shadow_entry(mapping, index, entry); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); } /* @@ -73,8 +77,10 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, return; dax = dax_mapping(mapping); - if (!dax) + if (!dax) { + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); + } for (i = j; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; @@ -93,8 +99,12 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, __clear_shadow_entry(mapping, index, page); } - if (!dax) + if (!dax) { xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); + } pvec->nr = j; } @@ -567,6 +577,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return 0; + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); if (PageDirty(page)) goto failed; @@ -574,6 +585,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __delete_from_page_cache(page, NULL); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); if (mapping->a_ops->freepage) mapping->a_ops->freepage(page); @@ -582,6 +596,7 @@ 
invalidate_complete_page2(struct address_space *mapping, struct page *page) return 1; failed: xa_unlock_irq(&mapping->i_pages); + spin_unlock(&mapping->host->i_lock); return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 74296c2d1fed2..d010b8f64568e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1105,6 +1105,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(mapping != page_mapping(page)); + if (!PageSwapCache(page)) + spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); /* * The non racy check for a busy page. @@ -1173,6 +1175,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, shadow = workingset_eviction(page, target_memcg); __delete_from_page_cache(page, shadow); xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); if (freepage != NULL) freepage(page); @@ -1182,6 +1187,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, cannot_free: xa_unlock_irq(&mapping->i_pages); + if (!PageSwapCache(page)) + spin_unlock(&mapping->host->i_lock); return 0; } diff --git a/mm/workingset.c b/mm/workingset.c index d5b81e4f4cbe8..23df60ce2e109 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -543,6 +543,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out; } + if (!spin_trylock(&mapping->host->i_lock)) { + xa_unlock(&mapping->i_pages); + spin_unlock_irq(lru_lock); + ret = LRU_RETRY; + goto out; + } + list_lru_isolate(lru, item); __dec_lruvec_kmem_state(node, WORKINGSET_NODES); @@ -562,6 +569,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, out_invalid: xa_unlock_irq(&mapping->i_pages); + if (mapping_shrinkable(mapping)) + inode_add_lru(mapping->host); + spin_unlock(&mapping->host->i_lock); ret = LRU_REMOVED_RETRY; out: cond_resched(); From 75affdaa953be6809740230311314840bd1cdf0b Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 21 Oct 2021 15:07:03 +1100 Subject: [PATCH 0501/1202] mm/gup: further simplify __gup_device_huge() commit 6401c4eb57f9 ("mm: gup: fix potential pgmap refcnt leak in __gup_device_huge()") simplified the return paths, but didn't go quite far enough, as discussed in [1]. Remove the "ret" variable entirely, because there is enough information already available to provide the return value. [1] https://lore.kernel.org/r/CAHk-=wgQTRX=5SkCmS+zfmpqubGHGJvXX_HgnPG8JSpHKHBMeg@mail.gmail.com Link: https://lkml.kernel.org/r/20210904004224.86391-1-jhubbard@nvidia.com Signed-off-by: John Hubbard Suggested-by: Linus Torvalds Reviewed-by: Jan Kara Cc: Miaohe Lin Cc: Claudio Imbrenda Cc: Kirill A. 
Shutemov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/gup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 886d6148d3d03..48c7a5a1e85ba 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2228,7 +2228,6 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, { int nr_start = *nr; struct dev_pagemap *pgmap = NULL; - int ret = 1; do { struct page *page = pfn_to_page(pfn); @@ -2236,14 +2235,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, pgmap = get_dev_pagemap(pfn, pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, flags, pages); - ret = 0; break; } SetPageReferenced(page); pages[*nr] = page; if (unlikely(!try_grab_page(page, flags))) { undo_dev_pagemap(nr, nr_start, flags, pages); - ret = 0; break; } (*nr)++; @@ -2251,7 +2248,7 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, } while (addr += PAGE_SIZE, addr != end); put_dev_pagemap(pgmap); - return ret; + return addr == end; } static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, From cb9e3c0f7493eb5bdc7849314a4e437b200af5b2 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 21 Oct 2021 15:07:04 +1100 Subject: [PATCH 0502/1202] mm/swapfile: remove needless request_queue NULL pointer check The request_queue pointer returned from bdev_get_queue() shall never be NULL, so the null check is unnecessary, just remove it. Link: https://lkml.kernel.org/r/20210917082111.33923-1-vulab@iscas.ac.cn Signed-off-by: Xu Wang Acked-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 22d10f7138487..42027d213fd2d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3118,7 +3118,7 @@ static bool swap_discardable(struct swap_info_struct *si) { struct request_queue *q = bdev_get_queue(si->bdev); - if (!q || !blk_queue_discard(q)) + if (!blk_queue_discard(q)) return false; return true; From 6657f449b5f94af8a2867ecfc79e1fa572cc56d2 Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Thu, 21 Oct 2021 15:07:04 +1100 Subject: [PATCH 0503/1202] mm/swapfile: fix an integer overflow in swap_show() This one is just a minor nuisance for people going through /proc/swaps if any of their swapareas is bigger than, or equal to, 1073741824 pages (4TB). The seq_printf() format string casts the pages-to-KB conversion as uint, and that overflows in the aforementioned case. Although it is almost unthinkable that someone would actually set up such a big single swaparea, there is a ticket recently filed against RHEL: https://bugzilla.redhat.com/show_bug.cgi?id=2008812 Given that all other code sites that use format strings for the same swap pages-to-KB conversion cast it as ulong, this patch just follows suit.
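The wraparound is easy to demonstrate in isolation. A standalone sketch (userspace C on an LP64 system; it assumes 4 KiB pages, for which the kernel's pages-to-KB conversion is a left shift by 2):

#include <stdio.h>

int main(void)
{
	unsigned long pages = 1UL << 30;	/* a 4 TiB swaparea in 4 KiB pages */
	unsigned int bad = pages << 2;		/* pages-to-KB truncated to uint: 0 */
	unsigned long good = pages << 2;	/* the same conversion kept as ulong */

	printf("uint: %u KB, ulong: %lu KB\n", bad, good);
	return 0;
}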
Link: https://lkml.kernel.org/r/20211006184011.2579054-1-aquini@redhat.com Signed-off-by: Rafael Aquini Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/swapfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 42027d213fd2d..353e5bd518e17 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2763,7 +2763,7 @@ static int swap_show(struct seq_file *swap, void *v) struct swap_info_struct *si = v; struct file *file; int len; - unsigned int bytes, inuse; + unsigned long bytes, inuse; if (si == SEQ_START_TOKEN) { seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n"); @@ -2775,7 +2775,7 @@ static int swap_show(struct seq_file *swap, void *v) file = si->swap_file; len = seq_file_path(swap, file, " \t\n\\"); - seq_printf(swap, "%*s%s\t%u\t%s%u\t%s%d\n", + seq_printf(swap, "%*s%s\t%lu\t%s%lu\t%s%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file_inode(file)->i_mode) ? "partition" : "file\t", From 7ec50d8d865dd6d03be85e6079d72a1db9801ac2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 21 Oct 2021 15:07:05 +1100 Subject: [PATCH 0504/1202] mm: optimise put_pages_list() Instead of calling put_page() one page at a time, pop pages off the list if their refcount was too high and pass the remainder to free_unref_page_list(). This should be a speed improvement, but I have no measurements to support that. Current callers do not care about performance, but I hope to add some which do. Link: https://lkml.kernel.org/r/20211007192138.561673-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/swap.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index af3cad4e53783..9f334d503fd2f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -134,18 +134,27 @@ EXPORT_SYMBOL(__put_page); * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru * - * Release a list of pages which are strung together on page.lru. Currently - * used by read_cache_pages() and related error recovery code. + * Release a list of pages which are strung together on page.lru. */ void put_pages_list(struct list_head *pages) { - while (!list_empty(pages)) { - struct page *victim; - victim = lru_to_page(pages); - list_del(&victim->lru); - put_page(victim); + struct page *page, *next; + list_for_each_entry_safe(page, next, pages, lru) { + if (!put_page_testzero(page)) { + list_del(&page->lru); + continue; + } + if (PageHead(page)) { + list_del(&page->lru); + __put_compound_page(page); + continue; + } + /* Cannot be PageLRU because it's passed to us using the lru */ + __ClearPageWaiters(page); } + + free_unref_page_list(pages); } EXPORT_SYMBOL(put_pages_list); From 9e0ebf3937a26205274d3afe2eb9468070c4f3f3 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:06 +1100 Subject: [PATCH 0505/1202] mm/memcg: drop swp_entry_t* in mc_handle_file_pte() After the rework in commit f5df8635c5a3 ("mm: use find_get_incore_page in memcontrol", 2020-10-13), the swp_entry_t pointer argument is unused.
Link: https://lkml.kernel.org/r/20210916193014.80129-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Muchun Song Reviewed-by: David Hildenbrand Cc: Johannes Weiner Cc: Michal Hocko Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6da5020a8656d..15e6fb5d56a7e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5545,7 +5545,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, #endif static struct page *mc_handle_file_pte(struct vm_area_struct *vma, - unsigned long addr, pte_t ptent, swp_entry_t *entry) + unsigned long addr, pte_t ptent) { if (!vma->vm_file) /* anonymous vma */ return NULL; @@ -5718,7 +5718,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, else if (is_swap_pte(ptent)) page = mc_handle_swap_pte(vma, ptent, &ent); else if (pte_none(ptent)) - page = mc_handle_file_pte(vma, addr, ptent, &ent); + page = mc_handle_file_pte(vma, addr, ptent); if (!page && !ent.val) return ret; From e0eeb5aff0ab3d167dc4edc2f9c71295a7a32ed6 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 21 Oct 2021 15:07:07 +1100 Subject: [PATCH 0506/1202] memcg: flush stats only if updated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment, the kernel flushes the memcg stats on every refault and also on every reclaim iteration. Although rstat maintains a per-cpu update tree, on each flush the kernel still has to walk every CPU's update tree to check whether there is anything to flush. This patch adds tracking on the stats update side so the flush side can be smarter and skip the flush when there are no updates. The stats update codepath is very performance sensitive for many workloads and benchmarks. So, we cannot follow what commit aa48e47e3906 ("memcg: infrastructure to flush memcg stats") did, which was to trigger an async flush through queue_work() and caused a lot of performance regression reports. That got reverted by commit 1f828223b799 ("memcg: flush lruvec stats in the refault"). In this patch we kept the stats update codepath very minimal and let the reader side flush the stats only when the updates exceed a specific threshold. For now the threshold is (nr_cpus * CHARGE_BATCH). To evaluate the impact of this patch, an 8 GiB tmpfs file was created on a system with swap-on-zram and the file was pushed to swap through the memory.force_empty interface. On reading the whole file, the memcg stat flush in the refault code path is triggered. With this patch, we observed a 63% reduction in the read time of the 8 GiB file.
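The core of the scheme, condensed from the mm/memcontrol.c hunk below: writers count batches of updates cheaply, and readers flush only once enough batches have accumulated:

	static DEFINE_PER_CPU(unsigned int, stats_updates);
	static atomic_t stats_flush_threshold = ATOMIC_INIT(0);

	static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
	{
		cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
		/* Every MEMCG_CHARGE_BATCH updates on this CPU bump the global count. */
		if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
			atomic_inc(&stats_flush_threshold);
	}

	void mem_cgroup_flush_stats(void)
	{
		/* Readers skip the expensive rstat walk when little has changed. */
		if (atomic_read(&stats_flush_threshold) > num_online_cpus())
			__mem_cgroup_flush_stats();
	}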
Link: https://lkml.kernel.org/r/20211001190040.48086-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Reviewed-by: "Michal Koutný" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 78 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15e6fb5d56a7e..ae12fdd4bc0de 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -103,11 +103,6 @@ static bool do_memsw_account(void) return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap; } -/* memcg and lruvec stats flushing */ -static void flush_memcg_stats_dwork(struct work_struct *w); -static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork); -static DEFINE_SPINLOCK(stats_flush_lock); - #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024 @@ -635,6 +630,56 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) return mz; } +/* + * memcg and lruvec stats flushing + * + * Many codepaths leading to stats update or read are performance sensitive and + * adding stats flushing in such codepaths is not desirable. So, to optimize the + * flushing the kernel does: + * + * 1) Periodically and asynchronously flush the stats every 2 seconds to not let + * rstat update tree grow unbounded. + * + * 2) Flush the stats synchronously on reader side only when there are more than + * (MEMCG_CHARGE_BATCH * nr_cpus) update events. Though this optimization + * will let stats be out of sync by atmost (MEMCG_CHARGE_BATCH * nr_cpus) but + * only for 2 seconds due to (1). + */ +static void flush_memcg_stats_dwork(struct work_struct *w); +static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork); +static DEFINE_SPINLOCK(stats_flush_lock); +static DEFINE_PER_CPU(unsigned int, stats_updates); +static atomic_t stats_flush_threshold = ATOMIC_INIT(0); + +static inline void memcg_rstat_updated(struct mem_cgroup *memcg) +{ + cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); + if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH)) + atomic_inc(&stats_flush_threshold); +} + +static void __mem_cgroup_flush_stats(void) +{ + if (!spin_trylock(&stats_flush_lock)) + return; + + cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup); + atomic_set(&stats_flush_threshold, 0); + spin_unlock(&stats_flush_lock); +} + +void mem_cgroup_flush_stats(void) +{ + if (atomic_read(&stats_flush_threshold) > num_online_cpus()) + __mem_cgroup_flush_stats(); +} + +static void flush_memcg_stats_dwork(struct work_struct *w) +{ + mem_cgroup_flush_stats(); + queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); +} + /** * __mod_memcg_state - update cgroup memory statistics * @memcg: the memory cgroup @@ -647,7 +692,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) return; __this_cpu_add(memcg->vmstats_percpu->state[idx], val); - cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); + memcg_rstat_updated(memcg); } /* idx can be of type enum memcg_stat_item or node_stat_item. 
 */ @@ -675,10 +720,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, memcg = pn->memcg; /* Update memcg */ - __mod_memcg_state(memcg, idx, val); + __this_cpu_add(memcg->vmstats_percpu->state[idx], val); /* Update lruvec */ __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); + + memcg_rstat_updated(memcg); } /** @@ -780,7 +827,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, return; __this_cpu_add(memcg->vmstats_percpu->events[idx], count); - cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); + memcg_rstat_updated(memcg); } static unsigned long memcg_events(struct mem_cgroup *memcg, int event) @@ -5341,21 +5388,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) memcg_wb_domain_size_changed(memcg); } -void mem_cgroup_flush_stats(void) -{ - if (!spin_trylock(&stats_flush_lock)) - return; - - cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup); - spin_unlock(&stats_flush_lock); -} - -static void flush_memcg_stats_dwork(struct work_struct *w) -{ - mem_cgroup_flush_stats(); - queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); -} - static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); From 168a56603ddbe7dd3db892620c5012421575f50c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 21 Oct 2021 15:07:07 +1100 Subject: [PATCH 0507/1202] memcg: unify memcg stat flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memcg stats can be flushed in multiple contexts, and potentially in parallel too. For example, multiple parallel user space readers of memcg stats will contend with each other on the rstat locks. There is no need for that: we just need one flusher, and everyone else can benefit. In addition, after aa48e47e3906 ("memcg: infrastructure to flush memcg stats") the kernel periodically flushes the memcg stats from the root, so the other flushers will potentially have much less work to do.
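The unification reduces to one trylock-guarded flush from the root (condensed from the hunk below): whoever wins the lock flushes for everyone, and contending flushers simply return and reuse the winner's result:

	static void __mem_cgroup_flush_stats(void)
	{
		unsigned long flag;

		/* Someone else is already flushing; piggyback on their work. */
		if (!spin_trylock_irqsave(&stats_flush_lock, flag))
			return;

		cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
		atomic_set(&stats_flush_threshold, 0);
		spin_unlock_irqrestore(&stats_flush_lock, flag);
	}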
Link: https://lkml.kernel.org/r/20211001190040.48086-2-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: "Michal Koutný" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae12fdd4bc0de..fd18076171b5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -660,12 +660,14 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg) static void __mem_cgroup_flush_stats(void) { - if (!spin_trylock(&stats_flush_lock)) + unsigned long flag; + + if (!spin_trylock_irqsave(&stats_flush_lock, flag)) return; cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup); atomic_set(&stats_flush_threshold, 0); - spin_unlock(&stats_flush_lock); + spin_unlock_irqrestore(&stats_flush_lock, flag); } void mem_cgroup_flush_stats(void) @@ -1461,7 +1463,7 @@ static char *memory_stat_format(struct mem_cgroup *memcg) * * Current memory state: */ - cgroup_rstat_flush(memcg->css.cgroup); + mem_cgroup_flush_stats(); for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { u64 size; @@ -3565,8 +3567,7 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) unsigned long val; if (mem_cgroup_is_root(memcg)) { - /* mem_cgroup_threshold() calls here from irqsafe context */ - cgroup_rstat_flush_irqsafe(memcg->css.cgroup); + mem_cgroup_flush_stats(); val = memcg_page_state(memcg, NR_FILE_PAGES) + memcg_page_state(memcg, NR_ANON_MAPPED); if (swap) @@ -3947,7 +3948,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v) int nid; struct mem_cgroup *memcg = mem_cgroup_from_seq(m); - cgroup_rstat_flush(memcg->css.cgroup); + mem_cgroup_flush_stats(); for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) { seq_printf(m, "%s=%lu", stat->name, @@ -4019,7 +4020,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats)); - cgroup_rstat_flush(memcg->css.cgroup); + mem_cgroup_flush_stats(); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { unsigned long nr; @@ -4522,7 +4523,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - cgroup_rstat_flush_irqsafe(memcg->css.cgroup); + mem_cgroup_flush_stats(); *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY); *pwriteback = memcg_page_state(memcg, NR_WRITEBACK); @@ -6405,7 +6406,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v) int i; struct mem_cgroup *memcg = mem_cgroup_from_seq(m); - cgroup_rstat_flush(memcg->css.cgroup); + mem_cgroup_flush_stats(); for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { int nid; From 19eb7facb8775e1b87c76572486543be7953d99a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 21 Oct 2021 15:07:08 +1100 Subject: [PATCH 0508/1202] mm/memcg: remove obsolete memcg_free_kmem() Since commit d648bcc7fe65 ("mm: kmem: make memcg_kmem_enabled() irreversible"), the only thing memcg_free_kmem() does is to call memcg_offline_kmem() when the memcg is still online which can happen when online_css() fails due to -ENOMEM. However, the name memcg_free_kmem() is confusing and it is more clear and straight forward to call memcg_offline_kmem() directly from mem_cgroup_css_free(). 
Link: https://lkml.kernel.org/r/20211005202450.11775-1-longman@redhat.com Signed-off-by: Waiman Long Suggested-by: Roman Gushchin Reviewed-by: Aaron Tomlin Reviewed-by: Shakeel Butt Reviewed-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Vlastimil Babka Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fd18076171b5f..e092cce3c96e7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3704,13 +3704,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) memcg_free_cache_id(kmemcg_id); } - -static void memcg_free_kmem(struct mem_cgroup *memcg) -{ - /* css_alloc() failed, offlining didn't happen */ - if (unlikely(memcg->kmem_state == KMEM_ONLINE)) - memcg_offline_kmem(memcg); -} #else static int memcg_online_kmem(struct mem_cgroup *memcg) { @@ -3719,9 +3712,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) static void memcg_offline_kmem(struct mem_cgroup *memcg) { } -static void memcg_free_kmem(struct mem_cgroup *memcg) -{ -} #endif /* CONFIG_MEMCG_KMEM */ static int memcg_update_kmem_max(struct mem_cgroup *memcg, @@ -5356,7 +5346,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css) cancel_work_sync(&memcg->high_work); mem_cgroup_remove_from_trees(memcg); free_shrinker_info(memcg); - memcg_free_kmem(memcg); + + /* Need to offline kmem if online_css() fails */ + memcg_offline_kmem(memcg); mem_cgroup_free(memcg); } From 05e76c5c077c471cd56f4e5f6fd7e7547c42821f Mon Sep 17 00:00:00 2001 From: Len Baker Date: Thu, 21 Oct 2021 15:07:09 +1100 Subject: [PATCH 0509/1202] mm/list_lru.c: prefer struct_size over open coded arithmetic As noted in the "Deprecated Interfaces, Language Features, Attributes, and Conventions" documentation [1], size calculations (especially multiplication) should not be performed in memory allocator (or similar) function arguments due to the risk of them overflowing. This could lead to values wrapping around and a smaller allocation being made than the caller was expecting. Using those allocations could lead to linear overflows of heap memory and other misbehaviors. So, use the struct_size() helper to do the arithmetic instead of the argument "size + count * size" in the kvmalloc() functions. Also, take the opportunity to refactor the memcpy() call to use the flex_array_size() helper. This code was detected with the help of Coccinelle and audited and fixed manually. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments Link: https://lkml.kernel.org/r/20211017105929.9284-1-len.baker@gmx.com Signed-off-by: Len Baker Cc: Kees Cook Cc: "Gustavo A. R.
Silva" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/list_lru.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/list_lru.c b/mm/list_lru.c index cd58790d0fb38..a6031f1c5bd78 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -354,8 +354,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru) struct list_lru_memcg *memcg_lrus; int size = memcg_nr_cache_ids; - memcg_lrus = kvmalloc(sizeof(*memcg_lrus) + - size * sizeof(void *), GFP_KERNEL); + memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL); if (!memcg_lrus) return -ENOMEM; @@ -389,7 +388,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, old = rcu_dereference_protected(nlru->memcg_lrus, lockdep_is_held(&list_lrus_mutex)); - new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL); + new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL); if (!new) return -ENOMEM; @@ -398,7 +397,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru, return -ENOMEM; } - memcpy(&new->lru, &old->lru, old_size * sizeof(void *)); + memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size)); /* * The locking below allows readers that hold nlru->lock avoid taking From 5f3345c170795b0822a69ec3ccbac28f7f736090 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 21 Oct 2021 15:07:10 +1100 Subject: [PATCH 0510/1202] memcg, kmem: further deprecate kmem.limit_in_bytes The deprecation process of kmem.limit_in_bytes started with the commit 0158115f702 ("memcg, kmem: deprecate kmem.limit_in_bytes") which also explains in detail the motivation behind the deprecation. To summarize, it is the unexpected behavior on hitting the kmem limit. This patch moves the deprecation process to the next stage by disallowing to set the kmem limit. In future we might just remove the kmem.limit_in_bytes file completely. Link: https://lkml.kernel.org/r/20211019153408.2916808-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Michal Hocko Reviewed-by: Muchun Song Cc: Vasily Averin Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/cgroup-v1/memory.rst | 11 ++---- mm/memcontrol.c | 35 +++---------------- 2 files changed, 6 insertions(+), 40 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 41191b5fb69d9..faac50149a222 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -87,10 +87,8 @@ Brief summary of control files. memory.oom_control set/show oom controls. memory.numa_stat show the number of memory usage per numa node - memory.kmem.limit_in_bytes set/show hard limit for kernel memory - This knob is deprecated and shouldn't be - used. It is planned that this be removed in - the foreseeable future. + memory.kmem.limit_in_bytes This knob is deprecated and writing to + it will return -ENOTSUPP. memory.kmem.usage_in_bytes show current kernel memory allocation memory.kmem.failcnt show the number of kernel memory usage hits limits @@ -518,11 +516,6 @@ will be charged as a new owner of it. charged file caches. Some out-of-use page caches may keep charged until memory pressure happens. If you want to avoid that, force_empty will be useful. - Also, note that when memory.kmem.limit_in_bytes is set the charges due to - kernel pages will still be seen. This is not considered a failure and the - write will still return success. 
In this case, it is expected that - memory.kmem.usage_in_bytes == memory.usage_in_bytes. - 5.2 stat file ------------- diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e092cce3c96e7..26d7a9b9f9123 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3000,7 +3000,6 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg, static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, unsigned int nr_pages) { - struct page_counter *counter; struct mem_cgroup *memcg; int ret; @@ -3010,21 +3009,8 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp, if (ret) goto out; - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && - !page_counter_try_charge(&memcg->kmem, nr_pages, &counter)) { - - /* - * Enforce __GFP_NOFAIL allocation because callers are not - * prepared to see failures and likely do not have any failure - * handling code. - */ - if (gfp & __GFP_NOFAIL) { - page_counter_charge(&memcg->kmem, nr_pages); - goto out; - } - cancel_charge(memcg, nr_pages); - ret = -ENOMEM; - } + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + page_counter_charge(&memcg->kmem, nr_pages); out: css_put(&memcg->css); @@ -3714,17 +3700,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) } #endif /* CONFIG_MEMCG_KMEM */ -static int memcg_update_kmem_max(struct mem_cgroup *memcg, - unsigned long max) -{ - int ret; - - mutex_lock(&memcg_max_mutex); - ret = page_counter_set_max(&memcg->kmem, max); - mutex_unlock(&memcg_max_mutex); - return ret; -} - static int memcg_update_tcp_max(struct mem_cgroup *memcg, unsigned long max) { int ret; @@ -3790,10 +3765,8 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, ret = mem_cgroup_resize_max(memcg, nr_pages, true); break; case _KMEM: - pr_warn_once("kmem.limit_in_bytes is deprecated and will be removed. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); - ret = memcg_update_kmem_max(memcg, nr_pages); + /* kmem.limit_in_bytes is deprecated. */ + ret = -ENOTSUPP; break; case _TCP: ret = memcg_update_tcp_max(memcg, nr_pages); From 0d26db643d8245379fc36d8593c22a57b787fb2e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 20 Oct 2021 18:56:04 +0300 Subject: [PATCH 0511/1202] phy: qcom-qmp: another fix for the sc8180x PCIe definition Commit f839f14e24f2 ("phy: qcom-qmp: Add sc8180x PCIe support") added SC8180X PCIe tables, but used sm8250_qmp_pcie_serdes_tbl as a serdes table because of the copy paste error. Commit bfccd9a71a08 ("phy: qcom-qmp: Fix sc8180x PCIe definition") corrected part of this mistake by pointing serdes_tbl to sc8180x_qmp_pcie_serdes_tbl, however the serdes_tbl_num field was not updated to use sc8180x table. So let's now fix the serdes_tbl_num field too. 
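This class of copy-paste slip is easy to guard against by tying each table pointer to its element count at the use site. An illustrative sketch (the QMP_TBL helper is hypothetical, not part of the driver; the field and table names are from the diff below):

	/* Hypothetical helper: a table and its ARRAY_SIZE can no longer diverge. */
	#define QMP_TBL(name, t) .name##_tbl = (t), .name##_tbl_num = ARRAY_SIZE(t)

	static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
		.nlanes = 1,
		QMP_TBL(serdes, sc8180x_qmp_pcie_serdes_tbl),
		QMP_TBL(tx, sc8180x_qmp_pcie_tx_tbl),
		QMP_TBL(rx, sc8180x_qmp_pcie_rx_tbl),
	};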
Fixes: bfccd9a71a08 ("phy: qcom-qmp: Fix sc8180x PCIe definition") Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211020155604.1374530-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 32123a60be978..456a59d8c7d04 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -3745,7 +3745,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = { .nlanes = 1, .serdes_tbl = sc8180x_qmp_pcie_serdes_tbl, - .serdes_tbl_num = ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl), + .serdes_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl), .tx_tbl = sc8180x_qmp_pcie_tx_tbl, .tx_tbl_num = ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl), .rx_tbl = sc8180x_qmp_pcie_rx_tbl, From 6b6c496a88e7273a3cc945236b2d39a8f0568a8a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Oct 2021 16:45:23 +0200 Subject: [PATCH 0512/1202] spi: tegra20: fix build with CONFIG_PM_SLEEP=n There is another one of these warnings: drivers/spi/spi-tegra20-slink.c:1197:12: error: 'tegra_slink_runtime_resume' defined but not used [-Werror=unused-function] 1197 | static int tegra_slink_runtime_resume(struct device *dev) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Give it the same treatment as the other functions in this file. Fixes: efafec27c565 ("spi: Fix tegra20 build with CONFIG_PM=n") Fixes: 2bab94090b01 ("spi: tegra20-slink: Declare runtime suspend and resume functions conditionally") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20211013144538.2346533-1-arnd@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-tegra20-slink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 713292b0c71ea..33302f6b42d7b 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1194,7 +1194,7 @@ static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev) return 0; } -static int tegra_slink_runtime_resume(struct device *dev) +static __maybe_unused int tegra_slink_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); From 0204bdeb3df79a5c78d9e76119a7f04e3dcb1258 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 18 Oct 2021 15:34:13 +0800 Subject: [PATCH 0513/1202] spi: bcm-qspi: Fix missing clk_disable_unprepare() on error in bcm_qspi_probe() Fix the missing clk_disable_unprepare() before return from bcm_qspi_probe() in the error handling case. 
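The shape of the fix is the usual goto-based unwind ladder: each failure jumps to a label that releases exactly what had been acquired up to that point, in reverse order. A schematic sketch (setup_irqs() is a placeholder for the probe steps between the two labels, not a function in the driver):

	ret = clk_prepare_enable(qspi->clk);
	if (ret)
		goto qspi_probe_err;

	ret = setup_irqs(qspi);			/* placeholder for IRQ registration */
	if (ret)
		goto qspi_unprepare_err;	/* clock is running: undo it on the way out */

	return 0;

qspi_unprepare_err:
	clk_disable_unprepare(qspi->clk);
qspi_probe_err:
	kfree(qspi->dev_ids);
	return ret;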
Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20211018073413.2029081-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 6cf7cff5edeed..f3de3305d0f59 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1602,7 +1602,7 @@ int bcm_qspi_probe(struct platform_device *pdev, &qspi->dev_ids[val]); if (ret < 0) { dev_err(&pdev->dev, "IRQ %s not found\n", name); - goto qspi_probe_err; + goto qspi_unprepare_err; } qspi->dev_ids[val].dev = qspi; @@ -1617,7 +1617,7 @@ int bcm_qspi_probe(struct platform_device *pdev, if (!num_ints) { dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n"); ret = -EINVAL; - goto qspi_probe_err; + goto qspi_unprepare_err; } bcm_qspi_hw_init(qspi); @@ -1641,6 +1641,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi_reg_err: bcm_qspi_hw_uninit(qspi); +qspi_unprepare_err: clk_disable_unprepare(qspi->clk); qspi_probe_err: kfree(qspi->dev_ids); From e57fb1713734fa1bcd435c4f86bc8fd135389bba Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 21 Oct 2021 07:44:47 +0800 Subject: [PATCH 0514/1202] btrfs: make btrfs_super_block size match BTRFS_SUPER_INFO_SIZE It's a common practice to avoid use sizeof(struct btrfs_super_block) (3531), but to use BTRFS_SUPER_INFO_SIZE (4096). The problem is that, sizeof(struct btrfs_super_block) doesn't match BTRFS_SUPER_INFO_SIZE from the very beginning. Furthermore, for all call sites except selftests, we always allocate BTRFS_SUPER_INFO_SIZE space for super block, there isn't any real reason to use the smaller value, and it doesn't really save any space. So let's get rid of such confusing behavior, and unify those two values. This modification also adds a new static_assert() to verify the size, and moves the BTRFS_SUPER_INFO_* macros to the definition of btrfs_super_block for the static_assert(). Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/disk-io.h | 3 --- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1401268985774..dc6a6173a2c05 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -218,6 +218,9 @@ struct btrfs_root_backup { u8 unused_8[10]; } __attribute__ ((__packed__)); +#define BTRFS_SUPER_INFO_OFFSET SZ_64K +#define BTRFS_SUPER_INFO_SIZE 4096 + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -270,7 +273,11 @@ struct btrfs_super_block { __le64 reserved[28]; u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; + + /* Padded to 4096 bytes */ + u8 padding[565]; } __attribute__ ((__packed__)); +static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE); /* * Compat flags that we support. 
If any incompat flags are set other than the diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1d3b749c405a8..a2b5db4ba262c 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -6,9 +6,6 @@ #ifndef BTRFS_DISK_IO_H #define BTRFS_DISK_IO_H -#define BTRFS_SUPER_INFO_OFFSET SZ_64K -#define BTRFS_SUPER_INFO_SIZE 4096 - #define BTRFS_SUPER_MIRROR_MAX 3 #define BTRFS_SUPER_MIRROR_SHIFT 12 From 2aee149dd9ba374b426b9dd6192bf2a8a715e63a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Oct 2021 17:25:22 +0100 Subject: [PATCH 0515/1202] irqchip/mchp-eic: Drop build-breaking COMPILE_TEST This driver advertises COMPILE_TEST, and yet fails to link due to broken dependencies. Reported-by: Randy Dunlap Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/bfc6fc7b-2bc3-b2b6-d65b-04805fbeb59d@infradead.org --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 8df8ffb9379fc..6e5883827fbf8 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -604,7 +604,7 @@ config APPLE_AIC config MCHP_EIC bool "Microchip External Interrupt Controller" - depends on ARCH_AT91 || COMPILE_TEST + depends on ARCH_AT91 select IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY help From 2e0fd58181a2614e1f41e434476f3f21c39b4669 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Oct 2021 17:25:22 +0100 Subject: [PATCH 0516/1202] irqchip/meson-gpio: Drop build-breaking COMPILE_TEST This driver advertises COMPILE_TEST, and yet fails to link due to broken dependencies. Reported-by: Randy Dunlap Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/bfc6fc7b-2bc3-b2b6-d65b-04805fbeb59d@infradead.org --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index b2f483b7458dc..42d01bc259822 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -407,7 +407,7 @@ config IRQ_UNIPHIER_AIDET config MESON_IRQ_GPIO tristate "Meson GPIO Interrupt Multiplexer" - depends on ARCH_MESON || COMPILE_TEST + depends on ARCH_MESON default ARCH_MESON select IRQ_DOMAIN_HIERARCHY help From 31ad35e280b085cd319943fc372ea335a9cb52ea Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 19 Oct 2021 08:22:09 +0800 Subject: [PATCH 0517/1202] btrfs: sysfs: convert scnprintf and snprintf to sysfs_emit Commit 2efc459d06f1 ("sysfs: Add sysfs_emit and sysfs_emit_at to format sysfs out") merged in 5.10 introduced two new functions sysfs_emit() and sysfs_emit_at() which are aware of the PAGE_SIZE limit of the output buffer. Use the above two new functions instead of scnprintf() and snprintf() in various sysfs show(). 
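For single-value attributes the conversion is mechanical: sysfs_emit() knows the buffer is a full page, so the explicit PAGE_SIZE bound disappears. One instance lifted from the hunks below (parameter naming as in fs/btrfs/sysfs.c):

	static ssize_t btrfs_nodesize_show(struct kobject *kobj,
					   struct kobj_attribute *a, char *buf)
	{
		struct btrfs_fs_info *fs_info = to_fs_info(kobj);

		/* was: scnprintf(buf, PAGE_SIZE, "%u\n", ...) */
		return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
	}

For multi-part output, sysfs_emit_at(buf, ret, ...) replaces the scnprintf(buf + ret, PAGE_SIZE - ret, ...) pattern, with the offset bookkeeping handled by the helper.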
Reviewed-by: Josef Bacik Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 93 +++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 49 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 25a6f587852be..f9eff3b0f77cd 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -177,7 +177,7 @@ static ssize_t btrfs_feature_attr_show(struct kobject *kobj, } else val = can_modify_feature(fa); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } static ssize_t btrfs_feature_attr_store(struct kobject *kobj, @@ -330,7 +330,7 @@ static const struct attribute_group btrfs_feature_attr_group = { static ssize_t rmdir_subvol_show(struct kobject *kobj, struct kobj_attribute *ka, char *buf) { - return scnprintf(buf, PAGE_SIZE, "0\n"); + return sysfs_emit(buf, "0\n"); } BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show); @@ -345,12 +345,12 @@ static ssize_t supported_checksums_show(struct kobject *kobj, * This "trick" only works as long as 'enum btrfs_csum_type' has * no holes in it */ - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", - (i == 0 ? "" : " "), btrfs_super_csum_name(i)); + ret += sysfs_emit_at(buf, ret, "%s%s", (i == 0 ? "" : " "), + btrfs_super_csum_name(i)); } - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += sysfs_emit_at(buf, ret, "\n"); return ret; } BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show); @@ -358,7 +358,7 @@ BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show); static ssize_t send_stream_version_show(struct kobject *kobj, struct kobj_attribute *ka, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", BTRFS_SEND_STREAM_VERSION); + return sysfs_emit(buf, "%d\n", BTRFS_SEND_STREAM_VERSION); } BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show); @@ -378,9 +378,8 @@ static ssize_t supported_rescue_options_show(struct kobject *kobj, int i; for (i = 0; i < ARRAY_SIZE(rescue_opts); i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", - (i ? " " : ""), rescue_opts[i]); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += sysfs_emit_at(buf, ret, "%s%s", (i ? 
" " : ""), rescue_opts[i]); + ret += sysfs_emit_at(buf, ret, "\n"); return ret; } BTRFS_ATTR(static_feature, supported_rescue_options, @@ -394,10 +393,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj, /* 4K sector size is also supported with 64K page size */ if (PAGE_SIZE == SZ_64K) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K); + ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K); /* Only sectorsize == PAGE_SIZE is now supported */ - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE); + ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE); return ret; } @@ -437,7 +436,7 @@ static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%lld\n", + return sysfs_emit(buf, "%lld\n", atomic64_read(&fs_info->discard_ctl.discardable_bytes)); } BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show); @@ -448,7 +447,7 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%d\n", + return sysfs_emit(buf, "%d\n", atomic_read(&fs_info->discard_ctl.discardable_extents)); } BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show); @@ -459,8 +458,8 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - fs_info->discard_ctl.discard_bitmap_bytes); + return sysfs_emit(buf, "%llu\n", + fs_info->discard_ctl.discard_bitmap_bytes); } BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show); @@ -470,7 +469,7 @@ static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%lld\n", + return sysfs_emit(buf, "%lld\n", atomic64_read(&fs_info->discard_ctl.discard_bytes_saved)); } BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show); @@ -481,8 +480,8 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - fs_info->discard_ctl.discard_extent_bytes); + return sysfs_emit(buf, "%llu\n", + fs_info->discard_ctl.discard_extent_bytes); } BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show); @@ -492,8 +491,8 @@ static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%u\n", - READ_ONCE(fs_info->discard_ctl.iops_limit)); + return sysfs_emit(buf, "%u\n", + READ_ONCE(fs_info->discard_ctl.iops_limit)); } static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj, @@ -523,8 +522,8 @@ static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%u\n", - READ_ONCE(fs_info->discard_ctl.kbps_limit)); + return sysfs_emit(buf, "%u\n", + READ_ONCE(fs_info->discard_ctl.kbps_limit)); } static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj, @@ -553,8 +552,8 @@ static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - READ_ONCE(fs_info->discard_ctl.max_discard_size)); + return sysfs_emit(buf, "%llu\n", + 
READ_ONCE(fs_info->discard_ctl.max_discard_size)); } static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj, @@ -627,7 +626,7 @@ static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf) val = *value_ptr; if (lock) spin_unlock(lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return sysfs_emit(buf, "%llu\n", val); } static ssize_t global_rsv_size_show(struct kobject *kobj, @@ -673,7 +672,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, val += block_group->used; } up_read(&sinfo->groups_sem); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return sysfs_emit(buf, "%llu\n", val); } /* @@ -771,7 +770,7 @@ static ssize_t btrfs_label_show(struct kobject *kobj, ssize_t ret; spin_lock(&fs_info->super_lock); - ret = scnprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); + ret = sysfs_emit(buf, label[0] ? "%s\n" : "%s", label); spin_unlock(&fs_info->super_lock); return ret; @@ -819,7 +818,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); + return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize); } BTRFS_ATTR(, nodesize, btrfs_nodesize_show); @@ -829,8 +828,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%u\n", - fs_info->super_copy->sectorsize); + return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize); } BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show); @@ -840,7 +838,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); + return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize); } BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show); @@ -852,7 +850,7 @@ static ssize_t quota_override_show(struct kobject *kobj, int quota_override; quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags); - return scnprintf(buf, PAGE_SIZE, "%d\n", quota_override); + return sysfs_emit(buf, "%d\n", quota_override); } static ssize_t quota_override_store(struct kobject *kobj, @@ -890,8 +888,7 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%pU\n", - fs_info->fs_devices->metadata_uuid); + return sysfs_emit(buf, "%pU\n", fs_info->fs_devices->metadata_uuid); } BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show); @@ -902,9 +899,9 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj, struct btrfs_fs_info *fs_info = to_fs_info(kobj); u16 csum_type = btrfs_super_csum_type(fs_info->super_copy); - return scnprintf(buf, PAGE_SIZE, "%s (%s)\n", - btrfs_super_csum_name(csum_type), - crypto_shash_driver_name(fs_info->csum_shash)); + return sysfs_emit(buf, "%s (%s)\n", + btrfs_super_csum_name(csum_type), + crypto_shash_driver_name(fs_info->csum_shash)); } BTRFS_ATTR(, checksum, btrfs_checksum_show); @@ -941,7 +938,7 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj, str = "UNKNOWN\n"; break; } - return scnprintf(buf, PAGE_SIZE, "%s", str); + return sysfs_emit(buf, "%s", str); } BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show); @@ -950,7 +947,7 @@ static ssize_t btrfs_generation_show(struct kobject *kobj, { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - return scnprintf(buf, PAGE_SIZE, "%llu\n", 
fs_info->generation); + return sysfs_emit(buf, "%llu\n", fs_info->generation); } BTRFS_ATTR(, generation, btrfs_generation_show); @@ -1028,8 +1025,7 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj, struct btrfs_fs_info *fs_info = to_fs_info(kobj); ssize_t ret; - ret = scnprintf(buf, PAGE_SIZE, "%d\n", - READ_ONCE(fs_info->bg_reclaim_threshold)); + ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold)); return ret; } @@ -1471,7 +1467,7 @@ static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj, val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show); @@ -1484,7 +1480,7 @@ static ssize_t btrfs_devinfo_missing_show(struct kobject *kobj, val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } BTRFS_ATTR(devid, missing, btrfs_devinfo_missing_show); @@ -1498,7 +1494,7 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj, val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show); @@ -1509,8 +1505,7 @@ static ssize_t btrfs_devinfo_scrub_speed_max_show(struct kobject *kobj, struct btrfs_device *device = container_of(kobj, struct btrfs_device, devid_kobj); - return scnprintf(buf, PAGE_SIZE, "%llu\n", - READ_ONCE(device->scrub_speed_max)); + return sysfs_emit(buf, "%llu\n", READ_ONCE(device->scrub_speed_max)); } static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj, @@ -1538,7 +1533,7 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj, val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - return scnprintf(buf, PAGE_SIZE, "%d\n", val); + return sysfs_emit(buf, "%d\n", val); } BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show); @@ -1549,14 +1544,14 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj, devid_kobj); if (!device->dev_stats_valid) - return scnprintf(buf, PAGE_SIZE, "invalid\n"); + return sysfs_emit(buf, "invalid\n"); /* * Print all at once so we get a snapshot of all values from the same * time. Keep them in sync and in order of definition of * btrfs_dev_stat_values. 
*/ - return scnprintf(buf, PAGE_SIZE, + return sysfs_emit(buf, "write_errs %d\n" "read_errs %d\n" "flush_errs %d\n" From 6e7276a38cdede2129a885b00a1e0a1ebdd31593 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 19 Oct 2021 20:35:01 -0700 Subject: [PATCH 0518/1202] btrfs: fix deadlock when defragging transparent huge pages Attempting to defragment a Btrfs file containing a transparent huge page immediately deadlocks with the following stack trace: #0 context_switch (kernel/sched/core.c:4940:2) #1 __schedule (kernel/sched/core.c:6287:8) #2 schedule (kernel/sched/core.c:6366:3) #3 io_schedule (kernel/sched/core.c:8389:2) #4 wait_on_page_bit_common (mm/filemap.c:1356:4) #5 __lock_page (mm/filemap.c:1648:2) #6 lock_page (./include/linux/pagemap.h:625:3) #7 pagecache_get_page (mm/filemap.c:1910:4) #8 find_or_create_page (./include/linux/pagemap.h:420:9) #9 defrag_prepare_one_page (fs/btrfs/ioctl.c:1068:9) #10 defrag_one_range (fs/btrfs/ioctl.c:1326:14) #11 defrag_one_cluster (fs/btrfs/ioctl.c:1421:9) #12 btrfs_defrag_file (fs/btrfs/ioctl.c:1523:9) #13 btrfs_ioctl_defrag (fs/btrfs/ioctl.c:3117:9) #14 btrfs_ioctl (fs/btrfs/ioctl.c:4872:10) #15 vfs_ioctl (fs/ioctl.c:51:10) #16 __do_sys_ioctl (fs/ioctl.c:874:11) #17 __se_sys_ioctl (fs/ioctl.c:860:1) #18 __x64_sys_ioctl (fs/ioctl.c:860:1) #19 do_syscall_x64 (arch/x86/entry/common.c:50:14) #20 do_syscall_64 (arch/x86/entry/common.c:80:7) #21 entry_SYSCALL_64+0x7c/0x15b (arch/x86/entry/entry_64.S:113) A huge page is represented by a compound page, which consists of a struct page for each PAGE_SIZE page within the huge page. The first struct page is the "head page", and the remaining are "tail pages". Defragmentation attempts to lock each page in the range. However, lock_page() on a tail page actually locks the corresponding head page. So, if defragmentation tries to lock more than one struct page in a compound page, it tries to lock the same head page twice and deadlocks with itself. Ideally, we should be able to defragment transparent huge pages. However, THP for filesystems is currently read-only, so a lot of code is not ready to use huge pages for I/O. For now, let's just return ETXTBSY. This can be reproduced with the following on a kernel with CONFIG_READ_ONLY_THP_FOR_FS=y: $ cat create_thp_file.c #include <fcntl.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <sys/mman.h> #include <unistd.h> static const char zeroes[1024 * 1024]; static const size_t FILE_SIZE = 2 * 1024 * 1024; int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "usage: %s PATH\n", argv[0]); return EXIT_FAILURE; } int fd = creat(argv[1], 0777); if (fd == -1) { perror("creat"); return EXIT_FAILURE; } size_t written = 0; while (written < FILE_SIZE) { ssize_t ret = write(fd, zeroes, sizeof(zeroes) < FILE_SIZE - written ? sizeof(zeroes) : FILE_SIZE - written); if (ret < 0) { perror("write"); return EXIT_FAILURE; } written += ret; } close(fd); fd = open(argv[1], O_RDONLY); if (fd == -1) { perror("open"); return EXIT_FAILURE; } /* * Reserve some address space so that we can align the file mapping to * the huge page size. 
*/ void *placeholder_map = mmap(NULL, FILE_SIZE * 2, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (placeholder_map == MAP_FAILED) { perror("mmap (placeholder)"); return EXIT_FAILURE; } void *aligned_address = (void *)(((uintptr_t)placeholder_map + FILE_SIZE - 1) & ~(FILE_SIZE - 1)); void *map = mmap(aligned_address, FILE_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED | MAP_FIXED, fd, 0); if (map == MAP_FAILED) { perror("mmap"); return EXIT_FAILURE; } if (madvise(map, FILE_SIZE, MADV_HUGEPAGE) < 0) { perror("madvise"); return EXIT_FAILURE; } char *line = NULL; size_t line_capacity = 0; FILE *smaps_file = fopen("/proc/self/smaps", "r"); if (!smaps_file) { perror("fopen"); return EXIT_FAILURE; } for (;;) { for (size_t off = 0; off < FILE_SIZE; off += 4096) ((volatile char *)map)[off]; ssize_t ret; bool this_mapping = false; while ((ret = getline(&line, &line_capacity, smaps_file)) > 0) { unsigned long start, end, huge; if (sscanf(line, "%lx-%lx", &start, &end) == 2) { this_mapping = (start <= (uintptr_t)map && (uintptr_t)map < end); } else if (this_mapping && sscanf(line, "FilePmdMapped: %ld", &huge) == 1 && huge > 0) { return EXIT_SUCCESS; } } sleep(6); rewind(smaps_file); fflush(smaps_file); } } $ ./create_thp_file huge $ btrfs fi defrag -czstd ./huge Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c9d3f375df835..c474f7e241632 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1069,6 +1069,20 @@ static struct page *defrag_prepare_one_page(struct btrfs_inode *inode, if (!page) return ERR_PTR(-ENOMEM); + /* + * Since we can defragment files opened read-only, we can encounter + * transparent huge pages here (see CONFIG_READ_ONLY_THP_FOR_FS). We + * can't do I/O using huge pages yet, so return an error for now. + * Filesystem transparent huge pages are typically only used for + * executables that explicitly enable them, so this isn't very + * restrictive. + */ + if (PageCompound(page)) { + unlock_page(page); + put_page(page); + return ERR_PTR(-ETXTBSY); + } + ret = set_page_extent_mapped(page); if (ret < 0) { unlock_page(page); From 69efc1ef07f42d8d590f6e6bc62bc689b07b1154 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 14 Oct 2021 13:11:00 -0400 Subject: [PATCH 0519/1202] fs: export an inode_update_time helper If you already have an inode and need to update the time on the inode there is no way to do this properly. Export this helper to allow file systems to update time on the inode so the appropriate handler is called, either ->update_time or generic_update_time. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/inode.c | 7 ++++--- include/linux/fs.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index ed0cab8a32db1..9abc88d7959cb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1782,12 +1782,13 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. 
*/ -static int update_time(struct inode *inode, struct timespec64 *time, int flags) +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) { if (inode->i_op->update_time) return inode->i_op->update_time(inode, time, flags); return generic_update_time(inode, time, flags); } +EXPORT_SYMBOL(inode_update_time); /** * atime_needs_update - update the access time @@ -1857,7 +1858,7 @@ void touch_atime(const struct path *path) * of the fs read only, e.g. subvolumes in Btrfs. */ now = current_time(inode); - update_time(inode, &now, S_ATIME); + inode_update_time(inode, &now, S_ATIME); __mnt_drop_write(mnt); skip_update: sb_end_write(inode->i_sb); @@ -2002,7 +2003,7 @@ int file_update_time(struct file *file) if (__mnt_want_write_file(file)) return 0; - ret = update_time(inode, &now, sync_it); + ret = inode_update_time(inode, &now, sync_it); __mnt_drop_write_file(file); return ret; diff --git a/include/linux/fs.h b/include/linux/fs.h index e7a633353fd20..56eba723477e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2498,6 +2498,8 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); + static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) From 407b69507ab0364f363886c6f06c82c47b27fe6f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 14 Oct 2021 13:11:01 -0400 Subject: [PATCH 0520/1202] btrfs: update device path inode time instead of bd_inode Christoph pointed out that I'm updating bdev->bd_inode for the device time when we remove block devices from a btrfs file system, however this isn't actually exposed to anything. The inode we want to update is the one that's associated with the path to the device, usually on devtmpfs, so that blkid notices the difference. We still don't want to do the blkdev_open, so use kern_path() to get the path to the given device and do the update time on that inode. Fixes: 8f96a5bfa150 ("btrfs: update the bdev time directly when closing") Reported-by: Christoph Hellwig Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9eab8a7411665..a8ea3f88c4dbd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "misc.h" #include "ctree.h" #include "extent_map.h" @@ -1888,18 +1889,22 @@ static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, /* * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. + * + * We don't care about errors here, this is just to be kind to userspace. */ -static void update_dev_time(struct block_device *bdev) +static void update_dev_time(const char *device_path) { - struct inode *inode = bdev->bd_inode; + struct path path; struct timespec64 now; + int ret; - /* Shouldn't happen but just in case. 
*/ - if (!inode) + ret = kern_path(device_path, LOOKUP_FOLLOW, &path); + if (ret) return; - now = current_time(inode); - generic_update_time(inode, &now, S_MTIME | S_CTIME); + now = current_time(d_inode(path.dentry)); + inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME); + path_put(&path); } static int btrfs_rm_dev_item(struct btrfs_device *device) @@ -2077,7 +2082,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(bdev); + update_dev_time(device_path); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, @@ -2775,7 +2780,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_forget_devices(device_path); /* Update ctime/mtime for blkid or udev */ - update_dev_time(bdev); + update_dev_time(device_path); return ret; From 1f62a71da882381b6f516a7977f71909d4d4b7bd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Sep 2021 11:17:12 -0400 Subject: [PATCH 0521/1202] btrfs: index free space entries on size Currently we index free space on offset only, because usually we have a hint from the allocator that we want to honor for locality reasons. However if we fail to use this hint we have to go back to a brute force search through the free space entries to find a large enough extent. With sufficiently fragmented free space this becomes quite expensive, as we have to linearly search all of the free space entries to find if we have a part that's long enough. To fix this add a cached rb tree to index based on free space entry bytes. This will allow us to quickly look up the largest chunk in the free space tree for this block group, and stop searching once we've found an entry that is too small to satisfy our allocation. We simply choose to use this tree if we're searching from the beginning of the block group, as we know we do not care about locality at that point. I wrote an allocator test that creates a 10TiB RAM-backed null block device and then fallocates random files until the file system is full. I then go through and delete all of the odd files. Then I spawn 8 threads that fallocate 64MiB files (1/2 our extent size cap) until the file system is full again. I use bcc's funclatency to measure the latency of find_free_extent. 
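For reference, numbers like the ones below can be collected with bcc's funclatency, which attaches a kprobe to the named kernel function and prints a latency histogram on exit; the exact invocation here is an illustrative assumption, not necessarily the one used for this test:

  $ funclatency -d 60 find_free_extent
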
The baseline results are nsecs : count distribution 0 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 10356 |**** | 512 -> 1023 : 58242 |************************* | 1024 -> 2047 : 74418 |******************************** | 2048 -> 4095 : 90393 |****************************************| 4096 -> 8191 : 79119 |*********************************** | 8192 -> 16383 : 35614 |*************** | 16384 -> 32767 : 13418 |***** | 32768 -> 65535 : 12811 |***** | 65536 -> 131071 : 17090 |******* | 131072 -> 262143 : 26465 |*********** | 262144 -> 524287 : 40179 |***************** | 524288 -> 1048575 : 55469 |************************ | 1048576 -> 2097151 : 48807 |********************* | 2097152 -> 4194303 : 26744 |*********** | 4194304 -> 8388607 : 35351 |*************** | 8388608 -> 16777215 : 13918 |****** | 16777216 -> 33554431 : 21 | | avg = 908079 nsecs, total: 580889071441 nsecs, count: 639690 And the patch results are nsecs : count distribution 0 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 6883 |** | 512 -> 1023 : 54346 |********************* | 1024 -> 2047 : 79170 |******************************** | 2048 -> 4095 : 98890 |****************************************| 4096 -> 8191 : 81911 |********************************* | 8192 -> 16383 : 27075 |********** | 16384 -> 32767 : 14668 |***** | 32768 -> 65535 : 13251 |***** | 65536 -> 131071 : 15340 |****** | 131072 -> 262143 : 26715 |********** | 262144 -> 524287 : 43274 |***************** | 524288 -> 1048575 : 53870 |********************* | 1048576 -> 2097151 : 55368 |********************** | 2097152 -> 4194303 : 41036 |**************** | 4194304 -> 8388607 : 24927 |********** | 8388608 -> 16777215 : 33 | | 16777216 -> 33554431 : 9 | | avg = 623599 nsecs, total: 397259314759 nsecs, count: 637042 There's a little variation in the amount of calls done because of timing of the threads with metadata requirements, but the avg, total, and count's are relatively consistent between runs (usually within 2-5% of each other). As you can see here we have around a 30% decrease in average latency with a 30% decrease in overall time spent in find_free_extent. Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 82 +++++++++++++++++++++++++++++++++---- fs/btrfs/free-space-cache.h | 2 + 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f3fee88c8ee09..8ea04582e34b5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1580,6 +1580,44 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, return 0; } +static u64 free_space_info_bytes(const struct btrfs_free_space *info) +{ + if (info->bitmap && info->max_extent_size) + return info->max_extent_size; + return info->bytes; +} + +/* + * This is indexed in reverse of what we generally do for rb-tree, the largest + * chunks are leftmost and the smallest are rightmost. This is so that we can + * take advantage of the cached property of the cached rb-tree and simply get + * the largest free space chunk right away. 
+ */ +static void tree_insert_bytes(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct rb_root_cached *root = &ctl->free_space_bytes; + struct rb_node **p = &root->rb_root.rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_free_space *tmp; + bool leftmost = true; + + while (*p) { + parent_node = *p; + tmp = rb_entry(parent_node, struct btrfs_free_space, + bytes_index); + if (free_space_info_bytes(info) < free_space_info_bytes(tmp)) { + p = &(*p)->rb_right; + leftmost = false; + } else { + p = &(*p)->rb_left; + } + } + + rb_link_node(&info->bytes_index, parent_node, p); + rb_insert_color_cached(&info->bytes_index, root, leftmost); +} + /* * searches the tree for the given offset. * @@ -1708,6 +1746,7 @@ __unlink_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { rb_erase(&info->offset_index, &ctl->free_space_offset); + rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes); ctl->free_extents--; if (!info->bitmap && !btrfs_free_space_trimmed(info)) { @@ -1734,6 +1773,8 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, if (ret) return ret; + tree_insert_bytes(ctl, info); + if (!info->bitmap && !btrfs_free_space_trimmed(info)) { ctl->discardable_extents[BTRFS_STAT_CURR]++; ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; @@ -1880,7 +1921,7 @@ static inline u64 get_max_extent_size(struct btrfs_free_space *entry) /* Cache the size of the max extent in bytes */ static struct btrfs_free_space * find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, - unsigned long align, u64 *max_extent_size) + unsigned long align, u64 *max_extent_size, bool use_bytes_index) { struct btrfs_free_space *entry; struct rb_node *node; @@ -1891,15 +1932,35 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, if (!ctl->free_space_offset.rb_node) goto out; - entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); - if (!entry) - goto out; + if (use_bytes_index) { + node = rb_first_cached(&ctl->free_space_bytes); + } else { + entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), + 0, 1); + if (!entry) + goto out; + node = &entry->offset_index; + } - for (node = &entry->offset_index; node; node = rb_next(node)) { - entry = rb_entry(node, struct btrfs_free_space, offset_index); + for (; node; node = rb_next(node)) { + if (use_bytes_index) + entry = rb_entry(node, struct btrfs_free_space, bytes_index); + else + entry = rb_entry(node, struct btrfs_free_space, offset_index); + + /* + * If we are using the bytes index then all subsequent entries + * in this tree are going to be < bytes, so simply set the max + * extent size and exit the loop. + * + * If we're using the offset index then we need to keep going + * through the rest of the tree. 
+ */ if (entry->bytes < *bytes) { *max_extent_size = max(get_max_extent_size(entry), *max_extent_size); + if (use_bytes_index) + break; continue; } @@ -2486,6 +2547,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, info->bytes = bytes; info->trim_state = trim_state; RB_CLEAR_NODE(&info->offset_index); + RB_CLEAR_NODE(&info->bytes_index); spin_lock(&ctl->tree_lock); @@ -2799,6 +2861,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group, ctl->start = block_group->start; ctl->private = block_group; ctl->op = &free_space_op; + ctl->free_space_bytes = RB_ROOT_CACHED; INIT_LIST_HEAD(&ctl->trimming_ranges); mutex_init(&ctl->cache_writeout_mutex); @@ -2864,6 +2927,7 @@ static void __btrfs_return_cluster_to_free_space( } tree_insert_offset(&ctl->free_space_offset, entry->offset, &entry->offset_index, bitmap); + tree_insert_bytes(ctl, entry); } cluster->root = RB_ROOT; spin_unlock(&cluster->lock); @@ -2965,12 +3029,14 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, u64 align_gap = 0; u64 align_gap_len = 0; enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + bool use_bytes_index = (offset == block_group->start); ASSERT(!btrfs_is_zoned(block_group->fs_info)); spin_lock(&ctl->tree_lock); entry = find_free_space(ctl, &offset, &bytes_search, - block_group->full_stripe_len, max_extent_size); + block_group->full_stripe_len, max_extent_size, + use_bytes_index); if (!entry) goto out; @@ -3254,6 +3320,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, cluster->window_start = start * ctl->unit + entry->offset; rb_erase(&entry->offset_index, &ctl->free_space_offset); + rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 1); ASSERT(!ret); /* -EEXIST; Logic error */ @@ -3344,6 +3411,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group *block_group, continue; rb_erase(&entry->offset_index, &ctl->free_space_offset); + rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes); ret = tree_insert_offset(&cluster->root, entry->offset, &entry->offset_index, 0); total_size += entry->bytes; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 1f23088d43f95..dd982d204d2d7 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -22,6 +22,7 @@ enum btrfs_trim_state { struct btrfs_free_space { struct rb_node offset_index; + struct rb_node bytes_index; u64 offset; u64 bytes; u64 max_extent_size; @@ -45,6 +46,7 @@ static inline bool btrfs_free_space_trimming_bitmap( struct btrfs_free_space_ctl { spinlock_t tree_lock; struct rb_root free_space_offset; + struct rb_root_cached free_space_bytes; u64 free_space; int extents_thresh; int free_extents; From e10a898803c285addf633d03a5db6b9141c17c0e Mon Sep 17 00:00:00 2001 From: Li Zhang Date: Tue, 5 Oct 2021 01:15:33 +0800 Subject: [PATCH 0522/1202] btrfs: clear BTRFS_DEV_STATE_MISSING bit in btrfs_close_one_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bug: https://github.com/kdave/btrfs-progs/issues/389 The previous patch does not fix the bug right: https://lore.kernel.org/linux-btrfs/1632330390-29793-1-git-send-email-zhanglikernel@gmail.com So I write a new one It seems that the cause of the error is decrementing fs_devices->missing_devices but not clearing device->dev_state. 
Every time we umount filesystem, it would call close_ctree, And it would eventually involve btrfs_close_one_device to close the device, but it only decrements fs_devices->missing_devices but does not clear the device BTRFS_DEV_STATE_MISSING bit. Worse, this bug will cause Integer Overflow, because every time umount, fs_devices->missing_devices will decrease. If fs_devices->missing_devices value hit 0, it would overflow. I add the debug print in read_one_dev function(not in patch) to print fs_devices->missing_devices value. [root@zllke test]# truncate -s 10g test1 [root@zllke test]# truncate -s 10g test2 [root@zllke test]# losetup /dev/loop1 test1 [root@zllke test]# losetup /dev/loop2 test2 [root@zllke test]# mkfs.btrfs -draid1 -mraid1 /dev/loop1 /dev/loop2 -f [root@zllke test]# losetup -d /dev/loop2 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# dmesg [ 168.728888] loop1: detected capacity change from 0 to 20971520 [ 168.751227] BTRFS: device fsid 56ad51f1-5523-463b-8547-c19486c51ebb devid 1 transid 21 /dev/loop1 scanned by systemd-udevd (2311) [ 169.179102] loop2: detected capacity change from 0 to 20971520 [ 169.198307] BTRFS: device fsid 56ad51f1-5523-463b-8547-c19486c51ebb devid 2 transid 17 /dev/loop2 scanned by systemd-udevd (2313) [ 190.696579] BTRFS info (device loop1): flagging fs with big metadata feature [ 190.699445] BTRFS info (device loop1): allowing degraded mounts [ 190.701819] BTRFS info (device loop1): using free space tree [ 190.704126] BTRFS info (device loop1): has skinny extents [ 190.708890] BTRFS info (device loop1): before clear_missing.00000000f706684d /dev/loop1 0 [ 190.711958] BTRFS warning (device loop1): devid 2 uuid 6635ac31-56dd-4852-873b-c60f5e2d53d2 is missing [ 190.715370] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 1 [ 209.075744] BTRFS info (device loop1): flagging fs with big metadata feature [ 209.079106] BTRFS info (device loop1): allowing degraded mounts [ 209.082042] BTRFS info (device loop1): using free space tree [ 209.084791] BTRFS info (device loop1): has skinny extents [ 209.089172] BTRFS info (device loop1): before clear_missing.00000000f706684d /dev/loop1 0 [ 209.093074] BTRFS warning (device loop1): devid 2 uuid 6635ac31-56dd-4852-873b-c60f5e2d53d2 is missing [ 209.096848] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 0 [ 218.778031] BTRFS info (device loop1): flagging fs with big metadata feature [ 218.781504] BTRFS info (device loop1): allowing degraded mounts [ 218.784319] BTRFS info (device loop1): using free space tree [ 218.786902] BTRFS info (device loop1): has skinny extents [ 218.791190] BTRFS info (device loop1): before clear_missing.00000000f706684d /dev/loop1 18446744073709551615 [ 218.795532] BTRFS warning (device loop1): devid 2 uuid 6635ac31-56dd-4852-873b-c60f5e2d53d2 is missing [ 218.799320] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 18446744073709551615 If fs_devices->missing_devices is 0, next time it would be 18446744073709551615 After apply this patch, the fs_devices->missing_devices seems to be right [root@zllke test]# truncate -s 10g test1 [root@zllke test]# truncate -s 10g test2 [root@zllke test]# losetup /dev/loop1 test1 [root@zllke test]# losetup /dev/loop2 test2 [root@zllke test]# 
mkfs.btrfs -draid1 -mraid1 /dev/loop1 /dev/loop2 -f [root@zllke test]# losetup -d /dev/loop2 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# mount -o degraded /dev/loop1 /mnt/1 [root@zllke test]# umount /mnt/1 [root@zllke test]# dmesg [ 80.647739] loop1: detected capacity change from 0 to 20971520 [ 81.268113] loop2: detected capacity change from 0 to 20971520 [ 90.694332] BTRFS: device fsid 15aa1203-98d3-4a66-bcae-ca82f629c2cd devid 1 transid 5 /dev/loop1 scanned by mkfs.btrfs (1863) [ 90.705180] BTRFS: device fsid 15aa1203-98d3-4a66-bcae-ca82f629c2cd devid 2 transid 5 /dev/loop2 scanned by mkfs.btrfs (1863) [ 104.935735] BTRFS info (device loop1): flagging fs with big metadata feature [ 104.939020] BTRFS info (device loop1): allowing degraded mounts [ 104.941637] BTRFS info (device loop1): disk space caching is enabled [ 104.944442] BTRFS info (device loop1): has skinny extents [ 104.948848] BTRFS info (device loop1): before clear_missing.00000000975bd577 /dev/loop1 0 [ 104.952365] BTRFS warning (device loop1): devid 2 uuid 8b333791-0b3f-4f57-b449-1c1ab6b51f38 is missing [ 104.956220] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 1 [ 104.960602] BTRFS info (device loop1): checking UUID tree [ 157.888711] BTRFS info (device loop1): flagging fs with big metadata feature [ 157.892915] BTRFS info (device loop1): allowing degraded mounts [ 157.896333] BTRFS info (device loop1): disk space caching is enabled [ 157.899244] BTRFS info (device loop1): has skinny extents [ 157.905068] BTRFS info (device loop1): before clear_missing.00000000975bd577 /dev/loop1 0 [ 157.908981] BTRFS warning (device loop1): devid 2 uuid 8b333791-0b3f-4f57-b449-1c1ab6b51f38 is missing [ 157.913540] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 1 [ 161.057615] BTRFS info (device loop1): flagging fs with big metadata feature [ 161.060874] BTRFS info (device loop1): allowing degraded mounts [ 161.063422] BTRFS info (device loop1): disk space caching is enabled [ 161.066179] BTRFS info (device loop1): has skinny extents [ 161.069997] BTRFS info (device loop1): before clear_missing.00000000975bd577 /dev/loop1 0 [ 161.073328] BTRFS warning (device loop1): devid 2 uuid 8b333791-0b3f-4f57-b449-1c1ab6b51f38 is missing [ 161.077084] BTRFS info (device loop1): before clear_missing.0000000000000000 /dev/loop2 1 Signed-off-by: Li Zhang Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a8ea3f88c4dbd..546bf1146b2dc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1127,8 +1127,10 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->devid == BTRFS_DEV_REPLACE_DEVID) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { + clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); fs_devices->missing_devices--; + } btrfs_close_bdev(device); if (device->bdev) { From dbc72ebcee7b30c89c359afc32df7f54896644f1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 30 Sep 2021 19:26:03 +0200 Subject: [PATCH 0523/1202] parisc/unwind: use copy_from_kernel_nofault() I have no idea why get_user() is used there, but we're unwinding the kernel stack, so we should use 
copy_from_kernel_nofault(). Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 87ae476d1c4f5..889d5889203a5 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -302,7 +302,8 @@ static void unwind_frame_regs(struct unwind_frame_info *info) break; } - if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET))) + if (copy_from_kernel_nofault(&tmp, + (void *)info->prev_sp - RP_OFFSET, sizeof(tmp))) break; info->prev_ip = tmp; sp = info->prev_sp; From 8baaa5f25e33e24b50ff3f7049cb2795e312acc3 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 4 May 2021 14:43:04 +0200 Subject: [PATCH 0524/1202] parisc: make parisc_acctyp() available outside of faults.c When adding kfence support, we need to tell kfence_handle_page_fault() if the interrupted assembler statement is a read or write operation. Signed-off-by: Helge Deller --- arch/parisc/include/asm/traps.h | 1 + arch/parisc/mm/fault.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 8ecc1f0c0483d..34619f010c631 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -14,6 +14,7 @@ void parisc_terminate(char *msg, struct pt_regs *regs, void die_if_kernel(char *str, struct pt_regs *regs, long err); /* mm/fault.c */ +unsigned long parisc_acctyp(unsigned long code, unsigned int inst); const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 716960f5d92ea..4a6221b869fd2 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -48,7 +48,7 @@ int show_unhandled_signals = 1; * VM_WRITE if write operation * VM_EXEC if execute operation */ -static unsigned long +unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { if (code == 6 || code == 16) From 93ad32a3468b11d58c7d7b517259fad2f58f161e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 4 May 2021 14:49:14 +0200 Subject: [PATCH 0525/1202] parisc: Switch to ARCH_STACKWALK implementation It's shorter and kfence currently depends on this stack unwinding implementation. 
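For context, the ARCH_STACKWALK interface inverts control: the generic code supplies a consume callback and the architecture only walks frames and feeds it return addresses. A minimal sketch of that contract (the struct name is illustrative; the real consumers live in kernel/stacktrace.c):

  /* A stack_trace_consume_fn stores each address until its buffer is
   * full; returning false stops the walk. */
  struct trace_cookie {
          unsigned long *entries;
          unsigned int nr_entries;
          unsigned int max_entries;
  };

  static bool consume_entry(void *cookie, unsigned long addr)
  {
          struct trace_cookie *c = cookie;

          c->entries[c->nr_entries++] = addr;
          return c->nr_entries < c->max_entries;
  }

  /* The architecture side then reduces to:
   *         arch_stack_walk(consume_entry, &c, task, regs);
   */
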
Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 2 ++ arch/parisc/kernel/stacktrace.c | 30 +++++++++++++----------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 27a8b49af11fc..aee25beb2d002 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -13,6 +13,8 @@ config PARISC select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE + select ARCH_STACKWALK + select HAVE_RELIABLE_STACKTRACE select DMA_OPS select RTC_CLASS select RTC_DRV_GENERIC diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c index 34bf6d6bf6e8f..6b4ca91932cf2 100644 --- a/arch/parisc/kernel/stacktrace.c +++ b/arch/parisc/kernel/stacktrace.c @@ -2,45 +2,41 @@ /* * Stack trace management functions * - * Copyright (C) 2009 Helge Deller + * Copyright (C) 2009-2021 Helge Deller * based on arch/x86/kernel/stacktrace.c by Ingo Molnar * and parisc unwind functions by Randolph Chung * * TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT) */ -#include #include #include -static void dump_trace(struct task_struct *task, struct stack_trace *trace) +static void notrace walk_stackframe(struct task_struct *task, + struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *cookie) { struct unwind_frame_info info; unwind_frame_init_task(&info, task, NULL); - - /* unwind stack and save entries in stack_trace struct */ - trace->nr_entries = 0; - while (trace->nr_entries < trace->max_entries) { + while (1) { if (unwind_once(&info) < 0 || info.ip == 0) break; if (__kernel_text_address(info.ip)) - trace->entries[trace->nr_entries++] = info.ip; + if (!fn(cookie, info.ip)) + break; } } -/* - * Save stack-backtrace addresses into a stack_trace buffer. 
- */ -void save_stack_trace(struct stack_trace *trace) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { - dump_trace(current, trace); + walk_stackframe(task, regs, consume_entry, cookie); } -EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task) { - dump_trace(tsk, trace); + walk_stackframe(task, NULL, consume_entry, cookie); + return 1; } -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); From d412236f09ef576386d9cbebfcd08ac5e4bd5f84 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 9 Oct 2021 22:21:49 +0200 Subject: [PATCH 0526/1202] parisc: Add KFENCE support Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 5 ++-- arch/parisc/include/asm/kfence.h | 44 ++++++++++++++++++++++++++++++++ arch/parisc/kernel/traps.c | 5 ++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 arch/parisc/include/asm/kfence.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index aee25beb2d002..906187a412ec2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -48,6 +48,7 @@ config PARISC select HAVE_ARCH_HASH select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_KFENCE select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_REGS_AND_STACK_ACCESS_API @@ -254,11 +255,11 @@ config PARISC_PAGE_SIZE_4KB config PARISC_PAGE_SIZE_16KB bool "16KB" - depends on PA8X00 && BROKEN + depends on PA8X00 && BROKEN && !KFENCE config PARISC_PAGE_SIZE_64KB bool "64KB" - depends on PA8X00 && BROKEN + depends on PA8X00 && BROKEN && !KFENCE endchoice diff --git a/arch/parisc/include/asm/kfence.h b/arch/parisc/include/asm/kfence.h new file mode 100644 index 0000000000000..6259e5ac1feae --- /dev/null +++ b/arch/parisc/include/asm/kfence.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PA-RISC KFENCE support. + * + * Copyright (C) 2021, Helge Deller + */ + +#ifndef _ASM_PARISC_KFENCE_H +#define _ASM_PARISC_KFENCE_H + +#include + +#include +#include + +static inline bool arch_kfence_init_pool(void) +{ + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + pte_t *pte = virt_to_kpte(addr); + + if (WARN_ON(!pte)) + return false; + + /* + * We need to avoid IPIs, as we may get KFENCE allocations or faults + * with interrupts disabled. + */ + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + else + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + return true; +} + +#endif /* _ASM_PARISC_KFENCE_H */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 747c328fb8862..524781eae4dde 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -787,6 +788,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs) /* Clean up and return if in exception table. */ if (fixup_exception(regs)) return; + /* Clean up and return if handled by kfence. 
*/ + if (kfence_handle_page_fault(fault_address, + parisc_acctyp(code, regs->iir) == VM_WRITE, regs)) + return; pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); } From d172588868b9cdea041696f37e968ee9ea3dd7d6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 20:24:35 +0200 Subject: [PATCH 0527/1202] parisc: disable preemption during local tlb flush flush_cache_mm() and flush_cache_range() fetch %sr3 via mfsp(). If it matches mm->context, they flush caches and the TLB. However, the TLB is cpu-local, so if the code gets preempted shortly after the mfsp(), and later resumed on another CPU, the wrong TLB is flushed. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 39e02227e2310..a1a7e2b0812f7 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -558,6 +558,7 @@ void flush_cache_mm(struct mm_struct *mm) return; } + preempt_disable(); if (mm->context == mfsp(3)) { for (vma = mm->mmap; vma; vma = vma->vm_next) { flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); @@ -565,6 +566,7 @@ void flush_cache_mm(struct mm_struct *mm) flush_user_icache_range_asm(vma->vm_start, vma->vm_end); flush_tlb_range(vma, vma->vm_start, vma->vm_end); } + preempt_enable(); return; } @@ -589,6 +591,7 @@ void flush_cache_mm(struct mm_struct *mm) } } } + preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, @@ -605,11 +608,13 @@ void flush_cache_range(struct vm_area_struct *vma, return; } + preempt_disable(); if (vma->vm_mm->context == mfsp(3)) { flush_user_dcache_range_asm(start, end); if (vma->vm_flags & VM_EXEC) flush_user_icache_range_asm(start, end); flush_tlb_range(vma, start, end); + preempt_enable(); return; } @@ -629,6 +634,7 @@ void flush_cache_range(struct vm_area_struct *vma, } } } + preempt_enable(); } void From 9702318eefa43bae984915878ce8b05493ca807f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 20:24:36 +0200 Subject: [PATCH 0528/1202] parisc: deduplicate code in flush_cache_mm() and flush_cache_range() Parts of both functions are the same, so deduplicate them. No functional change. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 81 ++++++++++++++------------------------ 1 file changed, 30 insertions(+), 51 deletions(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index a1a7e2b0812f7..c61827e4928ae 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -543,10 +543,33 @@ static inline pte_t *get_ptep(pgd_t *pgd, unsigned long addr) return ptep; } +static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + unsigned long addr, pfn; + pte_t *ptep; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + ptep = get_ptep(mm->pgd, addr); + if (ptep) { + pfn = pte_pfn(*ptep); + flush_cache_page(vma, addr, pfn); + } + } +} + +static void flush_user_cache_tlb(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_user_dcache_range_asm(start, end); + if (vma->vm_flags & VM_EXEC) + flush_user_icache_range_asm(start, end); + flush_tlb_range(vma, start, end); +} + void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; - pgd_t *pgd; /* Flushing the whole cache on each cpu takes forever on rp3440, etc. 
So, avoid it if the mm isn't too big. */ @@ -560,46 +583,20 @@ void flush_cache_mm(struct mm_struct *mm) preempt_disable(); if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) { - flush_user_dcache_range_asm(vma->vm_start, vma->vm_end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(vma->vm_start, vma->vm_end); - flush_tlb_range(vma, vma->vm_start, vma->vm_end); - } + for (vma = mm->mmap; vma; vma = vma->vm_next) + flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end); preempt_enable(); return; } - pgd = mm->pgd; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { - unsigned long pfn; - pte_t *ptep = get_ptep(pgd, addr); - if (!ptep) - continue; - pfn = pte_pfn(*ptep); - if (!pfn_valid(pfn)) - continue; - if (unlikely(mm->context)) { - flush_tlb_page(vma, addr); - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } else { - __purge_cache_page(vma, addr, PFN_PHYS(pfn)); - } - } - } + for (vma = mm->mmap; vma; vma = vma->vm_next) + flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - pgd_t *pgd; - unsigned long addr; - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && end - start >= parisc_cache_flush_threshold) { if (vma->vm_mm->context) @@ -610,30 +607,12 @@ void flush_cache_range(struct vm_area_struct *vma, preempt_disable(); if (vma->vm_mm->context == mfsp(3)) { - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); + flush_user_cache_tlb(vma, start, end); preempt_enable(); return; } - pgd = vma->vm_mm->pgd; - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - unsigned long pfn; - pte_t *ptep = get_ptep(pgd, addr); - if (!ptep) - continue; - pfn = pte_pfn(*ptep); - if (pfn_valid(pfn)) { - if (unlikely(vma->vm_mm->context)) { - flush_tlb_page(vma, addr); - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } else { - __purge_cache_page(vma, addr, PFN_PHYS(pfn)); - } - } - } + flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end); preempt_enable(); } From 61c359de9c08696fac3c93bcf53e174a8127f177 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 20:24:37 +0200 Subject: [PATCH 0529/1202] parisc: fix preempt_count() check in entry.S preempt_count in struct thread_info is unsigned int, but the entry.S code used LDREG, which generates a 64 bit load when compiled for 64 bit. Fix this to use an ldw and also change the compare one line below to only compare 32 bits, although ldw zero-extends, so a 64-bit compare would also have worked. 
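To spell out the mismatch (a sketch reusing the TI_PRE_COUNT offset from the patch; on a 64-bit kernel the LDREG macro expands to ldd):

  LDREG  TI_PRE_COUNT(%r1), %r19  /* ldd: loads 8 bytes, i.e. preempt_count
                                     plus the neighbouring field */
  ldw    TI_PRE_COUNT(%r1), %r19  /* loads only the 32-bit preempt_count,
                                     zero-extended into %r19 */
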
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 9f939afe6b88c..e9e598c18cb06 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -974,8 +974,8 @@ intr_do_preempt: /* current_thread_info()->preempt_count */ mfctl %cr30, %r1 - LDREG TI_PRE_COUNT(%r1), %r19 - cmpib,COND(<>) 0, %r19, intr_restore /* if preempt_count > 0 */ + ldw TI_PRE_COUNT(%r1), %r19 + cmpib,<> 0, %r19, intr_restore /* if preempt_count > 0 */ nop /* prev insn branched backwards */ /* check if we interrupted a critical path */ From f3f312c4f7ad3810ba940572ab40ea566dd0d3a2 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 20:24:38 +0200 Subject: [PATCH 0530/1202] parisc: disable preemption in send_IPI_allbutself() Otherwise we might not stop all other CPUs. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/smp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 1405b603b91b6..3413e6949c872 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -219,11 +219,13 @@ static inline void send_IPI_allbutself(enum ipi_message_type op) { int i; - + + preempt_disable(); for_each_online_cpu(i) { if (i != smp_processor_id()) send_IPI_single(i, op); } + preempt_enable(); } From f8011cb0790315798d3cbefcc51e986b87980077 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 20:24:39 +0200 Subject: [PATCH 0531/1202] parisc: fix warning in flush_tlb_all I've got the following splat after enabling preemption: [ 3.724721] BUG: using __this_cpu_add() in preemptible [00000000] code: swapper/0/1 [ 3.734630] caller is __this_cpu_preempt_check+0x38/0x50 [ 3.740635] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc4-64bit+ #324 [ 3.744605] Hardware name: 9000/785/C8000 [ 3.744605] Backtrace: [ 3.744605] [<00000000401d9d58>] show_stack+0x74/0xb0 [ 3.744605] [<0000000040c27bd4>] dump_stack_lvl+0x10c/0x188 [ 3.744605] [<0000000040c27c84>] dump_stack+0x34/0x48 [ 3.744605] [<0000000040c33438>] check_preemption_disabled+0x178/0x1b0 [ 3.744605] [<0000000040c334f8>] __this_cpu_preempt_check+0x38/0x50 [ 3.744605] [<00000000401d632c>] flush_tlb_all+0x58/0x2e0 [ 3.744605] [<00000000401075c0>] 0x401075c0 [ 3.744605] [<000000004010b8fc>] 0x4010b8fc [ 3.744605] [<00000000401080fc>] 0x401080fc [ 3.744605] [<00000000401d5224>] do_one_initcall+0x128/0x378 [ 3.744605] [<0000000040102de8>] 0x40102de8 [ 3.744605] [<0000000040c33864>] kernel_init+0x60/0x3a8 [ 3.744605] [<00000000401d1020>] ret_from_kernel_thread+0x20/0x28 [ 3.744605] Fix this by moving the __inc_irq_stat() into the locked section. 
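The rule being enforced, as a minimal sketch (the lock and counter names are hypothetical): __this_cpu_add() is only valid where the task cannot migrate, and a held (non-RT) spinlock provides such a region:

  static DEFINE_SPINLOCK(hypothetical_lock);
  static DEFINE_PER_CPU(unsigned long, hypothetical_count);

  static void hypothetical_update(void)
  {
          spin_lock(&hypothetical_lock);
          /* safe: a held spinlock implies a non-preemptible section */
          __this_cpu_add(hypothetical_count, 1);
          spin_unlock(&hypothetical_lock);

          /* outside such a section, use the preemption-safe form: */
          this_cpu_add(hypothetical_count, 1);
  }
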
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3f7d6d5b56ac8..65f50f072a87b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -842,9 +842,9 @@ void flush_tlb_all(void) { int do_recycle; - __inc_irq_stat(irq_tlb_count); do_recycle = 0; spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); if (dirty_space_ids > RECYCLE_THRESHOLD) { BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */ get_dirty_sids(&recycle_ndirty,recycle_dirty_array); @@ -863,8 +863,8 @@ void flush_tlb_all(void) #else void flush_tlb_all(void) { - __inc_irq_stat(irq_tlb_count); spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); flush_tlb_all_local(NULL); recycle_sids(); spin_unlock(&sid_lock); From e6fa5791478efb07855a81f723661df9d3e63a72 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Oct 2021 23:36:50 +0200 Subject: [PATCH 0532/1202] parisc: Define FRAME_ALIGN and PRIV_USER/PRIV_KERNEL in assembly.h Signed-off-by: Helge Deller --- arch/parisc/include/asm/assembly.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index a39250cb7dfcf..365c2853eb7a0 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -42,6 +42,9 @@ #define ASM_ULONG_INSN .word #endif +/* Frame alignment for 32- and 64-bit */ +#define FRAME_ALIGN 64 + #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE) #ifdef CONFIG_PA20 @@ -58,6 +61,10 @@ #define PA_ASM_LEVEL 1.1 #endif +/* Privilege level field in the rightmost two bits of the IA queues */ +#define PRIV_USER 3 +#define PRIV_KERNEL 0 + #ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT From fe4d878f71d219984f82694982f0326d854215c6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Oct 2021 23:40:48 +0200 Subject: [PATCH 0533/1202] parisc: Allocate task struct with stack frame alignment We will put the stack directly behind the task struct, so make sure that we allocate it with an alignment of 64 bytes. 
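For context, a sketch of roughly how this constant is consumed when the task_struct slab cache is created in kernel/fork.c (simplified, not the verbatim kernel code):

  /* The slab alignment is the maximum of the cacheline size and
   * ARCH_MIN_TASKALIGN, so with ARCH_MIN_TASKALIGN == FRAME_ALIGN every
   * task_struct - and a stack placed directly behind it - starts on a
   * 64-byte boundary. */
  int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);

  task_struct_cachep = kmem_cache_create("task_struct",
                  arch_task_struct_size, align,
                  SLAB_PANIC | SLAB_ACCOUNT, NULL);
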
Signed-off-by: Helge Deller --- arch/parisc/include/asm/processor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index eeb7da0642891..290471ef5bac6 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #include #include @@ -101,7 +102,7 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data); #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) -#define ARCH_MIN_TASKALIGN 8 +#define ARCH_MIN_TASKALIGN FRAME_ALIGN struct thread_struct { struct pt_regs regs; From 55d3a95ff38cab3413fb08fe3357a9a93d02527b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Oct 2021 23:46:23 +0200 Subject: [PATCH 0534/1202] parisc: Use FRAME_SIZE and FRAME_ALIGN from assembly.h Signed-off-by: Helge Deller --- arch/parisc/kernel/asm-offsets.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 22924a3f17283..276c67ea7fd23 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -22,18 +22,12 @@ #include #include +#include #include #include #include #include -#ifdef CONFIG_64BIT -#define FRAME_SIZE 128 -#else -#define FRAME_SIZE 64 -#endif -#define FRAME_ALIGN 64 - /* Add FRAME_SIZE to the size x and align it to y. All definitions * that use align_frame will include space for a frame. */ From df199e805a9713e8511e5a8a47e21261c722cd83 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 4 Oct 2021 23:57:04 +0200 Subject: [PATCH 0535/1202] parisc: Use PRIV_USER instead of 3 in entry.S Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index e9e598c18cb06..4fc81eddad162 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -826,10 +826,10 @@ ENTRY_CFI(syscall_exit_rfi) * context via sigcontext. Also Filter the PSW for the same reason. */ LDREG PT_IAOQ0(%r16),%r19 - depi 3,31,2,%r19 + depi PRIV_USER,31,2,%r19 STREG %r19,PT_IAOQ0(%r16) LDREG PT_IAOQ1(%r16),%r19 - depi 3,31,2,%r19 + depi PRIV_USER,31,2,%r19 STREG %r19,PT_IAOQ1(%r16) LDREG PT_PSW(%r16),%r19 load32 USER_PSW_MASK,%r1 @@ -1873,7 +1873,7 @@ syscall_restore: mtsp %r1,%sr5 /* Restore sr5 */ mtsp %r1,%sr6 /* Restore sr6 */ - depi 3,31,2,%r31 /* ensure return to user mode. */ + depi PRIV_USER,31,2,%r31 /* ensure return to user mode. */ #ifdef CONFIG_64BIT /* decide whether to reset the wide mode bit @@ -1949,7 +1949,7 @@ syscall_restore_rfi: STREG %r0,TASK_PT_SR2(%r1) LDREG TASK_PT_GR31(%r1),%r2 - depi 3,31,2,%r2 /* ensure return to user mode. */ + depi PRIV_USER,31,2,%r2 /* ensure return to user mode. */ STREG %r2,TASK_PT_IAOQ0(%r1) ldo 4(%r2),%r2 STREG %r2,TASK_PT_IAOQ1(%r1) @@ -1958,10 +1958,10 @@ syscall_restore_rfi: pt_regs_ok: LDREG TASK_PT_IAOQ0(%r1),%r2 - depi 3,31,2,%r2 /* ensure return to user mode. */ + depi PRIV_USER,31,2,%r2 /* ensure return to user mode. 
*/ STREG %r2,TASK_PT_IAOQ0(%r1) LDREG TASK_PT_IAOQ1(%r1),%r2 - depi 3,31,2,%r2 + depi PRIV_USER,31,2,%r2 STREG %r2,TASK_PT_IAOQ1(%r1) b intr_restore copy %r25,%r16 From 69dc881ed71502b310c697857a2e0ec47123ed9a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 5 Oct 2021 00:05:43 +0200 Subject: [PATCH 0536/1202] task_stack: Fix end_of_stack() for architectures with upwards-growing stack The function end_of_stack() returns a pointer to the last entry of a stack. For architectures like parisc where the stack grows upwards return the pointer to the highest address in the stack. Without this change I faced a crash on parisc, because the stackleak functionality wrote STACKLEAK_POISON to the lowest address and thus overwrote the first 4 bytes of the task_struct which included the TIF_FLAGS. Signed-off-by: Helge Deller --- include/linux/sched/task_stack.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427e439c7..d10150587d819 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) From e0d8fb74c1a52702cbb3635c61dfeec0a8b091f8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 5 Oct 2021 00:27:49 +0200 Subject: [PATCH 0537/1202] parisc: Fix ptrace check on syscall return The TIF_XXX flags are stored in the flags field in the thread_info struct (TI_FLAGS), not in the flags field of the task_struct structure (TASK_FLAGS). It seems this bug has been in the kernel for 12 years (since v2.6.32). Signed-off-by: Helge Deller Fixes: ecd3d4bc06e48 ("parisc: stop using task->ptrace for {single,block}step flags") Cc: Kyle McMartin Cc: stable@vger.kernel.org --- arch/parisc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 4fc81eddad162..852bf7334fb5b 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1834,7 +1834,7 @@ syscall_restore: LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* Are we being ptraced? */ - ldw TASK_FLAGS(%r1),%r19 + LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 ldi _TIF_SYSCALL_TRACE_MASK,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi From b6be4a8fc6571e9b3922e32a2f794adc327cdb4d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 9 Oct 2021 23:15:17 +0200 Subject: [PATCH 0538/1202] parisc/unwind: fix unwinder when CONFIG_64BIT is enabled With 64 bit kernels unwind_special() is not working because it compares the pc to the address of the function descriptor. Add a helper function that compares pc with the dereferenced address. This fixes all of the backtraces on my c8000. Without this changes, a lot of backtraces are missing in kdb or the show-all-tasks command from /proc/sysrq-trigger. 
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 889d5889203a5..34676658c0401 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -21,6 +21,8 @@ #include #include +#include +#include /* #define DEBUG 1 */ #ifdef DEBUG @@ -203,6 +205,11 @@ int __init unwind_init(void) return 0; } +static bool pc_is_kernel_fn(unsigned long pc, void *fn) +{ + return (unsigned long)dereference_kernel_function_descriptor(fn) == pc; +} + static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int frame_size) { /* @@ -221,7 +228,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - if (pc == (unsigned long) &handle_interruption) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -229,13 +236,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &ret_from_kernel_thread || - pc == (unsigned long) &syscall_exit) { + if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || + pc_is_kernel_fn(pc, syscall_exit)) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc == (unsigned long) &intr_return) { + if (pc_is_kernel_fn(pc, intr_return)) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -246,20 +253,20 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &_switch_to_ret) { + if (pc_is_kernel_fn(pc, _switch_to) || + pc_is_kernel_fn(pc, _switch_to_ret)) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long) &_call_on_stack) { + if (pc_is_kernel_fn(pc, _call_on_stack)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; } #endif - return 0; } From b823642ca2cc8d00aed11a45859470e8c287547c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 14 Oct 2021 21:49:13 +0200 Subject: [PATCH 0539/1202] parisc: move virt_map macro to assembly.h This macro will also be used by the TOC code, so move it into asm/assembly.h to avoid duplication. 
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/include/asm/assembly.h | 25 +++++++++++++++++++++++++ arch/parisc/kernel/entry.S | 24 ------------------------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 365c2853eb7a0..7085df0797029 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -78,6 +78,7 @@ #include #include +#include sp = 30 gp = 27 @@ -504,6 +505,30 @@ nop /* 7 */ .endm + /* Switch to virtual mapping, trashing only %r1 */ + .macro virt_map + /* pcxt_ssm_bug */ + rsm PSW_SM_I, %r0 /* barrier for "Relied upon Translation */ + mtsp %r0, %sr4 + mtsp %r0, %sr5 + mtsp %r0, %sr6 + tovirt_r1 %r29 + load32 KERNEL_PSW, %r1 + + rsm PSW_SM_QUIET,%r0 /* second "heavy weight" ctl op */ + mtctl %r0, %cr17 /* Clear IIASQ tail */ + mtctl %r0, %cr17 /* Clear IIASQ head */ + mtctl %r1, %ipsw + load32 4f, %r1 + mtctl %r1, %cr18 /* Set IIAOQ tail */ + ldo 4(%r1), %r1 + mtctl %r1, %cr18 /* Set IIAOQ head */ + rfir + nop +4: + .endm + + /* * ASM_EXCEPTIONTABLE_ENTRY * diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 852bf7334fb5b..b7108d5da6441 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -51,30 +51,6 @@ extrd,u \spc,(64 - (SPACEID_SHIFT)),32,\prot .endm #endif - - /* Switch to virtual mapping, trashing only %r1 */ - .macro virt_map - /* pcxt_ssm_bug */ - rsm PSW_SM_I, %r0 /* barrier for "Relied upon Translation */ - mtsp %r0, %sr4 - mtsp %r0, %sr5 - mtsp %r0, %sr6 - tovirt_r1 %r29 - load32 KERNEL_PSW, %r1 - - rsm PSW_SM_QUIET,%r0 /* second "heavy weight" ctl op */ - mtctl %r0, %cr17 /* Clear IIASQ tail */ - mtctl %r0, %cr17 /* Clear IIASQ head */ - mtctl %r1, %ipsw - load32 4f, %r1 - mtctl %r1, %cr18 /* Set IIAOQ tail */ - ldo 4(%r1), %r1 - mtctl %r1, %cr18 /* Set IIAOQ head */ - rfir - nop -4: - .endm - /* * The "get_stack" macros are responsible for determining the * kernel stack value. From a3a48ef6e24503af173052087765c7cae8bf02d6 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 14 Oct 2021 21:49:14 +0200 Subject: [PATCH 0540/1202] parisc: add PIM TOC data structures These data structures describe the TOC data we get from firmware when issuing a PDC_PIM_TOC request. 
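For reference, the TOC handler added later in this series (patch 0542) copies such a PIM dump into a struct pt_regs; this excerpt of the wide (2.0) mapping, with toc pointing to a struct pdc_toc_pim_20, shows how the back entries of the instruction-address queues pair up with cr17/cr18:

    regs->iasq[0] = (unsigned long)toc->cr[17];
    regs->iasq[1] = (unsigned long)toc->iasq_back;
    regs->iaoq[0] = (unsigned long)toc->cr[18];
    regs->iaoq[1] = (unsigned long)toc->iaoq_back;
    regs->sar = (unsigned long)toc->cr[11];
    regs->iir = (unsigned long)toc->cr[19];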
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/pdc.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h index 15211723ebf54..ad51df8ba952b 100644 --- a/arch/parisc/include/uapi/asm/pdc.h +++ b/arch/parisc/include/uapi/asm/pdc.h @@ -689,6 +689,28 @@ struct pdc_hpmc_pim_20 { /* PDC_PIM */ unsigned long long fr[32]; }; +struct pdc_toc_pim_11 { + unsigned int gr[32]; + unsigned int cr[32]; + unsigned int sr[8]; + unsigned int iasq_back; + unsigned int iaoq_back; + unsigned int check_type; + unsigned int hversion; + unsigned int cpu_state; +}; + +struct pdc_toc_pim_20 { + unsigned long long gr[32]; + unsigned long long cr[32]; + unsigned long long sr[8]; + unsigned long long iasq_back; + unsigned long long iaoq_back; + unsigned int check_type; + unsigned int hversion; + unsigned int cpu_state; +}; + #endif /* !defined(__ASSEMBLY__) */ #endif /* _UAPI_PARISC_PDC_H */ From 4715fba7128643820608c41d4557b4936f8ba7c0 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 14 Oct 2021 21:49:15 +0200 Subject: [PATCH 0541/1202] parisc/firmware: add functions to retrieve TOC data Add functions to retrieve TOC data from firmware both for 1.1 and 2.0 PDC. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdc.h | 2 ++ arch/parisc/kernel/firmware.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index b388d81765883..18b957a8630da 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -51,6 +51,8 @@ int pdc_spaceid_bits(unsigned long *space_bits); int pdc_btlb_info(struct pdc_btlb_info *btlb); int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path); #endif /* !CONFIG_PA20 */ +int pdc_pim_toc11(struct pdc_toc_pim_11 *ret); +int pdc_pim_toc20(struct pdc_toc_pim_20 *ret); int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa); int pdc_stable_read(unsigned long staddr, void *memaddr, unsigned long count); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 7034227dbdf32..3370e347dde32 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -1061,6 +1061,38 @@ int pdc_mem_pdt_read_entries(struct pdc_mem_read_pdt *pret, return retval; } +/** + * pdc_pim_toc11 - Fetch TOC PIM 1.1 data from firmware. + * @ret: pointer to return buffer + */ +int pdc_pim_toc11(struct pdc_toc_pim_11 *ret) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PIM, PDC_PIM_TOC, __pa(pdc_result), + __pa(ret), sizeof(*ret)); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; +} + +/** + * pdc_pim_toc20 - Fetch TOC PIM 2.0 data from firmware. + * @ret: pointer to return buffer + */ +int pdc_pim_toc20(struct pdc_toc_pim_20 *ret) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_PIM, PDC_PIM_TOC, __pa(pdc_result), + __pa(ret), sizeof(*ret)); + spin_unlock_irqrestore(&pdc_lock, flags); + return retval; +} + /** * pdc_tod_set - Set the Time-Of-Day clock. * @sec: The number of seconds since epoch. 
From 665f05ca9ffb335e8da18436cc93d1258f603d7d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 14 Oct 2021 21:49:16 +0200 Subject: [PATCH 0542/1202] parisc: add support for TOC (transfer of control) Almost all PA-RISC machines have either a button that is labeled with 'TOC' or a BMC function to trigger a TOC. TOC is a non-maskable interrupt that is sent to the processor. This can be used for diagnostic purposes like obtaining a stack trace/register dump or to enter KDB/KGDB. As an example, on my c8000, TOC can be used with: CONFIG_KGDB=y CONFIG_KGDB_KDB=y and the 'kgdboc=ttyS0,115200' appended to the command line. Press ^[( on serial console, which will enter the BMC command line, and enter 'TOC s': root@(none):/# ( cli>TOC s Sending TOC/INIT. 2800035d03e00000 0000000040c21ac8 CC_ERR_CHECK_TOC 2800035d00e00000 0000000040c21ad0 CC_ERR_CHECK_TOC 2800035d02e00000 0000000040c21ac8 CC_ERR_CHECK_TOC 2800035d01e00000 0000000040c21ad0 CC_ERR_CHECK_TOC 37000f7303e00000 2000000000000000 CC_ERR_CPU_CHECK_SUMMARY 37000f7300e00000 2000000000000000 CC_ERR_CPU_CHECK_SUMMARY 37000f7302e00000 2000000000000000 CC_ERR_CPU_CHECK_SUMMARY 37000f7301e00000 2000000000000000 CC_ERR_CPU_CHECK_SUMMARY 4300100803e00000 c0000000001d26cc CC_MC_BR_TO_OS_TOC 4300100800e00000 c0000000001d26cc CC_MC_BR_TO_OS_TOC 4300100802e00000 c0000000001d26cc CC_MC_BR_TO_OS_TOC 4300100801e00000 c0000000001d26cc CC_MC_BR_TO_OS_TOC Entering kdb (current=0x00000000411cef80, pid 0) on processor 0 due to NonMaskable Interrupt @ 0x40c21ad0 [0]kdb> Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 14 ++++ arch/parisc/include/asm/processor.h | 4 + arch/parisc/include/uapi/asm/pdc.h | 6 +- arch/parisc/kernel/Makefile | 1 + arch/parisc/kernel/toc.c | 111 ++++++++++++++++++++++++++++ arch/parisc/kernel/toc_asm.S | 88 ++++++++++++++++++++++ 6 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 arch/parisc/kernel/toc.c create mode 100644 arch/parisc/kernel/toc_asm.S diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 906187a412ec2..3296fcb90019d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -291,6 +291,20 @@ config SMP If you don't know what to do here, say N. +config TOC + bool "Support TOC switch" + default y if 64BIT || !SMP + help + Most PA-RISC machines have either a switch at the back of the machine + or a command in BMC to trigger a TOC interrupt. If you say Y here a + handler will be installed which will either show a backtrace on all + CPUs, or enter a possible configured debugger like kgdb/kdb. + + Note that with this option enabled, the kernel will use an additional 16KB + per possible CPU as a special stack for the TOC handler. + + If you don't want to debug the Kernel, say N. 
+ config PARISC_CPU_TOPOLOGY bool "Support cpu topology definition" depends on SMP diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 290471ef5bac6..f57944d3284bf 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -295,6 +295,10 @@ extern int _parisc_requires_coherency; extern int running_on_qemu; +extern void toc_handler(void); +extern unsigned int toc_handler_size; +extern unsigned int toc_handler_csum; + #endif /* __ASSEMBLY__ */ #endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h index ad51df8ba952b..acc633c157221 100644 --- a/arch/parisc/include/uapi/asm/pdc.h +++ b/arch/parisc/include/uapi/asm/pdc.h @@ -398,8 +398,10 @@ struct zeropage { /* int (*vec_rendz)(void); */ unsigned int vec_rendz; int vec_pow_fail_flen; - int vec_pad[10]; - + int vec_pad0[3]; + unsigned int vec_toc_hi; + int vec_pad1[6]; + /* [0x040] reserved processor dependent */ int pad0[112]; diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 068d90950d937..ed0b87908d71f 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -39,3 +39,4 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o +obj-$(CONFIG_TOC) += toc.o toc_asm.o diff --git a/arch/parisc/kernel/toc.c b/arch/parisc/kernel/toc.c new file mode 100644 index 0000000000000..18327611cf8f2 --- /dev/null +++ b/arch/parisc/kernel/toc.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include +#include + +unsigned int __aligned(16) toc_lock = 1; + +static void toc20_to_pt_regs(struct pt_regs *regs, struct pdc_toc_pim_20 *toc) +{ + int i; + + regs->gr[0] = (unsigned long)toc->cr[22]; + + for (i = 1; i < 32; i++) + regs->gr[i] = (unsigned long)toc->gr[i]; + + for (i = 0; i < 8; i++) + regs->sr[i] = (unsigned long)toc->sr[i]; + + regs->iasq[0] = (unsigned long)toc->cr[17]; + regs->iasq[1] = (unsigned long)toc->iasq_back; + regs->iaoq[0] = (unsigned long)toc->cr[18]; + regs->iaoq[1] = (unsigned long)toc->iaoq_back; + + regs->sar = (unsigned long)toc->cr[11]; + regs->iir = (unsigned long)toc->cr[19]; + regs->isr = (unsigned long)toc->cr[20]; + regs->ior = (unsigned long)toc->cr[21]; +} + +static void toc11_to_pt_regs(struct pt_regs *regs, struct pdc_toc_pim_11 *toc) +{ + int i; + + regs->gr[0] = toc->cr[22]; + + for (i = 1; i < 32; i++) + regs->gr[i] = toc->gr[i]; + + for (i = 0; i < 8; i++) + regs->sr[i] = toc->sr[i]; + + regs->iasq[0] = toc->cr[17]; + regs->iasq[1] = toc->iasq_back; + regs->iaoq[0] = toc->cr[18]; + regs->iaoq[1] = toc->iaoq_back; + + regs->sar = toc->cr[11]; + regs->iir = toc->cr[19]; + regs->isr = toc->cr[20]; + regs->ior = toc->cr[21]; +} + +void notrace __noreturn __cold toc_intr(struct pt_regs *regs) +{ + struct pdc_toc_pim_20 pim_data20; + struct pdc_toc_pim_11 pim_data11; + + nmi_enter(); + + if (boot_cpu_data.cpu_type >= pcxu) { + if (pdc_pim_toc20(&pim_data20)) + panic("Failed to get PIM data"); + toc20_to_pt_regs(regs, &pim_data20); + } else { + if (pdc_pim_toc11(&pim_data11)) + panic("Failed to get PIM data"); + toc11_to_pt_regs(regs, &pim_data11); + } + +#ifdef CONFIG_KGDB + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(raw_smp_processor_id(), regs); + kgdb_handle_exception(9, SIGTRAP, 0, regs); +#endif + show_regs(regs); + + /* give other CPUs time to 
show their backtrace */ + mdelay(2000); + machine_restart("TOC"); + + /* should never reach this */ + panic("TOC"); +} + +static __init int setup_toc(void) +{ + unsigned int csum = 0; + unsigned long toc_code = (unsigned long)dereference_function_descriptor(toc_handler); + int i; + + PAGE0->vec_toc = __pa(toc_code) & 0xffffffff; +#ifdef CONFIG_64BIT + PAGE0->vec_toc_hi = __pa(toc_code) >> 32; +#endif + PAGE0->vec_toclen = toc_handler_size; + + for (i = 0; i < toc_handler_size/4; i++) + csum += ((u32 *)toc_code)[i]; + toc_handler_csum = -csum; + pr_info("TOC handler registered\n"); + return 0; +} +early_initcall(setup_toc); diff --git a/arch/parisc/kernel/toc_asm.S b/arch/parisc/kernel/toc_asm.S new file mode 100644 index 0000000000000..e94ba80441906 --- /dev/null +++ b/arch/parisc/kernel/toc_asm.S @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* TOC (Transfer of Control) handler. */ + + .level 1.1 + +#include +#include +#include +#include + + .text + .import toc_intr,code + .import toc_lock,data + .align 16 +ENTRY_CFI(toc_handler) + /* + * synchronize CPUs and obtain offset + * for stack setup. + */ + load32 PA(toc_lock),%r1 +0: ldcw,co 0(%r1),%r2 + cmpib,= 0,%r2,0b + nop + addi 1,%r2,%r4 + stw %r4,0(%r1) + addi -1,%r2,%r4 + + load32 PA(toc_stack),%sp + /* + * deposit CPU number into stack address, + * so every CPU will have its own stack. + */ + SHLREG %r4,14,%r4 + add %r4,%sp,%sp + + /* + * setup pt_regs on stack and save the + * floating point registers. PIM_TOC doesn't + * save fp registers, so we're doing it here. + */ + copy %sp,%arg0 + ldo PT_SZ_ALGN(%sp), %sp + + /* clear pt_regs */ + copy %arg0,%r1 +0: cmpb,<<,n %r1,%sp,0b + stw,ma %r0,4(%r1) + + ldo PT_FR0(%arg0),%r25 + save_fp %r25 + + /* go virtual */ + load32 PA(swapper_pg_dir),%r4 + mtctl %r4,%cr24 + mtctl %r4,%cr25 + + /* Clear sr4-sr7 */ + mtsp %r0, %sr4 + mtsp %r0, %sr5 + mtsp %r0, %sr6 + mtsp %r0, %sr7 + + tovirt_r1 %sp + tovirt_r1 %arg0 + virt_map + + loadgp + +#ifdef CONFIG_64BIT + ldo -16(%sp),%r29 +#endif + load32 toc_intr,%r1 + be 0(%sr7,%r1) + nop +ENDPROC_CFI(toc_handler) + + /* + * keep this checksum here, as it is part of the toc_handler + * spanned by toc_handler_size (all words in toc_handler are + * added in PDC and the sum must equal to zero. + */ +SYM_DATA(toc_handler_csum, .long 0) +SYM_DATA(toc_handler_size, .long . - toc_handler) + + __PAGE_ALIGNED_BSS + .align 64 +SYM_DATA(toc_stack, .block 16384*NR_CPUS) From 88595c9ae4e488470219a082a79e0bb3c277d666 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 15 Oct 2021 10:41:03 +0200 Subject: [PATCH 0543/1202] parisc: Move thread_info into task struct This implements the CONFIG_THREAD_INFO_IN_TASK option. 
With this change: - before thread_info was part of the stack and located at the beginning of the stack - now the thread_info struct is moved and located inside the task_struct structure - the stack is allocated and handled like the major other platforms - drop the cpu field of thread_info and use instead the one in task_struct Signed-off-by: Helge Deller Signed-off-by: Sven Schnelle --- .../core/thread-info-in-task/arch-support.txt | 2 +- arch/parisc/Kconfig | 1 + arch/parisc/include/asm/current.h | 19 +++++++ arch/parisc/include/asm/processor.h | 2 - arch/parisc/include/asm/smp.h | 19 ++++++- arch/parisc/include/asm/thread_info.h | 7 --- arch/parisc/kernel/asm-offsets.c | 21 +++----- arch/parisc/kernel/entry.S | 50 ++++++++----------- arch/parisc/kernel/head.S | 40 +++++++-------- arch/parisc/kernel/irq.c | 6 +-- arch/parisc/kernel/process.c | 4 +- arch/parisc/kernel/smp.c | 2 +- arch/parisc/kernel/syscall.S | 22 +++----- arch/parisc/kernel/traps.c | 2 +- arch/parisc/kernel/unwind.c | 10 ++-- 15 files changed, 106 insertions(+), 101 deletions(-) create mode 100644 arch/parisc/include/asm/current.h diff --git a/Documentation/features/core/thread-info-in-task/arch-support.txt b/Documentation/features/core/thread-info-in-task/arch-support.txt index 9f0259bbd7dfa..3361e86b0958e 100644 --- a/Documentation/features/core/thread-info-in-task/arch-support.txt +++ b/Documentation/features/core/thread-info-in-task/arch-support.txt @@ -20,7 +20,7 @@ | nds32: | ok | | nios2: | TODO | | openrisc: | TODO | - | parisc: | TODO | + | parisc: | ok | | powerpc: | ok | | riscv: | ok | | s390: | ok | diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3296fcb90019d..b2188da09c732 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -56,6 +56,7 @@ config PARISC select HAVE_UNSTABLE_SCHED_CLOCK if SMP select LEGACY_TIMER_TICK select CPU_NO_EFFICIENT_FFS + select THREAD_INFO_IN_TASK select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select HAVE_ARCH_KGDB diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h new file mode 100644 index 0000000000000..568b739e42afa --- /dev/null +++ b/arch/parisc/include/asm/current.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PARISC_CURRENT_H +#define _ASM_PARISC_CURRENT_H + +#include + +#ifndef __ASSEMBLY__ +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return (struct task_struct *) mfctl(30); +} + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PARISC_CURRENT_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index f57944d3284bf..0d567774f506e 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -102,8 +102,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data); #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) -#define ARCH_MIN_TASKALIGN FRAME_ALIGN - struct thread_struct { struct pt_regs regs; unsigned long task_size; diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h index b9a18db4b05af..16d41127500ed 100644 --- a/arch/parisc/include/asm/smp.h +++ b/arch/parisc/include/asm/smp.h @@ -34,8 +34,23 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #endif /* !ASSEMBLY */ -#define raw_smp_processor_id() (current_thread_info()->cpu) - +/* + * This is particularly ugly: it appears we can't actually get the definition + * of task_struct here, but we need access to the CPU this task is running on. 
+ * Instead of using task_struct we're using TASK_CPU which is extracted from + * asm-offsets.h by kbuild to get the current processor ID. + * + * This also needs to be safeguarded when building asm-offsets.s because at + * that time TASK_CPU is not defined yet. It could have been guarded by + * TASK_CPU itself, but we want the build to fail if TASK_CPU is missing + * when building something else than asm-offsets.s + */ +#ifdef GENERATING_ASM_OFFSETS +#define raw_smp_processor_id() (0) +#else +#include +#define raw_smp_processor_id() (*(unsigned int *)((void *)current + TASK_CPU)) +#endif #else /* CONFIG_SMP */ static inline void smp_send_all_nop(void) { return; } diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 00ad50fef7691..4617303e0620d 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -9,23 +9,16 @@ #include struct thread_info { - struct task_struct *task; /* main task structure */ unsigned long flags; /* thread_info flags (see TIF_*) */ - __u32 cpu; /* current CPU */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ .flags = 0, \ - .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } -/* how to get the thread information struct from C */ -#define current_thread_info() ((struct thread_info *)mfctl(30)) - #endif /* !__ASSEMBLY */ /* thread information allocation */ diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 276c67ea7fd23..5d84b0f78c2ae 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -14,6 +14,8 @@ * Copyright (C) 2003 James Bottomley */ +#define GENERATING_ASM_OFFSETS /* asm/smp.h */ + #include #include #include @@ -35,13 +37,16 @@ int main(void) { - DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack)); - DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags)); + DEFINE(TASK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending)); DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); DEFINE(TASK_MM, offsetof(struct task_struct, mm)); DEFINE(TASK_PERSONALITY, offsetof(struct task_struct, personality)); DEFINE(TASK_PID, offsetof(struct task_struct, pid)); + DEFINE(TASK_STACK, offsetof(struct task_struct, stack)); +#ifdef CONFIG_SMP + DEFINE(TASK_CPU, offsetof(struct task_struct, cpu)); +#endif BLANK(); DEFINE(TASK_REGS, offsetof(struct task_struct, thread.regs)); DEFINE(TASK_PT_PSW, offsetof(struct task_struct, thread.regs.gr[ 0])); @@ -129,10 +134,6 @@ int main(void) DEFINE(TASK_PT_ISR, offsetof(struct task_struct, thread.regs.isr)); DEFINE(TASK_PT_IOR, offsetof(struct task_struct, thread.regs.ior)); BLANK(); - DEFINE(TASK_SZ, sizeof(struct task_struct)); - /* TASK_SZ_ALGN includes space for a stack frame. */ - DEFINE(TASK_SZ_ALGN, align_frame(sizeof(struct task_struct), FRAME_ALIGN)); - BLANK(); DEFINE(PT_PSW, offsetof(struct pt_regs, gr[ 0])); DEFINE(PT_GR1, offsetof(struct pt_regs, gr[ 1])); DEFINE(PT_GR2, offsetof(struct pt_regs, gr[ 2])); @@ -217,17 +218,11 @@ int main(void) DEFINE(PT_IIR, offsetof(struct pt_regs, iir)); DEFINE(PT_ISR, offsetof(struct pt_regs, isr)); DEFINE(PT_IOR, offsetof(struct pt_regs, ior)); - DEFINE(PT_SIZE, sizeof(struct pt_regs)); /* PT_SZ_ALGN includes space for a stack frame. 
*/ DEFINE(PT_SZ_ALGN, align_frame(sizeof(struct pt_regs), FRAME_ALIGN)); BLANK(); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); - DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); - DEFINE(THREAD_SZ, sizeof(struct thread_info)); - /* THREAD_SZ_ALGN includes space for a stack frame. */ - DEFINE(THREAD_SZ_ALGN, align_frame(sizeof(struct thread_info), FRAME_ALIGN)); + DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count)); BLANK(); DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base)); DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride)); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index b7108d5da6441..57944d6f9ebb3 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -63,8 +63,8 @@ * Need to set up a kernel stack, so call the * get_stack_use_cr30 macro to set up a pointer * to the pt_regs structure contained within the - * task pointer pointed to by cr30. Set the stack - * pointer to point to the end of the task structure. + * task pointer pointed to by cr30. Load the stack + * pointer from the task structure. * * Note that we use shadowed registers for temps until * we can save %r26 and %r29. %r26 is used to preserve @@ -76,8 +76,6 @@ * or handle_interruption. %r29 is used to hold a pointer * the register save area, and once again, it needs to * be a non-shadowed register so that it survives the rfir. - * - * N.B. TASK_SZ_ALGN and PT_SZ_ALGN include space for a stack frame. */ .macro get_stack_use_cr30 @@ -86,12 +84,11 @@ copy %r30, %r17 mfctl %cr30, %r1 - ldo THREAD_SZ_ALGN(%r1), %r30 - mtsp %r0,%sr7 + tophys %r1,%r9 /* task_struct */ + LDREG TASK_STACK(%r9),%r30 + ldo PT_SZ_ALGN(%r30),%r30 + mtsp %r0,%sr7 /* clear sr7 after kernel stack was set! */ mtsp %r16,%sr3 - tophys %r1,%r9 - LDREG TI_TASK(%r9), %r1 /* thread_info -> task_struct */ - tophys %r1,%r9 ldo TASK_REGS(%r9),%r9 STREG %r17,PT_GR30(%r9) STREG %r29,PT_GR29(%r9) @@ -733,7 +730,7 @@ ENTRY(ret_from_kernel_thread) BL schedule_tail, %r2 nop - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1 + mfctl %cr30,%r1 /* task_struct */ LDREG TASK_PT_GR25(%r1), %r26 #ifdef CONFIG_64BIT LDREG TASK_PT_GR27(%r1), %r27 @@ -764,7 +761,6 @@ ENTRY_CFI(_switch_to) STREG %r30, TASK_PT_KSP(%r26) LDREG TASK_PT_KSP(%r25), %r30 - LDREG TASK_THREAD_INFO(%r25), %r25 bv %r0(%r2) mtctl %r25,%cr30 @@ -795,8 +791,7 @@ ENDPROC_CFI(_switch_to) .align PAGE_SIZE ENTRY_CFI(syscall_exit_rfi) - mfctl %cr30,%r16 - LDREG TI_TASK(%r16), %r16 /* thread_info -> task_struct */ + mfctl %cr30,%r16 /* task_struct */ ldo TASK_REGS(%r16),%r16 /* Force iaoq to userspace, as the user has had access to our current * context via sigcontext. Also Filter the PSW for the same reason. 
@@ -841,14 +836,14 @@ ENTRY_CFI(syscall_exit_rfi) ENTRY(intr_return) /* check for reschedule */ mfctl %cr30,%r1 - LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ + LDREG TASK_TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ bb,<,n %r19,31-TIF_NEED_RESCHED,intr_do_resched /* forward */ .import do_notify_resume,code intr_check_sig: /* As above */ mfctl %cr30,%r1 - LDREG TI_FLAGS(%r1),%r19 + LDREG TASK_TI_FLAGS(%r1),%r19 ldi (_TIF_USER_WORK_MASK & ~_TIF_NEED_RESCHED), %r20 and,COND(<>) %r19, %r20, %r0 b,n intr_restore /* skip past if we've nothing to do */ @@ -1692,7 +1687,7 @@ dtlb_fault: .macro fork_like name ENTRY_CFI(sys_\name\()_wrapper) - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1 + mfctl %cr30,%r1 ldo TASK_REGS(%r1),%r1 reg_save %r1 mfctl %cr27, %r28 @@ -1712,7 +1707,7 @@ ENTRY(child_return) BL schedule_tail, %r2 nop finish_child_return: - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1 + mfctl %cr30,%r1 ldo TASK_REGS(%r1),%r1 /* get pt regs */ LDREG PT_CR27(%r1), %r3 @@ -1723,7 +1718,7 @@ finish_child_return: END(child_return) ENTRY_CFI(sys_rt_sigreturn_wrapper) - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26 + mfctl %cr30,%r26 ldo TASK_REGS(%r26),%r26 /* get pt regs */ /* Don't save regs, we are going to restore them from sigcontext. */ STREG %r2, -RP_OFFSET(%r30) @@ -1740,7 +1735,7 @@ ENTRY_CFI(sys_rt_sigreturn_wrapper) LDREG -RP_OFFSET(%r30), %r2 /* FIXME: I think we need to restore a few more things here. */ - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 + mfctl %cr30,%r1 ldo TASK_REGS(%r1),%r1 /* get pt regs */ reg_restore %r1 @@ -1759,9 +1754,7 @@ ENTRY(syscall_exit) */ /* save return value now */ - mfctl %cr30, %r1 - LDREG TI_TASK(%r1),%r1 STREG %r28,TASK_PT_GR28(%r1) /* Seems to me that dp could be wrong here, if the syscall involved @@ -1772,13 +1765,14 @@ ENTRY(syscall_exit) syscall_check_resched: /* check for reschedule */ - - LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 /* long */ + mfctl %cr30,%r19 + LDREG TASK_TI_FLAGS(%r19),%r19 /* long */ bb,<,n %r19, 31-TIF_NEED_RESCHED, syscall_do_resched /* forward */ .import do_signal,code syscall_check_sig: - LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 + mfctl %cr30,%r19 + LDREG TASK_TI_FLAGS(%r19),%r19 ldi (_TIF_USER_WORK_MASK & ~_TIF_NEED_RESCHED), %r26 and,COND(<>) %r19, %r26, %r0 b,n syscall_restore /* skip past if we've nothing to do */ @@ -1789,7 +1783,7 @@ syscall_do_signal: * consistent with all the relevant state of the process * before the syscall. We need to verify this. */ - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 + mfctl %cr30,%r1 ldo TASK_REGS(%r1), %r26 /* struct pt_regs *regs */ reg_save %r26 @@ -1800,17 +1794,17 @@ syscall_do_signal: BL do_notify_resume,%r2 ldi 1, %r25 /* long in_syscall = 1 */ - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 + mfctl %cr30,%r1 ldo TASK_REGS(%r1), %r20 /* reload pt_regs */ reg_restore %r20 b,n syscall_check_sig syscall_restore: - LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 + mfctl %cr30,%r1 /* Are we being ptraced? 
*/ - LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 + LDREG TASK_TI_FLAGS(%r1),%r19 ldi _TIF_SYSCALL_TRACE_MASK,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index aa93d775c34db..b24f77748c22b 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -35,7 +35,8 @@ END(boot_args) __HEAD .align 4 - .import init_thread_union,data + .import init_task,data + .import init_stack,data .import fault_vector_20,code /* IVA parisc 2.0 32 bit */ #ifndef CONFIG_64BIT .import fault_vector_11,code /* IVA parisc 1.1 32 bit */ @@ -123,12 +124,12 @@ $pgt_fill_loop: load32 start_parisc,%r11 /* And the initial task pointer */ - load32 init_thread_union,%r6 + load32 init_task,%r6 mtctl %r6,%cr30 /* And the stack pointer too */ - ldo THREAD_SZ_ALGN(%r6),%sp - + load32 init_stack,%sp + tophys_r1 %sp #if defined(CONFIG_64BIT) && defined(CONFIG_FUNCTION_TRACER) .import _mcount,data /* initialize mcount FPTR */ @@ -186,12 +187,11 @@ common_stext: #endif /*CONFIG_SMP*/ #ifdef CONFIG_64BIT - tophys_r1 %sp + mfctl %cr30,%r6 /* PCX-W2 firmware bug */ + tophys_r1 %r6 /* Save the rfi target address */ - ldd TI_TASK-THREAD_SZ_ALGN(%sp), %r10 - tophys_r1 %r10 - std %r11, TASK_PT_GR11(%r10) + STREG %r11, TASK_PT_GR11(%r6) /* Switch to wide mode Superdome doesn't support narrow PDC ** calls. */ @@ -206,7 +206,6 @@ common_stext: ** Someday, palo might not do this for the Monarch either. */ 2: - mfctl %cr30,%r6 /* PCX-W2 firmware bug */ ldo PDC_PSW(%r0),%arg0 /* 21 */ ldo PDC_PSW_SET_DEFAULTS(%r0),%arg1 /* 2 */ @@ -216,13 +215,9 @@ common_stext: copy %r0,%arg3 stext_pdc_ret: + LDREG TASK_PT_GR11(%r6), %r11 + tovirt_r1 %r6 mtctl %r6,%cr30 /* restore task thread info */ - - /* restore rfi target address*/ - ldd TI_TASK-THREAD_SZ_ALGN(%sp), %r10 - tophys_r1 %r10 - ldd TASK_PT_GR11(%r10), %r11 - tovirt_r1 %sp #endif /* PARANOID: clear user scratch/user space SR's */ @@ -287,7 +282,9 @@ aligned_rfi: load32 KERNEL_PSW,%r10 mtctl %r10,%ipsw - + + tovirt_r1 %sp + /* Jump through hyperspace to Virt Mode */ rfi nop @@ -343,12 +340,13 @@ smp_slave_stext: #endif /* Initialize the SP - monarch sets up smp_init_current_idle_task */ - load32 PA(smp_init_current_idle_task),%sp - LDREG 0(%sp),%sp /* load task address */ + load32 PA(smp_init_current_idle_task),%r6 + LDREG 0(%r6),%r6 + mtctl %r6,%cr30 + tophys_r1 %r6 + LDREG TASK_STACK(%r6),%sp tophys_r1 %sp - LDREG TASK_THREAD_INFO(%sp),%sp - mtctl %sp,%cr30 /* store in cr30 */ - ldo THREAD_SZ_ALGN(%sp),%sp + ldo FRAME_SIZE(%sp),%sp /* point CPU to kernel page tables */ load32 PA(swapper_pg_dir),%r4 diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 0d46b19dc4d3d..eb18e16362f6c 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -399,8 +400,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) #ifdef CONFIG_DEBUG_STACKOVERFLOW #define STACK_MARGIN (256*6) - /* Our stack starts directly behind the thread_info struct. 
*/ - unsigned long stack_start = (unsigned long) current_thread_info(); + unsigned long stack_start = (unsigned long) task_stack_page(current); unsigned long sp = regs->gr[30]; unsigned long stack_usage; unsigned int *last_usage; @@ -476,7 +476,7 @@ static void execute_on_irq_stack(void *func, unsigned long param1) union_ptr = &per_cpu(irq_stack_union, smp_processor_id()); irq_stack = (unsigned long) &union_ptr->stack; irq_stack = ALIGN(irq_stack + sizeof(irq_stack_union.slock), - 64); /* align for stack frame usage */ + FRAME_ALIGN); /* align for stack frame usage */ /* We may be called recursive. If we are already using the irq stack, * just continue to use it. Use spinlocks to serialize diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 38ec4ae812396..cd749bf3d70d9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -205,7 +205,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, /* Must exit via ret_from_kernel_thread in order * to call schedule_tail() */ - cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE; + cregs->ksp = (unsigned long) stack + FRAME_SIZE + PT_SZ_ALGN; cregs->kpc = (unsigned long) &ret_from_kernel_thread; /* * Copy function and argument to be called from @@ -228,7 +228,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, if (likely(usp)) cregs->gr[30] = usp; } - cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE; + cregs->ksp = (unsigned long) stack + FRAME_SIZE; cregs->kpc = (unsigned long) &child_return; /* Setup thread TLS area */ diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 3413e6949c872..b282c1ce00c87 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -324,7 +324,7 @@ int smp_boot_one_cpu(int cpuid, struct task_struct *idle) const struct cpuinfo_parisc *p = &per_cpu(cpu_data, cpuid); long timeout; - task_thread_info(idle)->cpu = cpuid; + idle->cpu = cpuid; /* Let _start know what logical CPU we're booting ** (offset into init_tasks[],cpu_data[]) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3f24a0af1e047..c396853184d87 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -139,9 +139,9 @@ linux_gateway_entry: xor %r1,%r30,%r30 /* ye olde xor trick */ xor %r1,%r30,%r1 xor %r1,%r30,%r30 - - ldo THREAD_SZ_ALGN+FRAME_SIZE(%r30),%r30 /* set up kernel stack */ + LDREG TASK_STACK(%r30),%r30 /* set up kernel stack */ + ldo FRAME_SIZE(%r30),%r30 /* N.B.: It is critical that we don't set sr7 to 0 until r30 * contains a valid kernel stack pointer. It is also * critical that we don't start using the kernel stack @@ -152,7 +152,6 @@ linux_gateway_entry: ssm PSW_SM_I, %r0 /* enable interrupts */ STREGM %r1,FRAME_SIZE(%r30) /* save r1 (usp) here for now */ mfctl %cr30,%r1 /* get task ptr in %r1 */ - LDREG TI_TASK(%r1),%r1 /* Save some registers for sigcontext and potential task switch (see entry.S for the details of which ones are @@ -207,7 +206,7 @@ linux_gateway_entry: /* Are we being ptraced? */ mfctl %cr30, %r1 - LDREG TI_FLAGS(%r1),%r1 + LDREG TASK_TI_FLAGS(%r1),%r1 ldi _TIF_SYSCALL_TRACE_MASK, %r19 and,COND(=) %r1, %r19, %r0 b,n .Ltracesys @@ -272,8 +271,7 @@ tracesys: * C bit set, a non-straced syscall entry results in C and D clear * in the saved PSW. */ - ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG TI_TASK(%r1), %r1 + mfctl %cr30,%r1 /* get task ptr */ ssm 0,%r2 STREG %r2,TASK_PT_PSW(%r1) /* Lower 8 bits only!! 
*/ mfsp %sr0,%r2 @@ -327,8 +325,7 @@ tracesys_next: */ copy %ret0,%r20 - ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG TI_TASK(%r1), %r1 + mfctl %cr30,%r1 /* get task ptr */ LDREG TASK_PT_GR28(%r1), %r28 /* Restore return value */ LDREG TASK_PT_GR26(%r1), %r26 /* Restore the users args */ LDREG TASK_PT_GR25(%r1), %r25 @@ -385,16 +382,14 @@ tracesys_next: makes a direct call to syscall_trace. */ tracesys_exit: - ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG TI_TASK(%r1), %r1 + mfctl %cr30,%r1 /* get task ptr */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif ldo TASK_REGS(%r1),%r26 BL do_syscall_trace_exit,%r2 STREG %r28,TASK_PT_GR28(%r1) /* save return value now */ - ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG TI_TASK(%r1), %r1 + mfctl %cr30,%r1 /* get task ptr */ LDREG TASK_PT_GR28(%r1), %r28 /* Restore return val. */ ldil L%syscall_exit,%r1 @@ -407,8 +402,7 @@ tracesys_exit: ldo R%tracesys_sigexit(%r2),%r2 tracesys_sigexit: - ldo -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* get task ptr */ - LDREG TI_TASK(%r1), %r1 + mfctl %cr30,%r1 /* get task ptr */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 524781eae4dde..690e6abcaf221 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -144,7 +144,7 @@ void show_regs(struct pt_regs *regs) printk("%s IIR: %08lx ISR: " RFMT " IOR: " RFMT "\n", level, regs->iir, regs->isr, regs->ior); printk("%s CPU: %8d CR30: " RFMT " CR31: " RFMT "\n", - level, current_thread_info()->cpu, cr30, cr31); + level, task_cpu(current), cr30, cr31); printk("%s ORIG_R28: " RFMT "\n", level, regs->orig_r28); if (user) { diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 34676658c0401..42acc3b520174 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -299,12 +300,9 @@ static void unwind_frame_regs(struct unwind_frame_info *info) info->prev_sp = sp - 64; info->prev_ip = 0; - /* The stack is at the end inside the thread_union - * struct. If we reach data, we have reached the - * beginning of the stack and should stop unwinding. */ - if (info->prev_sp >= (unsigned long) task_thread_info(info->t) && - info->prev_sp < ((unsigned long) task_thread_info(info->t) + THREAD_SZ_ALGN)) { + /* Check if stack is inside kernel stack area */ + if ((info->prev_sp - (unsigned long) task_stack_page(info->t)) >= THREAD_SIZE) { info->prev_sp = 0; break; } From 72b7a31c8ec262cbd78839be8154542a96ba9bee Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 15 Oct 2021 21:49:23 +0200 Subject: [PATCH 0544/1202] parisc/kgdb: add kgdb_roundup() to make kgdb work with idle polling With idle polling, IPIs are not sent when a CPU is idle, but queued and run later from do_idle(). The default kgdb_call_nmi_hook() implementation gets the pointer to struct pt_regs from get_irq_regs(), which doesn't work in that case because it was not called from the IPI interrupt handler. Fix it by defining our own kgdb_roundup() function which sends an IPI_ENTER_KGDB. When that IPI is received on the target CPU, kgdb_nmicallback() is called.
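In outline, the fix adds a parisc-specific roundup (complete hunks below):

    #ifdef CONFIG_KGDB
    void kgdb_roundup_cpus(void)
    {
            send_IPI_allbutself(IPI_ENTER_KGDB);
    }
    #endif

and a matching case in ipi_interrupt(); since the IPI arrives as a real interrupt, get_irq_regs() returns valid registers there:

    case IPI_ENTER_KGDB:
            smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu);
            kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
            break;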
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/smp.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index b282c1ce00c87..171925285f3eb 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -69,7 +70,10 @@ enum ipi_message_type { IPI_CALL_FUNC, IPI_CPU_START, IPI_CPU_STOP, - IPI_CPU_TEST + IPI_CPU_TEST, +#ifdef CONFIG_KGDB + IPI_ENTER_KGDB, +#endif }; @@ -167,7 +171,12 @@ ipi_interrupt(int irq, void *dev_id) case IPI_CPU_TEST: smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu); break; - +#ifdef CONFIG_KGDB + case IPI_ENTER_KGDB: + smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); + break; +#endif default: printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n", this_cpu, which); @@ -228,6 +237,12 @@ send_IPI_allbutself(enum ipi_message_type op) preempt_enable(); } +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + send_IPI_allbutself(IPI_ENTER_KGDB); +} +#endif inline void smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } From 7f3baa7f79ba56f704fe0fa23aa28d8e42f34a67 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Oct 2021 15:23:53 +0200 Subject: [PATCH 0545/1202] parisc: Use PRIV_USER in syscall.S Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index c396853184d87..ec9675f584353 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -78,7 +78,7 @@ ENTRY(linux_gateway_page) lws_entry: gate lws_start, %r0 /* increase privilege */ - depi 3, 31, 2, %r31 /* Ensure we return into user mode. */ + depi PRIV_USER, 31, 2, %r31 /* Ensure we return into user mode. */ /* Fill from 0xb8 to 0xe0 */ .rept 10 @@ -89,7 +89,7 @@ lws_entry: mechanism to work. DO NOT MOVE THIS CODE EVER! */ set_thread_pointer: gate .+8, %r0 /* increase privilege */ - depi 3, 31, 2, %r31 /* Ensure we return into user mode. */ + depi PRIV_USER, 31, 2, %r31 /* Ensure we return into user mode. */ be 0(%sr7,%r31) /* return to user space */ mtctl %r26, %cr27 /* move arg0 to the control register */ From 20b15037c0f9e0a56f58ac31ce94936ae72ecba0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Oct 2021 15:24:27 +0200 Subject: [PATCH 0546/1202] parisc: Use PRIV_USER and PRIV_KERNEL in ptrace.h Signed-off-by: Helge Deller --- arch/parisc/include/asm/ptrace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 143fb2a89dd85..eea3f3df08232 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -5,17 +5,17 @@ #ifndef _PARISC_PTRACE_H #define _PARISC_PTRACE_H +#include #include - #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS)) #define arch_has_single_step() 1 #define arch_has_block_step() 1 /* XXX should we use iaoq[1] or iaoq[0] ? */ -#define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0) -#define user_space(regs) (((regs)->iasq[1] != 0) ? 
1 : 0) +#define user_mode(regs) (((regs)->iaoq[0] & 3) != PRIV_KERNEL) +#define user_space(regs) ((regs)->iasq[1] != PRIV_KERNEL) #define instruction_pointer(regs) ((regs)->iaoq[0] & ~3) #define user_stack_pointer(regs) ((regs)->gr[30]) unsigned long profile_pc(struct pt_regs *); From ac0e561e5069d542d4d863f3156366c4dcc1eb4e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Oct 2021 18:56:00 +0200 Subject: [PATCH 0547/1202] parisc: Drop ifdef __KERNEL__ from non-uapi kernel headers Signed-off-by: Helge Deller --- arch/parisc/include/asm/bitops.h | 10 ---------- arch/parisc/include/asm/futex.h | 3 --- arch/parisc/include/asm/ide.h | 4 ---- arch/parisc/include/asm/mckinley.h | 2 -- arch/parisc/include/asm/processor.h | 4 ---- arch/parisc/include/asm/runway.h | 2 -- arch/parisc/include/asm/thread_info.h | 4 ---- arch/parisc/include/asm/unaligned.h | 2 -- 8 files changed, 31 deletions(-) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index aa4e883431c1a..daa2afd974fbf 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -104,8 +104,6 @@ static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr) #include -#ifdef __KERNEL__ - /** * __ffs - find first bit in word. returns 0 to "BITS_PER_LONG-1". * @word: The word to search @@ -205,16 +203,8 @@ static __inline__ int fls(unsigned int x) #include #include #include - -#endif /* __KERNEL__ */ - #include - -#ifdef __KERNEL__ - #include #include -#endif /* __KERNEL__ */ - #endif /* _PARISC_BITOPS_H */ diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index fceb9cf02fb3a..e38a118cf65d5 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -2,8 +2,6 @@ #ifndef _ASM_PARISC_FUTEX_H #define _ASM_PARISC_FUTEX_H -#ifdef __KERNEL__ - #include #include #include @@ -119,5 +117,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return 0; } -#endif /*__KERNEL__*/ #endif /*_ASM_PARISC_FUTEX_H*/ diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h index 34cdac01ed354..7aa75b93a1b6c 100644 --- a/arch/parisc/include/asm/ide.h +++ b/arch/parisc/include/asm/ide.h @@ -12,8 +12,6 @@ #ifndef __ASM_PARISC_IDE_H #define __ASM_PARISC_IDE_H -#ifdef __KERNEL__ - /* Generic I/O and MEMIO string operations. */ #define __ide_insw insw @@ -53,6 +51,4 @@ static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count) } } -#endif /* __KERNEL__ */ - #endif /* __ASM_PARISC_IDE_H */ diff --git a/arch/parisc/include/asm/mckinley.h b/arch/parisc/include/asm/mckinley.h index eb84dbeb7fd9c..1314390b9034b 100644 --- a/arch/parisc/include/asm/mckinley.h +++ b/arch/parisc/include/asm/mckinley.h @@ -1,10 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_PARISC_MCKINLEY_H #define ASM_PARISC_MCKINLEY_H -#ifdef __KERNEL__ /* declared in arch/parisc/kernel/setup.c */ extern struct proc_dir_entry * proc_mckinley_root; -#endif /*__KERNEL__*/ #endif /*ASM_PARISC_MCKINLEY_H*/ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 0d567774f506e..95764c3633e48 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -38,16 +38,12 @@ #define DEFAULT_MAP_BASE DEFAULT_MAP_BASE32 #endif -#ifdef __KERNEL__ - /* XXX: STACK_TOP actually should be STACK_BOTTOM for parisc. 
* prumpf */ #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX DEFAULT_TASK_SIZE -#endif - #ifndef __ASSEMBLY__ unsigned long calc_max_stack_size(unsigned long stack_max); diff --git a/arch/parisc/include/asm/runway.h b/arch/parisc/include/asm/runway.h index f3cfe69439f65..5cf061376ddb1 100644 --- a/arch/parisc/include/asm/runway.h +++ b/arch/parisc/include/asm/runway.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ASM_PARISC_RUNWAY_H #define ASM_PARISC_RUNWAY_H -#ifdef __KERNEL__ /* declared in arch/parisc/kernel/setup.c */ extern struct proc_dir_entry * proc_runway_root; @@ -9,5 +8,4 @@ extern struct proc_dir_entry * proc_runway_root; #define RUNWAY_STATUS 0x10 #define RUNWAY_DEBUG 0x40 -#endif /* __KERNEL__ */ #endif /* ASM_PARISC_RUNWAY_H */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 4617303e0620d..444588443c04c 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -2,8 +2,6 @@ #ifndef _ASM_PARISC_THREAD_INFO_H #define _ASM_PARISC_THREAD_INFO_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include #include @@ -80,6 +78,4 @@ struct thread_info { # define is_32bit_task() (1) #endif -#endif /* __KERNEL__ */ - #endif /* _ASM_PARISC_THREAD_INFO_H */ diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h index 3bda16773ba63..c0621295100d5 100644 --- a/arch/parisc/include/asm/unaligned.h +++ b/arch/parisc/include/asm/unaligned.h @@ -4,10 +4,8 @@ #include -#ifdef __KERNEL__ struct pt_regs; void handle_unaligned(struct pt_regs *regs); int check_unaligned(struct pt_regs *regs); -#endif #endif /* _ASM_PARISC_UNALIGNED_H */ From 015fe822d29f6655e1e00a5727cbace1fdea9725 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Oct 2021 22:55:50 +0200 Subject: [PATCH 0548/1202] soc: document merges Signed-off-by: Arnd Bergmann --- arch/arm/arm-soc-for-next-contents.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/arm-soc-for-next-contents.txt b/arch/arm/arm-soc-for-next-contents.txt index 761cedbc1b0af..709426e5af34f 100644 --- a/arch/arm/arm-soc-for-next-contents.txt +++ b/arch/arm/arm-soc-for-next-contents.txt @@ -90,7 +90,7 @@ arm/dt git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts64-2 rockchip/dt-2 git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip tags/v5.16-rockchip-dts32-2 - (bc22b6208f416ba702c17a93c9af6474f19e8212) + broadcom/dt-fixes https://github.com/Broadcom/stblinux tags/arm-soc/for-5.15/devicetree mstar/dt https://github.com/linux-chenxing/linux mstar-dt-next @@ -133,6 +133,10 @@ arm/drivers git://git.linaro.org/people/jens.wiklander/linux-tee tags/optee-ffa-for-v5.16 drivers/reset git://git.pengutronix.de/pza/linux tags/reset-for-v5.16 + drivers/memory-2 + git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl tags/memory-controller-drv-5.16-2 + patch + optee: smc_abi.c: add missing #include arm/defconfigs defconfig/multiv7 @@ -149,6 +153,8 @@ arm/defconfigs git://git.kernel.org/pub/scm/linux/kernel/git/iwamatsu/linux-visconti tags/visconti-arm-defconfig-for-v5.16 imx/defconfig git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux tags/imx-defconfig-5.16 + aspeed/defconfigs + git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc tags/aspeed-5.16-defconfig arm/newsoc From dec9d62cc41171096aa43c092d1c1465aa2dbb56 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 6 Oct 2021 10:49:10 -0700 Subject: [PATCH 0549/1202] f2fs: include 
non-compressed blocks in compr_written_block Need to include non-compressed blocks in compr_written_block to estimate average compression ratio more accurately. Fixes: 5ac443e26a09 ("f2fs: add sysfs nodes to get runtime compression stat") Cc: stable@vger.kernel.org Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1bf9ad4c2207..9b663eaf48057 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1530,6 +1530,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, if (cluster_may_compress(cc)) { err = f2fs_compress_pages(cc); if (err == -EAGAIN) { + add_compr_block_stat(cc->inode, cc->cluster_size); goto write; } else if (err) { f2fs_put_rpages_wbc(cc, wbc, true, 1); From b84a90d511cfb188683e57114d3d31f1511451f9 Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Tue, 12 Oct 2021 20:29:04 -0700 Subject: [PATCH 0550/1202] f2fs: replace snprintf in show functions with sysfs_emit coccicheck complains about the use of snprintf() in sysfs show functions. Fix the following coccicheck warning: fs/f2fs/sysfs.c:198:12-20: WARNING: use scnprintf or sprintf. fs/f2fs/sysfs.c:247:8-16: WARNING: use scnprintf or sprintf. Using sysfs_emit instead of scnprintf or sprintf makes more sense. Signed-off-by: Qing Wang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a32fe31c33b8e..0fb891efdeada 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -196,7 +196,7 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct super_block *sb = sbi->sb; if (f2fs_sb_has_casefold(sbi)) - return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", + return sysfs_emit(buf, "%s (%d.%d.%d)\n", sb->s_encoding->charset, (sb->s_encoding->version >> 16) & 0xff, (sb->s_encoding->version >> 8) & 0xff, @@ -245,7 +245,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, static ssize_t main_blkaddr_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)MAIN_BLKADDR(sbi)); } From 25fbbe50d32fb8c3cac427bbf1abd7e33ae142d6 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 29 Sep 2021 11:12:03 -0700 Subject: [PATCH 0551/1202] f2fs: introduce fragment allocation mode mount option Added two options to the "mode=" mount option to make it possible for developers to simulate filesystem fragmentation/after-GC situations themselves. The developers use these modes to understand filesystem fragmentation/after-GC conditions well, and eventually get some insights to handle them better. "fragment:segment": f2fs allocates a new segment in random position. With this, we can simulate the after-GC condition. "fragment:block": We can scatter block allocation with "max_fragment_chunk" and "max_fragment_hole" sysfs nodes. f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole in the length of 1..<max_fragment_hole> by turns in a newly allocated free segment. Plus, this mode implicitly enables "fragment:segment" option for more randomness.
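The heart of "fragment:block" mode is in __refresh_next_blkoff() (see the segment.c hunk below): once the current chunk is used up, a random-sized hole is skipped and a new random-sized chunk starts:

    seg->next_blkoff++;
    if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
            /* allocate 1..max_fragment_chunk blocks, then leave a hole
             * of 1..max_fragment_hole blocks, by turns */
            if (--seg->fragment_remained_chunk <= 0) {
                    seg->fragment_remained_chunk =
                            prandom_u32() % sbi->max_fragment_chunk + 1;
                    seg->next_blkoff +=
                            prandom_u32() % sbi->max_fragment_hole + 1;
            }
    }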
Reviewed-by: Chao Yu Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 16 ++++++++++++++++ Documentation/filesystems/f2fs.rst | 18 ++++++++++++++++++ fs/f2fs/f2fs.h | 19 +++++++++++++++++-- fs/f2fs/gc.c | 5 ++++- fs/f2fs/segment.c | 20 ++++++++++++++++++-- fs/f2fs/segment.h | 1 + fs/f2fs/super.c | 10 ++++++++++ fs/f2fs/sysfs.c | 20 ++++++++++++++++++++ 8 files changed, 104 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index f627e705e663b..b268e3e18b4a3 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -512,3 +512,19 @@ Date: July 2021 Contact: "Daeho Jeong" Description: You can control the multiplier value of bdi device readahead window size between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option. + +What: /sys/fs/f2fs/<disk>/max_fragment_chunk +Date: August 2021 +Contact: "Daeho Jeong" +Description: With the "mode=fragment:block" mount option, we can scatter block allocation. + f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole + in the length of 1..<max_fragment_hole> by turns. This value can be set + between 1..512 and the default value is 4. + +What: /sys/fs/f2fs/<disk>/max_fragment_hole +Date: August 2021 +Contact: "Daeho Jeong" +Description: With the "mode=fragment:block" mount option, we can scatter block allocation. + f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole + in the length of 1..<max_fragment_hole> by turns. This value can be set + between 1..512 and the default value is 4. diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 09de6ebbbdfa2..4294db649fa82 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -201,6 +201,24 @@ fault_type=%d Support configuring fault injection type, should be mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. + "fragment:segment" and "fragment:block" are newly added here. + These are developer options for experiments to simulate filesystem + fragmentation/after-GC situations. The developers use these + modes to understand filesystem fragmentation/after-GC conditions well, + and eventually get some insights to handle them better. + In "fragment:segment", f2fs allocates a new segment in random + position. With this, we can simulate the after-GC condition. + In "fragment:block", we can scatter block allocation with + "max_fragment_chunk" and "max_fragment_hole" sysfs nodes. + We added some randomness to both chunk and hole size to make + it close to a realistic IO pattern. So, in this mode, f2fs will allocate + 1..<max_fragment_chunk> blocks in a chunk and make a hole in the + length of 1..<max_fragment_hole> by turns. With this, the newly + allocated blocks will be scattered throughout the whole partition. + Note that "fragment:block" implicitly enables "fragment:segment" + option for more randomness. + Please use these options only for your experiments; we strongly + recommend re-formatting the filesystem after using these options. io_bits=%u Set the bit size of write IO requests. It should be set with "mode=lfs". usrquota Enable plain user disk quota accounting.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 039a229e11c96..e2a35aa3ad9a0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1287,8 +1287,10 @@ enum { }; enum { - FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ - FS_MODE_LFS, /* use lfs allocation only */ + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ + FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */ + FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */ }; enum { @@ -1759,6 +1761,9 @@ struct f2fs_sb_info { unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */ + int max_fragment_chunk; /* max chunk size for block fragmentation mode */ + int max_fragment_hole; /* max hole size for block fragmentation mode */ + #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ @@ -3519,6 +3524,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +#define DEF_FRAGMENT_SIZE 4 +#define MIN_FRAGMENT_SIZE 1 +#define MAX_FRAGMENT_SIZE 512 + +static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG || + F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK; +} + /* * checkpoint.c */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 77391e3b7d68f..a946ce0ead341 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first in no_heap mode*/ - if (test_opt(sbi, NOHEAP) && + if (f2fs_need_rand_seg(sbi)) + p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec); + else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3189537a19dc5..bdaf5fbdfb183 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -2649,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); + if (f2fs_need_rand_seg(sbi)) + return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec); /* if segs_per_sec is large than 1, we need to keep original policy. 
*/ if (__is_large_section(sbi)) @@ -2700,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) + curseg->fragment_remained_chunk = + prandom_u32() % sbi->max_fragment_chunk + 1; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -2726,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, struct curseg_info *seg) { - if (seg->alloc_type == SSR) + if (seg->alloc_type == SSR) { seg->next_blkoff = __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1); - else + } else { seg->next_blkoff++; + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) { + /* To allocate block chunks in different sizes, use random number */ + if (--seg->fragment_remained_chunk <= 0) { + seg->fragment_remained_chunk = + prandom_u32() % sbi->max_fragment_chunk + 1; + seg->next_blkoff += + prandom_u32() % sbi->max_fragment_hole + 1; + } + } + } } bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 89fff258727d1..46fde9f3f28ea 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -314,6 +314,7 @@ struct curseg_info { unsigned short next_blkoff; /* next block offset to write */ unsigned int zone; /* current zone number */ unsigned int next_segno; /* preallocated segment */ + int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */ bool inited; /* indicate inmem log is inited */ }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a133932333c5b..4dda9032534a5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -817,6 +817,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; + } else if (!strcmp(name, "fragment:segment")) { + F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; + } else if (!strcmp(name, "fragment:block")) { + F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; } else { kfree(name); return -EINVAL; @@ -1898,6 +1902,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG) + seq_puts(seq, "fragment:segment"); + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) + seq_puts(seq, "fragment:block"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", @@ -3525,6 +3533,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->migration_granularity = sbi->segs_per_sec; sbi->seq_file_ra_mul = MIN_RA_MUL; + sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; + sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 0fb891efdeada..7d289249cd7eb 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -551,6 +551,22 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "max_fragment_chunk")) { + if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE) + sbi->max_fragment_chunk = t; + else + return -EINVAL; + return count; 
+ }
+
+ if (!strcmp(a->attr.name, "max_fragment_hole")) {
+ if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
+ sbi->max_fragment_hole = t;
+ else
+ return -EINVAL;
+ return count;
+ }
+
 *ui = (unsigned int)t;
 return count;
@@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = {
 ATTR_LIST(seq_file_ra_mul),
 ATTR_LIST(gc_segment_mode),
 ATTR_LIST(gc_reclaimed_segments),
+ ATTR_LIST(max_fragment_chunk),
+ ATTR_LIST(max_fragment_hole),
 NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);

From 6d135345f6adff3a8a564e352e9a8c25ae8f1621 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Wed, 1 Sep 2021 14:39:20 +0800
Subject: [PATCH 0552/1202] f2fs: multidevice: support direct IO

Commit 3c62be17d4f5 ("f2fs: support multiple devices") missed supporting
direct IO for the multiple device feature; this patch adds support for
the missing part of the multidevice feature.

In addition, for a multiple device image, we should be aware of any issued
direct write IO rather than just buffered write IO, so that fsync and
syncfs can issue a preflush command to the device where the direct write
IO goes, to persist user data for POSIX compliance.

Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 57 +++++++++++++++++++++++++++++++++++--
 fs/f2fs/f2fs.h | 25 ++++++++++++++--
 fs/f2fs/segment.c | 35 ++++++++++++++---------
 fs/f2fs/super.c | 7 +++++
 include/trace/events/f2fs.h | 21 ++++++++++----
 5 files changed, 120 insertions(+), 25 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f4fd6c246c9a9..84d5d6c387e00 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 struct extent_info ei = {0, };
 block_t blkaddr;
 unsigned int start_pgofs;
+ int bidx = 0;

 if (!maxblocks)
 return 0;

+ map->m_bdev = inode->i_sb->s_bdev;
+ map->m_multidev_dio =
+ f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
+
 map->m_len = 0;
 map->m_flags = 0;
@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 if (flag == F2FS_GET_BLOCK_DIO)
 f2fs_wait_on_block_writeback_range(inode,
 map->m_pblk, map->m_len);
+
+ if (map->m_multidev_dio) {
+ block_t blk_addr = map->m_pblk;
+
+ bidx = f2fs_target_device_index(sbi, map->m_pblk);
+
+ map->m_bdev = FDEV(bidx).bdev;
+ map->m_pblk -= FDEV(bidx).start_blk;
+ map->m_len = min(map->m_len,
+ FDEV(bidx).end_blk + 1 - map->m_pblk);
+
+ if (map->m_may_create)
+ f2fs_update_device_state(sbi, inode->i_ino,
+ blk_addr, map->m_len);
+ }
 goto out;
 }
@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 if (flag == F2FS_GET_BLOCK_PRE_AIO)
 goto skip;
+
+ if (map->m_multidev_dio)
+ bidx = f2fs_target_device_index(sbi, blkaddr);
+
 if (map->m_len == 0) {
 /* preallocated unwritten block should be mapped for fiemap.
*/ if (blkaddr == NEW_ADDR) @@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_pblk = blkaddr; map->m_len = 1; + + if (map->m_multidev_dio) + map->m_bdev = FDEV(bidx).bdev; } else if ((map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || flag == F2FS_GET_BLOCK_PRE_DIO) { + if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) + goto sync_out; ofs++; map->m_len++; } else { @@ -1673,11 +1701,31 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, sync_out: - /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { + /* + * for hardware encryption, but to avoid potential issue + * in future + */ f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + if (map->m_multidev_dio) { + block_t blk_addr = map->m_pblk; + + bidx = f2fs_target_device_index(sbi, map->m_pblk); + + map->m_bdev = FDEV(bidx).bdev; + map->m_pblk -= FDEV(bidx).start_blk; + + if (map->m_may_create) + f2fs_update_device_state(sbi, inode->i_ino, + blk_addr, map->m_len); + + f2fs_bug_on(sbi, blk_addr + map->m_len > + FDEV(bidx).end_blk + 1); + } + } + if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; @@ -1696,7 +1744,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_balance_fs(sbi, dn.node_changed); } out: - trace_f2fs_map_blocks(inode, map, err); + trace_f2fs_map_blocks(inode, map, create, flag, err); return err; } @@ -1755,6 +1803,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; bh->b_size = blks_to_bytes(inode, map.m_len); + + if (map.m_multidev_dio) + bh->b_bdev = map.m_bdev; } return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e2a35aa3ad9a0..b8e8f8c716b04 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -620,6 +620,7 @@ struct extent_tree { F2FS_MAP_UNWRITTEN) struct f2fs_map_blocks { + struct block_device *m_bdev; /* for multi-device dio */ block_t m_pblk; block_t m_lblk; unsigned int m_len; @@ -628,6 +629,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; bool m_may_create; /* indicate it is from write path */ + bool m_multidev_dio; /* indicate it allows multi-device dio */ }; /* for flag in get_data_block */ @@ -1733,12 +1735,15 @@ struct f2fs_sb_info { /* For shrinker support */ struct list_head s_list; + struct mutex umount_mutex; + unsigned int shrinker_run_no; + + /* For multi devices */ int s_ndevs; /* number of devices */ struct f2fs_dev_info *devs; /* for device list */ unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ - struct mutex umount_mutex; - unsigned int shrinker_run_no; + bool aligned_blksize; /* all devices has the same logical blksize */ /* For write statistics */ u64 sectors_written_start; @@ -3500,6 +3505,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, + block_t blkaddr, unsigned int blkcnt); void f2fs_wait_on_page_writeback(struct page *page, enum page_type 
type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); @@ -4320,6 +4327,16 @@ static inline int block_unaligned_IO(struct inode *inode, return align & blocksize_mask; } +static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, + int flag) +{ + if (!f2fs_is_multi_device(sbi)) + return false; + if (flag != F2FS_GET_BLOCK_DIO) + return false; + return sbi->aligned_blksize; +} + static inline bool f2fs_force_buffered_io(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { @@ -4328,7 +4345,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (f2fs_post_read_required(inode)) return true; - if (f2fs_is_multi_device(sbi)) + + /* disallow direct IO if any of devices has unaligned blksize */ + if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) return true; /* * for blkzoned device, fallback direct IO to buffered IO, so diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bdaf5fbdfb183..526423fe84ce8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3520,24 +3520,30 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, up_read(&SM_I(sbi)->curseg_lock); } -static void update_device_state(struct f2fs_io_info *fio) +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, + block_t blkaddr, unsigned int blkcnt) { - struct f2fs_sb_info *sbi = fio->sbi; - unsigned int devidx; - if (!f2fs_is_multi_device(sbi)) return; - devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + while (1) { + unsigned int devidx = f2fs_target_device_index(sbi, blkaddr); + unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1; - /* update device state for fsync */ - f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + /* update device state for fsync */ + f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO); - /* update device state for checkpoint */ - if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { - spin_lock(&sbi->dev_lock); - f2fs_set_bit(devidx, (char *)&sbi->dirty_device); - spin_unlock(&sbi->dev_lock); + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + if (blkcnt <= blks) + break; + blkcnt -= blks; + blkaddr += blks; } } @@ -3564,7 +3570,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) goto reallocate; } - update_device_state(fio); + f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); if (keep_order) up_read(&fio->sbi->io_order_lock); @@ -3653,7 +3659,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) else err = f2fs_submit_page_bio(fio); if (!err) { - update_device_state(fio); + f2fs_update_device_state(fio->sbi, fio->ino, + fio->new_blkaddr, 1); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4dda9032534a5..94da8212e5354 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3759,6 +3759,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); unsigned int max_devices = MAX_DEVICES; + unsigned int logical_blksize; int i; /* Initialize single device information */ @@ -3779,6 +3780,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (!sbi->devs) return -ENOMEM; + logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev); + sbi->aligned_blksize = true; + for (i = 0; i < max_devices; i++) { if 
(i > 0 && !RDEV(i).path[0]) @@ -3815,6 +3819,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) /* to release errored devices */ sbi->s_ndevs = i + 1; + if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev)) + sbi->aligned_blksize = false; + #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4cb055af1ec0b..f8cb916f35958 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter, ); TRACE_EVENT(f2fs_map_blocks, - TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, + int create, int flag, int ret), - TP_ARGS(inode, map, ret), + TP_ARGS(inode, map, create, flag, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks, __field(unsigned int, m_flags) __field(int, m_seg_type) __field(bool, m_may_create) + __field(bool, m_multidev_dio) + __field(int, create) + __field(int, flag) __field(int, ret) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev = map->m_bdev->bd_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_pblk = map->m_pblk; @@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags = map->m_flags; __entry->m_seg_type = map->m_seg_type; __entry->m_may_create = map->m_may_create; + __entry->m_multidev_dio = map->m_multidev_dio; + __entry->create = create; + __entry->flag = flag; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx, flags = %u," - "seg_type = %d, may_create = %d, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, " + "seg_type = %d, may_create = %d, multidevice = %d, " + "create = %d, flag = %d, err = %d", show_dev_ino(__entry), (unsigned long long)__entry->m_lblk, (unsigned long long)__entry->m_pblk, @@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags, __entry->m_seg_type, __entry->m_may_create, + __entry->m_multidev_dio, + __entry->create, + __entry->flag, __entry->ret) ); From 7e5e744183bbb0ad02412b21b1e61380c998dd18 Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Wed, 18 Dec 2019 16:43:16 +0200 Subject: [PATCH 0553/1202] soc: fsl: dpio: fix qbman alignment error in the virtualization context When running as a guest, under KVM, the CENA region is mapped as device memory, so uncacheable. When the memory is mapped as device memory, the unaligned accesses are not allowed. Memcpy is optimized to transfer 8 bytes at a time regardless of the start address and might cause alignment issues. 
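To illustrate the fix (a hedged sketch, not the dpio code itself; example_write_desc() and the u32-word descriptor layout are made up for the example), a driver copying a descriptor into an uncached device mapping should go through the I/O copy helpers, which only issue accesses that are safe on device memory:

#include <linux/io.h>
#include <linux/types.h>

/*
 * memcpy() may emit 8-byte (or wider) loads/stores at unaligned
 * offsets; on a Device-type mapping those fault. memcpy_toio()
 * uses I/O accessors that avoid the problem.
 */
static void example_write_desc(void __iomem *dst, const u32 *desc,
                               size_t words)
{
        memcpy_toio(dst, desc, words * sizeof(u32));
}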
Signed-off-by: Diana Craciun Signed-off-by: Ioana Ciornei Signed-off-by: Li Yang --- drivers/soc/fsl/dpio/qbman-portal.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 3ec8ab08b9889..3016d3e7d1d4f 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -674,9 +674,9 @@ int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, for (i = 0; i < num_enqueued; i++) { p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); /* Skip copying the verb */ - memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); - memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], - &fd[i], sizeof(*fd)); + memcpy_toio((__iomem void *)&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); + memcpy_toio((__iomem void *)&p[EQ_DESC_SIZE_FD_START / sizeof(uint32_t)], + &fd[i], sizeof(*fd)); eqcr_pi++; } @@ -756,9 +756,9 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, for (i = 0; i < num_enqueued; i++) { p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); /* Skip copying the verb */ - memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); - memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], - &fd[i], sizeof(*fd)); + memcpy_toio((__iomem void *)&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); + memcpy_toio((__iomem void *)&p[EQ_DESC_SIZE_FD_START / sizeof(uint32_t)], + &fd[i], sizeof(*fd)); eqcr_pi++; } @@ -829,9 +829,9 @@ int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); cl = (uint32_t *)(&d[i]); /* Skip copying the verb */ - memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); - memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], - &fd[i], sizeof(*fd)); + memcpy_toio((__iomem void *)&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); + memcpy_toio((__iomem void *)&p[EQ_DESC_SIZE_FD_START / sizeof(uint32_t)], + &fd[i], sizeof(*fd)); eqcr_pi++; } @@ -899,9 +899,9 @@ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); cl = (uint32_t *)(&d[i]); /* Skip copying the verb */ - memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); - memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], - &fd[i], sizeof(*fd)); + memcpy_toio((__iomem void *)&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); + memcpy_toio((__iomem void *)&p[EQ_DESC_SIZE_FD_START / sizeof(uint32_t)], + &fd[i], sizeof(*fd)); eqcr_pi++; } From 037495eb8133281667b6dbc98912086825015286 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 Oct 2021 09:40:27 +0200 Subject: [PATCH 0554/1202] stacktrace: Provide stack_trace_save_tsk() stub in the !CONFIG_STACKTRACE case too The following commit: bc9bbb81730e ("x86: Fix get_wchan() to support the ORC unwinder") Added stack_trace_save_tsk() use to __get_wchan(), while this method is not unconditionally defined: it's not available in the !CONFIG_STACKTRACE case. Give a default implementation that does nothing. 
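A hedged sketch of the call-site pattern the stub enables (the caller below is illustrative, modeled on __get_wchan(), not the actual x86 code): with the default implementation in place, common code can call stack_trace_save_tsk() unconditionally.

#include <linux/sched.h>
#include <linux/stacktrace.h>

static unsigned long example_first_entry(struct task_struct *tsk)
{
        unsigned long entry = 0;

        /*
         * With CONFIG_STACKTRACE this saves the first stack entry;
         * with the stub it is a no-op that leaves entry at 0, so no
         * #ifdef is needed at the call site.
         */
        stack_trace_save_tsk(tsk, &entry, 1, 0);
        return entry;
}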
Reported-by: Stephen Rothwell Fixes: bc9bbb81730e ("x86: Fix get_wchan() to support the ORC unwinder") Cc: Qi Zheng Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 9edecb494e9e2..3ccaf599630f4 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -91,8 +91,19 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern int save_stack_trace_tsk_reliable(struct task_struct *tsk, struct stack_trace *trace); extern void save_stack_trace_user(struct stack_trace *trace); + #endif /* !CONFIG_ARCH_STACKWALK */ -#endif /* CONFIG_STACKTRACE */ + +#else /* !CONFIG_STACKTRACE: */ +static inline unsigned int +stack_trace_save_tsk(struct task_struct *task, + unsigned long *store, unsigned int size, + unsigned int skipnr) +{ + return -ENOSYS; +} + +#endif /* !CONFIG_STACKTRACE */ #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE) int stack_trace_save_tsk_reliable(struct task_struct *tsk, unsigned long *store, From c79bbd9d50bab4d5f6503059bac8fba86c7631ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Oct 2021 09:05:35 +0200 Subject: [PATCH 0555/1202] memcg, kmem: mark cancel_charge() inline cancel_charge() is no longer called for CONFIG_MEMCG_KMEM on NOMMU targets, which causes a compiletime warning: mm/memcontrol.c:2774:13: error: unused function 'cancel_charge' [-Werror,-Wunused-function] Remove the now-incorrect #ifdef and just mark the function 'inline' like the other related helpers. This is simpler and means we no longer have to match the #ifdef with the caller. Fixes: 5f3345c17079 ("memcg, kmem: further deprecate kmem.limit_in_bytes") Reviewed-by: Muchun Song Signed-off-by: Arnd Bergmann Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 26d7a9b9f9123..0fd96cc4e7444 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2771,8 +2771,7 @@ static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, return try_charge_memcg(memcg, gfp_mask, nr_pages); } -#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MMU) -static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) +static inline void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) { if (mem_cgroup_is_root(memcg)) return; @@ -2781,7 +2780,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) if (do_memsw_account()) page_counter_uncharge(&memcg->memsw, nr_pages); } -#endif static void commit_charge(struct page *page, struct mem_cgroup *memcg) { From 24c291acad480f00077e002a77f0d575cd35d4de Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:07:10 +1100 Subject: [PATCH 0556/1202] memcg-kmem-further-deprecate-kmemlimit_in_bytes-checkpatch-fixes WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Vasily Averin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0fd96cc4e7444..e4fcb6e55f750 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3764,7 +3764,7 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, break; case _KMEM: /* kmem.limit_in_bytes is deprecated. 
 */
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
 break;
 case _TCP:
 ret = memcg_update_tcp_max(memcg, nr_pages);

From 1fb73708a1331cb44cf5338185aff50b8c519bba Mon Sep 17 00:00:00 2001
From: Vasily Averin
Date: Thu, 21 Oct 2021 15:07:11 +1100
Subject: [PATCH 0557/1202] memcg: prohibit unconditional exceeding the limit of dying tasks

Memory cgroup charging allows killed or exiting tasks to exceed the hard
limit. It is assumed that the amount of the memory charged by those tasks
is bound and most of the memory will get released while the task is
exiting. This resembles the heuristic for the global OOM situation, where
tasks get access to memory reserves. There is no global memory shortage
at the memcg level, so the memcg heuristic is more relaxed.

The above assumption is overly optimistic though. E.g. vmalloc can scale
to really large requests and the heuristic would allow that. We used to
have an early break in the vmalloc allocator for killed tasks but this
has been reverted by b8c8a338f75e (Revert "vmalloc: back off when the
current task is killed"). There are likely other similar code paths which
do not check for fatal signals in an allocation&charge loop. Also there
are some kernel objects charged to a memcg which are not bound to a
process life time.

It has been observed that it is not really hard to trigger these bypasses
and cause a global OOM situation.

One potential way to address these runaways would be to limit the amount
of excess (similar to the global OOM with limited oom reserves). This is
certainly possible but it is not really clear how much of an excess is
desirable and still protects from global OOMs as that would have to
consider the overall memcg configuration.

This patch is addressing the problem by removing the heuristic
altogether. Bypass is only allowed for requests which either cannot fail
or where the failure is not desirable while excess should be still
limited (e.g. atomic requests). Implementation-wise a killed or dying
task fails to charge if it has passed the OOM killer stage. That should
give all forms of reclaim a chance to restore the limit before the
failure (ENOMEM) and tell the caller to back off.

[mhocko@suse.com: new changelog]
Link: https://lkml.kernel.org/r/817a6ce2-4da9-72ac-c5b9-edd398d28a15@virtuozzo.com
Signed-off-by: Vasily Averin
Suggested-by: Michal Hocko
Cc: Michal Hocko
Cc: Johannes Weiner
Cc: Vladimir Davydov
Cc: Tetsuo Handa
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/memcontrol.c | 27 ++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e4fcb6e55f750..5b00db3a8d530 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -234,7 +234,7 @@ enum res_type {
 iter != NULL; \
 iter = mem_cgroup_iter(NULL, iter, NULL))

-static inline bool should_force_charge(void)
+static inline bool task_is_dying(void)
 {
 return tsk_is_oom_victim(current) || fatal_signal_pending(current) ||
 (current->flags & PF_EXITING);
@@ -1624,7 +1624,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 * A few threads which were not waiting at mutex_lock_killable() can
 * fail to bail out. Therefore, check again after holding oom_lock.
 */
- ret = should_force_charge() || out_of_memory(&oc);
+ ret = task_is_dying() || out_of_memory(&oc);
 unlock:
 mutex_unlock(&oom_lock);
@@ -2579,6 +2579,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 struct page_counter *counter;
 enum oom_status oom_status;
 unsigned long nr_reclaimed;
+ bool passed_oom = false;
 bool may_swap = true;
 bool drained = false;
 unsigned long pflags;
@@ -2613,15 +2614,6 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 if (gfp_mask & __GFP_ATOMIC)
 goto force;

- /*
- * Unlike in global OOM situations, memcg is not in a physical
- * memory shortage. Allow dying and OOM-killed tasks to
- * bypass the last charges so that they can exit quickly and
- * free their memory.
- */
- if (unlikely(should_force_charge()))
- goto force;
-
 /*
 * Prevent unbounded recursion when reclaim operations need to
 * allocate memory. This might exceed the limits temporarily,
@@ -2679,8 +2671,9 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 if (gfp_mask & __GFP_RETRY_MAYFAIL)
 goto nomem;

- if (fatal_signal_pending(current))
- goto force;
+ /* Avoid endless loop for tasks bypassed by the oom killer */
+ if (passed_oom && task_is_dying())
+ goto nomem;

 /*
 * keep retrying as long as the memcg oom killer is able to make
@@ -2689,14 +2682,10 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 */
 oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
 get_order(nr_pages * PAGE_SIZE));
- switch (oom_status) {
- case OOM_SUCCESS:
+ if (oom_status == OOM_SUCCESS) {
+ passed_oom = true;
 nr_retries = MAX_RECLAIM_RETRIES;
 goto retry;
- case OOM_FAILED:
- goto force;
- default:
- goto nomem;
 }
 nomem:
 if (!(gfp_mask & __GFP_NOFAIL))

From 88d70628b9072d3e5996958d8f8adb5b394a4d4f Mon Sep 17 00:00:00 2001
From: Peng Liu
Date: Thu, 21 Oct 2021 15:07:12 +1100
Subject: [PATCH 0558/1202] mm/mmap.c: fix a data race of mm->total_vm

The variable mm->total_vm can be accessed concurrently during mmapping
and system accounting, as noticed by KCSAN:

BUG: KCSAN: data-race in __acct_update_integrals / mmap_region

read-write to 0xffffa40267bd14c8 of 8 bytes by task 15609 on cpu 3:
 mmap_region+0x6dc/0x1400
 do_mmap+0x794/0xca0
 vm_mmap_pgoff+0xdf/0x150
 ksys_mmap_pgoff+0xe1/0x380
 do_syscall_64+0x37/0x50
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

read to 0xffffa40267bd14c8 of 8 bytes by interrupt on cpu 2:
 __acct_update_integrals+0x187/0x1d0
 acct_account_cputime+0x3c/0x40
 update_process_times+0x5c/0x150
 tick_sched_timer+0x184/0x210
 __run_hrtimer+0x119/0x3b0
 hrtimer_interrupt+0x350/0xaa0
 __sysvec_apic_timer_interrupt+0x7b/0x220
 asm_call_irq_on_stack+0x12/0x20
 sysvec_apic_timer_interrupt+0x4d/0x80
 asm_sysvec_apic_timer_interrupt+0x12/0x20
 smp_call_function_single+0x192/0x2b0
 perf_install_in_context+0x29b/0x4a0
 __se_sys_perf_event_open+0x1a98/0x2550
 __x64_sys_perf_event_open+0x63/0x70
 do_syscall_64+0x37/0x50
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Reported by Kernel Concurrency Sanitizer on:
CPU: 2 PID: 15610 Comm: syz-executor.3 Not tainted 5.10.0+ #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014

vm_stat_account(), which is called by mmap_region(), increases total_vm,
and __acct_update_integrals() may read total_vm at the same time. This
causes a data race, which leads to undefined behaviour. To avoid the
potentially bad read/write, the accesses are annotated with
READ_ONCE()/WRITE_ONCE(), whose volatile semantics prevent the compiler
from tearing or refetching the loads and stores.
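The same annotation pattern, reduced to a hedged stand-alone sketch (the example_stats structure is made up for illustration; the kernel change itself only touches total_vm):

#include <linux/compiler.h>

struct example_stats {
        unsigned long events;
};

/* Updater: runs in the owner's context, but lockless readers exist. */
static void example_add(struct example_stats *s, long n)
{
        WRITE_ONCE(s->events, READ_ONCE(s->events) + n);
}

/* Reader, e.g. from interrupt context: a single marked load. */
static unsigned long example_read(const struct example_stats *s)
{
        return READ_ONCE(s->events);
}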
Link: https://lkml.kernel.org/r/20210913105550.1569419-1-liupeng256@huawei.com Signed-off-by: Peng Liu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/tsacct.c | 2 +- mm/mmap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 257ffb993ea23..f00de83d02462 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -137,7 +137,7 @@ static void __acct_update_integrals(struct task_struct *tsk, * the rest of the math is done in xacct_add_tsk. */ tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10; - tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10; + tsk->acct_vm_mem1 += delta * READ_ONCE(tsk->mm->total_vm) >> 10; } /** diff --git a/mm/mmap.c b/mm/mmap.c index 88dcc5c252255..b22a07f5e7614 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3332,7 +3332,7 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages) { - mm->total_vm += npages; + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); if (is_exec_mapping(flags)) mm->exec_vm += npages; From 9bee7cd2e60a455c00603b3d3bfda9ed6d5040c0 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Thu, 21 Oct 2021 15:07:13 +1100 Subject: [PATCH 0559/1202] mm: use __pfn_to_section() instead of open coding it It is defined in the same file just a few lines above. Link: https://lkml.kernel.org/r/4598487.Rc0NezkW7i@mobilepool36.emlix.com Signed-off-by: Rolf Eike Beer Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mmzone.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6a1d79d84675a..832aa49d0d8eb 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1481,7 +1481,7 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - ms = __nr_to_section(pfn_to_section_nr(pfn)); + ms = __pfn_to_section(pfn); if (!valid_section(ms)) return 0; /* @@ -1496,7 +1496,7 @@ static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - return present_section(__nr_to_section(pfn_to_section_nr(pfn))); + return present_section(__pfn_to_section(pfn)); } static inline unsigned long next_present_section_nr(unsigned long section_nr) From af481ee4c0884994fc3e5557574d77dc1f89de93 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 21 Oct 2021 15:07:14 +1100 Subject: [PATCH 0560/1202] mm/memory.c: avoid unnecessary kernel/user pointer conversion Annotating a pointer from __user to kernel and then back again might confuse sparse. In copy_huge_page_from_user() it can be avoided by removing the intermediate variable since it is never used. Link: https://lkml.kernel.org/r/20210914150820.19326-1-amit.kachhap@arm.com Signed-off-by: Amit Daniel Kachhap Acked-by: Kirill A. 
Shutemov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index c52be6d6b6055..1b7032a30dd52 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5421,7 +5421,6 @@ long copy_huge_page_from_user(struct page *dst_page, unsigned int pages_per_huge_page, bool allow_pagefault) { - void *src = (void *)usr_src; void *page_kaddr; unsigned long i, rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; @@ -5434,8 +5433,7 @@ long copy_huge_page_from_user(struct page *dst_page, else page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, - (const void __user *)(src + i * PAGE_SIZE), - PAGE_SIZE); + usr_src + i * PAGE_SIZE, PAGE_SIZE); if (allow_pagefault) kunmap(subpage); else From 53c8f985db5693684920e4f0732f77d2a77fe943 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:15 +1100 Subject: [PATCH 0561/1202] mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte Patch series "mm: A few cleanup patches around zap, shmem and uffd", v4. IMHO all of them are very nice cleanups to existing code already, they're all small and self-contained. They'll be needed by uffd-wp coming series. This patch (of 4): It was conditionally done previously, as there's one shmem special case that we use SetPageDirty() instead. However that's not necessary and it should be easier and cleaner to do it unconditionally in mfill_atomic_install_pte(). The most recent discussion about this is here, where Hugh explained the history of SetPageDirty() and why it's possible that it's not required at all: https://lore.kernel.org/lkml/alpine.LSU.2.11.2104121657050.1097@eggly.anvils/ Currently mfill_atomic_install_pte() has three callers: 1. shmem_mfill_atomic_pte 2. mcopy_atomic_pte 3. mcontinue_atomic_pte After the change: case (1) should have its SetPageDirty replaced by the dirty bit on pte (so we unify them together, finally), case (2) should have no functional change at all as it has page_in_cache==false, case (3) may add a dirty bit to the pte. However since case (3) is UFFDIO_CONTINUE for shmem, it's merely 100% sure the page is dirty after all because UFFDIO_CONTINUE normally requires another process to modify the page cache and kick the faulted thread, so should not make a real difference either. This should make it much easier to follow on which case will set dirty for uffd, as we'll simply set it all now for all uffd related ioctls. Meanwhile, no special handling of SetPageDirty() if there's no need. Link: https://lkml.kernel.org/r/20210915181456.10739-1-peterx@redhat.com Link: https://lkml.kernel.org/r/20210915181456.10739-2-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Axel Rasmussen Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: Liam Howlett Cc: Mike Rapoport Cc: Yang Shi Cc: David Hildenbrand Cc: "Kirill A . 
Shutemov" Cc: Jerome Glisse Cc: Alistair Popple Cc: Miaohe Lin Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/shmem.c | 1 - mm/userfaultfd.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7cd976b588ff1..df2f0288b9caa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2423,7 +2423,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); - SetPageDirty(page); unlock_page(page); return 0; out_delete_from_cache: diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 7a90084155343..caf6dfff2a60f 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -69,10 +69,9 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd, pgoff_t offset, max_off; _dst_pte = mk_pte(page, dst_vma->vm_page_prot); + _dst_pte = pte_mkdirty(_dst_pte); if (page_in_cache && !vm_shared) writable = false; - if (writable || !page_in_cache) - _dst_pte = pte_mkdirty(_dst_pte); if (writable) { if (wp_copy) _dst_pte = pte_mkuffd_wp(_dst_pte); From 5e7f762f12203d2b6bebd39b97cba852a21549f4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:16 +1100 Subject: [PATCH 0562/1202] mm: clear vmf->pte after pte_unmap_same() returns pte_unmap_same() will always unmap the pte pointer. After the unmap, vmf->pte will not be valid any more, we should clear it. It was safe only because no one is accessing vmf->pte after pte_unmap_same() returns, since the only caller of pte_unmap_same() (so far) is do_swap_page(), where vmf->pte will in most cases be overwritten very soon. Directly pass in vmf into pte_unmap_same() and then we can also avoid the long parameter list too, which should be a nice cleanup. Link: https://lkml.kernel.org/r/20210915181533.11188-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Miaohe Lin Reviewed-by: David Hildenbrand Reviewed-by: Liam Howlett Acked-by: Hugh Dickins Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Jerome Glisse Cc: "Kirill A . Shutemov" Cc: Matthew Wilcox Cc: Mike Rapoport Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 1b7032a30dd52..cf7b059b57a7c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2724,19 +2724,19 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range); * proceeding (but do_wp_page is only called after already making such a check; * and do_anonymous_page can safely check later on). 
*/ -static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, - pte_t *page_table, pte_t orig_pte) +static inline int pte_unmap_same(struct vm_fault *vmf) { int same = 1; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION) if (sizeof(pte_t) > sizeof(unsigned long)) { - spinlock_t *ptl = pte_lockptr(mm, pmd); + spinlock_t *ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd); spin_lock(ptl); - same = pte_same(*page_table, orig_pte); + same = pte_same(*vmf->pte, vmf->orig_pte); spin_unlock(ptl); } #endif - pte_unmap(page_table); + pte_unmap(vmf->pte); + vmf->pte = NULL; return same; } @@ -3488,7 +3488,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vm_fault_t ret = 0; void *shadow = NULL; - if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) + if (!pte_unmap_same(vmf)) goto out; entry = pte_to_swp_entry(vmf->orig_pte); From 1b40c5d984c655801d5d92323d2c34990bf8baae Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:16 +1100 Subject: [PATCH 0563/1202] mm: drop first_index/last_index in zap_details The first_index/last_index parameters in zap_details are actually only used in unmap_mapping_range_tree(). At the meantime, this function is only called by unmap_mapping_pages() once. Instead of passing these two variables through the whole stack of page zapping code, remove them from zap_details and let them simply be parameters of unmap_mapping_range_tree(), which is inlined. Link: https://lkml.kernel.org/r/20210915181535.11238-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Alistair Popple Reviewed-by: David Hildenbrand Reviewed-by: Liam Howlett Acked-by: Hugh Dickins Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Jerome Glisse Cc: "Kirill A . Shutemov" Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Rapoport Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mm.h | 2 -- mm/memory.c | 31 ++++++++++++++++++------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 32b2ecc474087..c5b600d7f85bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1688,8 +1688,6 @@ extern void user_shm_unlock(size_t, struct ucounts *); */ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ - pgoff_t first_index; /* Lowest page->index to unmap */ - pgoff_t last_index; /* Highest page->index to unmap */ struct page *single_page; /* Locked page to be unmapped */ }; diff --git a/mm/memory.c b/mm/memory.c index cf7b059b57a7c..d4ed701e3e5d2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3321,20 +3321,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, } static inline void unmap_mapping_range_tree(struct rb_root_cached *root, + pgoff_t first_index, + pgoff_t last_index, struct zap_details *details) { struct vm_area_struct *vma; pgoff_t vba, vea, zba, zea; - vma_interval_tree_foreach(vma, root, - details->first_index, details->last_index) { - + vma_interval_tree_foreach(vma, root, first_index, last_index) { vba = vma->vm_pgoff; vea = vba + vma_pages(vma) - 1; - zba = details->first_index; + zba = first_index; if (zba < vba) zba = vba; - zea = details->last_index; + zea = last_index; if (zea > vea) zea = vea; @@ -3360,18 +3360,22 @@ void unmap_mapping_page(struct page *page) { struct address_space *mapping = page->mapping; struct zap_details details = { }; + pgoff_t first_index; + pgoff_t last_index; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageTail(page)); + first_index = page->index; + last_index = page->index + 
thp_nr_pages(page) - 1; + details.check_mapping = mapping; - details.first_index = page->index; - details.last_index = page->index + thp_nr_pages(page) - 1; details.single_page = page; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) - unmap_mapping_range_tree(&mapping->i_mmap, &details); + unmap_mapping_range_tree(&mapping->i_mmap, first_index, + last_index, &details); i_mmap_unlock_write(mapping); } @@ -3391,16 +3395,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { struct zap_details details = { }; + pgoff_t first_index = start; + pgoff_t last_index = start + nr - 1; details.check_mapping = even_cows ? NULL : mapping; - details.first_index = start; - details.last_index = start + nr - 1; - if (details.last_index < details.first_index) - details.last_index = ULONG_MAX; + if (last_index < first_index) + last_index = ULONG_MAX; i_mmap_lock_write(mapping); if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) - unmap_mapping_range_tree(&mapping->i_mmap, &details); + unmap_mapping_range_tree(&mapping->i_mmap, first_index, + last_index, &details); i_mmap_unlock_write(mapping); } EXPORT_SYMBOL_GPL(unmap_mapping_pages); From c27f24dc72de462d05f2e35dd125e996f334d214 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:17 +1100 Subject: [PATCH 0564/1202] mm: add zap_skip_check_mapping() helper Use the helper for the checks. Rename "check_mapping" into "zap_mapping" because "check_mapping" looks like a bool but in fact it stores the mapping itself. When it's set, we check the mapping (it must be non-NULL). When it's cleared we skip the check, which works like the old way. Move the duplicated comments to the helper too. Link: https://lkml.kernel.org/r/20210915181538.11288-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . Shutemov" Cc: Liam Howlett Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Rapoport Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mm.h | 16 +++++++++++++++- mm/memory.c | 29 ++++++----------------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index c5b600d7f85bd..6e29deb1e0d4b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1687,10 +1687,24 @@ extern void user_shm_unlock(size_t, struct ucounts *); * Parameter block passed down to zap_pte_range in exceptional cases. */ struct zap_details { - struct address_space *check_mapping; /* Check page->mapping if set */ + struct address_space *zap_mapping; /* Check page->mapping if set */ struct page *single_page; /* Locked page to be unmapped */ }; +/* + * We set details->zap_mappings when we want to unmap shared but keep private + * pages. Return true if skip zapping this page, false otherwise. 
+ */ +static inline bool +zap_skip_check_mapping(struct zap_details *details, struct page *page) +{ + if (!details || !page) + return false; + + return details->zap_mapping && + (details->zap_mapping != page_rmapping(page)); +} + struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/memory.c b/mm/memory.c index d4ed701e3e5d2..48b2e048d2677 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1333,16 +1333,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, struct page *page; page = vm_normal_page(vma, addr, ptent); - if (unlikely(details) && page) { - /* - * unmap_shared_mapping_pages() wants to - * invalidate cache without truncating: - * unmap shared but keep private pages. - */ - if (details->check_mapping && - details->check_mapping != page_rmapping(page)) - continue; - } + if (unlikely(zap_skip_check_mapping(details, page))) + continue; ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); tlb_remove_tlb_entry(tlb, pte, addr); @@ -1375,17 +1367,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, is_device_exclusive_entry(entry)) { struct page *page = pfn_swap_entry_to_page(entry); - if (unlikely(details && details->check_mapping)) { - /* - * unmap_shared_mapping_pages() wants to - * invalidate cache without truncating: - * unmap shared but keep private pages. - */ - if (details->check_mapping != - page_rmapping(page)) - continue; - } - + if (unlikely(zap_skip_check_mapping(details, page))) + continue; pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); rss[mm_counter(page)]--; @@ -3369,7 +3352,7 @@ void unmap_mapping_page(struct page *page) first_index = page->index; last_index = page->index + thp_nr_pages(page) - 1; - details.check_mapping = mapping; + details.zap_mapping = mapping; details.single_page = page; i_mmap_lock_write(mapping); @@ -3398,7 +3381,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t first_index = start; pgoff_t last_index = start + nr - 1; - details.check_mapping = even_cows ? NULL : mapping; + details.zap_mapping = even_cows ? NULL : mapping; if (last_index < first_index) last_index = ULONG_MAX; From 4c08ce01e7b0b1f64aa7ab708498c773c605bb02 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 21 Oct 2021 15:07:18 +1100 Subject: [PATCH 0565/1202] mm: introduce pmd_install() helper Patch series "Do some code cleanups related to mm", v3. This patch (of 2): Currently we have three times the same few lines repeated in the code. Deduplicate them by newly introduced pmd_install() helper. Link: https://lkml.kernel.org/r/20210901102722.47686-1-zhengqi.arch@bytedance.com Link: https://lkml.kernel.org/r/20210901102722.47686-2-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: David Hildenbrand Reviewed-by: Muchun Song Acked-by: Kirill A. 
Shutemov Cc: Thomas Gleixner Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Mika Penttila Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/filemap.c | 11 ++--------- mm/internal.h | 1 + mm/memory.c | 34 ++++++++++++++++------------------ 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 0cbc24cc92921..c762a36242c6a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3213,15 +3213,8 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) } } - if (pmd_none(*vmf->pmd)) { - vmf->ptl = pmd_lock(mm, vmf->pmd); - if (likely(pmd_none(*vmf->pmd))) { - mm_inc_nr_ptes(mm); - pmd_populate(mm, vmf->pmd, vmf->prealloc_pte); - vmf->prealloc_pte = NULL; - } - spin_unlock(vmf->ptl); - } + if (pmd_none(*vmf->pmd)) + pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); /* See comment in handle_pte_fault() */ if (pmd_devmap_trans_unstable(vmf->pmd)) { diff --git a/mm/internal.h b/mm/internal.h index cf3cb933eba3f..6c3e1a9f8c5a6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -38,6 +38,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); +void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); static inline bool can_madv_lru_vma(struct vm_area_struct *vma) { diff --git a/mm/memory.c b/mm/memory.c index 48b2e048d2677..38d63f6d998b1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -433,9 +433,20 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, } } +void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte) +{ + spinlock_t *ptl = pmd_lock(mm, pmd); + + if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + mm_inc_nr_ptes(mm); + pmd_populate(mm, pmd, *pte); + *pte = NULL; + } + spin_unlock(ptl); +} + int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) { - spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm); if (!new) return -ENOMEM; @@ -455,13 +466,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ - ptl = pmd_lock(mm, pmd); - if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - mm_inc_nr_ptes(mm); - pmd_populate(mm, pmd, new); - new = NULL; - } - spin_unlock(ptl); + pmd_install(mm, pmd, &new); if (new) pte_free(mm, new); return 0; @@ -4024,17 +4029,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (vmf->prealloc_pte) { - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (likely(pmd_none(*vmf->pmd))) { - mm_inc_nr_ptes(vma->vm_mm); - pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); - vmf->prealloc_pte = NULL; - } - spin_unlock(vmf->ptl); - } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { + if (vmf->prealloc_pte) + pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); + else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) return VM_FAULT_OOM; - } } /* See comment in handle_pte_fault() */ From 77fe6b7011ce4f20cd9b09b84d90cee7d621d6d8 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 21 Oct 2021 15:07:18 +1100 Subject: [PATCH 0566/1202] mm: remove redundant smp_wmb() The smp_wmb() which is in the __pte_alloc() is used to ensure all ptes setup is visible before the pte is made visible to other CPUs by being put into page tables. We only need this when the pte is actually populated, so move it to pmd_install(). __pte_alloc_kernel(), __p4d_alloc(), __pud_alloc() and __pmd_alloc() are similar to this case. 
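The ordering requirement itself is the classic publication pattern; as a generic hedged sketch (example_publish()/example_consume() are hypothetical helpers, not the page-table code):

#include <asm/barrier.h>
#include <linux/compiler.h>

struct example_obj {
        int payload;
};

static struct example_obj *example_published;

static void example_publish(struct example_obj *obj)
{
        obj->payload = 42;      /* initialise everything first */
        smp_wmb();              /* order init before the pointer store */
        WRITE_ONCE(example_published, obj);
}

static int example_consume(void)
{
        struct example_obj *obj = READ_ONCE(example_published);

        /*
         * The read of obj->payload is data-dependent on the pointer
         * load; all CPUs except Alpha order such loads, matching the
         * comment moved into pmd_install().
         */
        return obj ? obj->payload : -1;
}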
We can also defer smp_wmb() to the place where the pmd entry is really populated by preallocated pte. There are two kinds of user of preallocated pte, one is filemap & finish_fault(), another is THP. The former does not need another smp_wmb() because the smp_wmb() has been done by pmd_install(). Fortunately, the latter also does not need another smp_wmb() because there is already a smp_wmb() before populating the new pte when the THP uses a preallocated pte to split a huge pmd. Link: https://lkml.kernel.org/r/20210901102722.47686-3-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Reviewed-by: Muchun Song Acked-by: David Hildenbrand Acked-by: Kirill A. Shutemov Cc: Johannes Weiner Cc: Michal Hocko Cc: Mika Penttila Cc: Thomas Gleixner Cc: Vladimir Davydov Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory.c | 52 ++++++++++++++++++++------------------------- mm/sparse-vmemmap.c | 2 +- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 38d63f6d998b1..5db36280950a5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -439,6 +439,20 @@ void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte) if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ mm_inc_nr_ptes(mm); + /* + * Ensure all pte setup (eg. pte page lock and page clearing) are + * visible before the pte is made visible to other CPUs by being + * put into page tables. + * + * The other side of the story is the pointer chasing in the page + * table walking code (when walking the page table without locking; + * ie. most of the time). Fortunately, these data accesses consist + * of a chain of data-dependent loads, meaning most CPUs (alpha + * being the notable exception) will already guarantee loads are + * seen in-order. See the alpha page table accessors for the + * smp_rmb() barriers in page table walking code. + */ + smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ pmd_populate(mm, pmd, *pte); *pte = NULL; } @@ -451,21 +465,6 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd) if (!new) return -ENOMEM; - /* - * Ensure all pte setup (eg. pte page lock and page clearing) are - * visible before the pte is made visible to other CPUs by being - * put into page tables. - * - * The other side of the story is the pointer chasing in the page - * table walking code (when walking the page table without locking; - * ie. most of the time). Fortunately, these data accesses consist - * of a chain of data-dependent loads, meaning most CPUs (alpha - * being the notable exception) will already guarantee loads are - * seen in-order. See the alpha page table accessors for the - * smp_rmb() barriers in page table walking code. - */ - smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ - pmd_install(mm, pmd, &new); if (new) pte_free(mm, new); @@ -478,10 +477,9 @@ int __pte_alloc_kernel(pmd_t *pmd) if (!new) return -ENOMEM; - smp_wmb(); /* See comment in __pte_alloc */ - spin_lock(&init_mm.page_table_lock); if (likely(pmd_none(*pmd))) { /* Has another populated it ? 
*/ + smp_wmb(); /* See comment in pmd_install() */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; } @@ -3845,7 +3843,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; - smp_wmb(); /* See comment in __pte_alloc() */ } ret = vma->vm_ops->fault(vmf); @@ -3916,7 +3913,6 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; - smp_wmb(); /* See comment in __pte_alloc() */ } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); @@ -4141,7 +4137,6 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; - smp_wmb(); /* See comment in __pte_alloc() */ } return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); @@ -4815,13 +4810,13 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) if (!new) return -ENOMEM; - smp_wmb(); /* See comment in __pte_alloc */ - spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) /* Another has populated it */ + if (pgd_present(*pgd)) { /* Another has populated it */ p4d_free(mm, new); - else + } else { + smp_wmb(); /* See comment in pmd_install() */ pgd_populate(mm, pgd, new); + } spin_unlock(&mm->page_table_lock); return 0; } @@ -4838,11 +4833,10 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) if (!new) return -ENOMEM; - smp_wmb(); /* See comment in __pte_alloc */ - spin_lock(&mm->page_table_lock); if (!p4d_present(*p4d)) { mm_inc_nr_puds(mm); + smp_wmb(); /* See comment in pmd_install() */ p4d_populate(mm, p4d, new); } else /* Another has populated it */ pud_free(mm, new); @@ -4863,14 +4857,14 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) if (!new) return -ENOMEM; - smp_wmb(); /* See comment in __pte_alloc */ - ptl = pud_lock(mm, pud); if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); + smp_wmb(); /* See comment in pmd_install() */ pud_populate(mm, pud, new); - } else /* Another has populated it */ + } else { /* Another has populated it */ pmd_free(mm, new); + } spin_unlock(ptl); return 0; } diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index bdce883f92863..db6df27c852a7 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -76,7 +76,7 @@ static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start, set_pte_at(&init_mm, addr, pte, entry); } - /* Make pte visible before pmd. See comment in __pte_alloc(). */ + /* Make pte visible before pmd. See comment in pmd_install(). */ smp_wmb(); pmd_populate_kernel(&init_mm, pmd, pgtable); From 3bee30e4577029566b310ea6515351d83064be79 Mon Sep 17 00:00:00 2001 From: Tiberiu A Georgescu Date: Thu, 21 Oct 2021 15:07:19 +1100 Subject: [PATCH 0567/1202] Documentation: update pagemap with shmem exceptions This patch follows the discussions on previous documentation patch threads [1][2]. It presents the exception case of shared memory management from the pagemap's point of view. It briefly describes what is missing, why it is missing and alternatives to the pagemap for page info retrieval in user space. In short, the kernel does not keep track of PTEs for swapped out shared pages within the processes that references them. Thus, the proc/pid/pagemap tool cannot print the swap destination of the shared memory pages, instead setting the pagemap entry to zero for both non-allocated and swapped out pages. 
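For reference, the lseek()/mincore() alternatives that the documentation below spells out can be combined from user space roughly as follows (a hedged sketch: error handling is minimal and the /dev/shm path is illustrative):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

/* Classify the first page of a shmem-backed file. */
int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "/dev/shm/example";
        long pgsz = sysconf(_SC_PAGESIZE);
        unsigned char vec = 0;
        int fd = open(path, O_RDONLY);

        if (fd < 0)
                return 1;

        if (lseek(fd, 0, SEEK_DATA) == 0) {
                /* Page 0 is data: present in memory or swapped out. */
                void *p = mmap(NULL, pgsz, PROT_READ, MAP_SHARED, fd, 0);

                if (p != MAP_FAILED && mincore(p, pgsz, &vec) == 0)
                        printf("page 0: %s\n",
                               (vec & 1) ? "in core" : "swapped out");
        } else {
                /* SEEK_DATA skipped past offset 0: a never-written hole. */
                printf("page 0: hole\n");
        }
        close(fd);
        return 0;
}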
This can create confusion for users who need information on swapped out pages. The reasons why maintaining the PTEs of all swapped out shared pages among all processes while maintaining similar performance is not a trivial task, or a desirable change, have been discussed extensively [1][3][4][5]. There are also arguments for why this arguably missing information should eventually be exposed to the user in either a future pagemap patch, or by an alternative tool. [1]: https://marc.info/?m=162878395426774 [2]: https://lore.kernel.org/lkml/20210920164931.175411-1-tiberiu.georgescu@nutanix.com/ [3]: https://lore.kernel.org/lkml/20210730160826.63785-1-tiberiu.georgescu@nutanix.com/ [4]: https://lore.kernel.org/lkml/20210807032521.7591-1-peterx@redhat.com/ [5]: https://lore.kernel.org/lkml/20210715201651.212134-1-peterx@redhat.com/ Mention the current missing information in the pagemap and alternatives on how to retrieve it, in case someone stumbles upon unexpected behaviour. Link: https://lkml.kernel.org/r/20210923064618.157046-1-tiberiu.georgescu@nutanix.com Link: https://lkml.kernel.org/r/20210923064618.157046-2-tiberiu.georgescu@nutanix.com Signed-off-by: Tiberiu A Georgescu Reviewed-by: Ivan Teterevkov Reviewed-by: Florian Schmidt Reviewed-by: Carl Waldspurger Reviewed-by: Jonathan Davies Reviewed-by: Peter Xu Reviewed-by: David Hildenbrand Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/pagemap.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index fb578fbbb76ca..4581527c07ae1 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -196,6 +196,28 @@ you can go through every map in the process, find the PFNs, look those up in kpagecount, and tally up the number of pages that are only referenced once. +Exceptions for Shared Memory +============================ + +Page table entries for shared pages are cleared when the pages are zapped or +swapped out. This makes swapped out pages indistinguishable from never-allocated +ones. + +In kernel space, the swap location can still be retrieved from the page cache. +However, values stored only on the normal PTE get lost irretrievably when the +page is swapped out (i.e. SOFT_DIRTY). + +In user space, whether the page is present, swapped or none can be deduced with +the help of lseek and/or mincore system calls. + +lseek() can differentiate between accessed pages (present or swapped out) and +holes (none/non-allocated) by specifying the SEEK_DATA flag on the file where +the pages are backed. For anonymous shared pages, the file can be found in +``/proc/pid/map_files/``. + +mincore() can differentiate between pages in memory (present, including swap +cache) and out of memory (swapped out or none/non-allocated). + Other notes =========== From 2720e6746237f1a5d63e707d10dfee156e07f1d3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 21 Oct 2021 15:07:20 +1100 Subject: [PATCH 0568/1202] lazy tlb: introduce lazy mm refcount helper functions Patch series "shoot lazy tlbs", v4. On a 16-socket 192-core POWER8 system, a context switching benchmark with as many software threads as CPUs (so each switch will go in and out of idle), upstream can achieve a rate of about 1 million context switches per second. After this series it goes up to 118 million. 
This patch (of 4): Add explicit _lazy_tlb annotated functions for lazy mm refcounting. This makes lazy mm references more obvious, and allows explicit refcounting to be removed if it is not used. If a kernel thread's current lazy tlb mm happens to be the one it wants to use, then kthread_use_mm() cleverly transfers the mm refcount from the lazy tlb mm reference to the returned reference. If the lazy tlb mm reference is no longer identical to a normal reference, this trick does not work, so that is changed to be explicit about the two references. [npiggin@gmail.com: fix a refcounting bug in kthread_use_mm] Link: https://lkml.kernel.org/r/1623125298.bx63h3mopj.astroid@bobo.none Link: https://lkml.kernel.org/r/20210605014216.446867-1-npiggin@gmail.com Link: https://lkml.kernel.org/r/20210605014216.446867-2-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm/mach-rpc/ecard.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/mm/book3s64/radix_tlb.c | 4 ++-- fs/exec.c | 4 ++-- include/linux/sched/mm.h | 11 +++++++++++ kernel/cpu.c | 2 +- kernel/exit.c | 2 +- kernel/kthread.c | 21 +++++++++++++-------- kernel/sched/core.c | 15 ++++++++------- 9 files changed, 40 insertions(+), 23 deletions(-) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 53813f9464a24..c30df1097c524 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -253,7 +253,7 @@ static int ecard_init_mm(void) current->mm = mm; current->active_mm = mm; activate_mm(active_mm, mm); - mmdrop(active_mm); + mmdrop_lazy_tlb(active_mm); ecard_init_pgtables(mm); return 0; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9cc7d3dbf4392..3db7d1f0681db 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1582,7 +1582,7 @@ void start_secondary(void *unused) if (IS_ENABLED(CONFIG_PPC32)) setup_kup(); - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 7724af19ed7e6..59156c2d2ebef 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -786,10 +786,10 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); /* Is a kernel thread and is using mm as the lazy tlb */ - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; switch_mm_irqs_off(mm, &init_mm, current); - mmdrop(mm); + mmdrop_lazy_tlb(mm); } /* diff --git a/fs/exec.c b/fs/exec.c index a098c133d8d74..5a7a07dfdc812 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1028,9 +1028,9 @@ static int exec_mmap(struct mm_struct *mm) setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); - return 0; + } else { + mmdrop_lazy_tlb(active_mm); } - mmdrop(active_mm); return 0; } diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 5561486fddef7..f7a0b347fecbd 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } +/* Helpers for lazy TLB mm refcounting */ +static inline void mmgrab_lazy_tlb(struct mm_struct *mm) +{ + mmgrab(mm); +} + +static inline void mmdrop_lazy_tlb(struct mm_struct *mm) +{ + mmdrop(mm); +} + 
/** * mmget() - Pin the address space associated with a &struct mm_struct. * @mm: The address space to pin. diff --git a/kernel/cpu.c b/kernel/cpu.c index 192e43a874076..fffe8b7382012 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -613,7 +613,7 @@ static int finish_cpu(unsigned int cpu) */ if (mm != &init_mm) idle->active_mm = &init_mm; - mmdrop(mm); + mmdrop_lazy_tlb(mm); return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index 91a43e57a32eb..8e7b41702ad66 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -475,7 +475,7 @@ static void exit_mm(void) __set_current_state(TASK_RUNNING); mmap_read_lock(mm); } - mmgrab(mm); + mmgrab_lazy_tlb(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); diff --git a/kernel/kthread.c b/kernel/kthread.c index 5b37a8567168b..83ed75d531b4b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1350,14 +1350,19 @@ void kthread_use_mm(struct mm_struct *mm) WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD)); WARN_ON_ONCE(tsk->mm); + /* + * It's possible that tsk->active_mm == mm here, but we must + * still mmgrab(mm) and mmdrop_lazy_tlb(active_mm), because lazy + * mm may not have its own refcount (see mmgrab/drop_lazy_tlb()). + */ + mmgrab(mm); + task_lock(tsk); /* Hold off tlb flush IPIs while switching mm's */ local_irq_disable(); active_mm = tsk->active_mm; - if (active_mm != mm) { - mmgrab(mm); + if (active_mm != mm) tsk->active_mm = mm; - } tsk->mm = mm; membarrier_update_current_mm(mm); switch_mm_irqs_off(active_mm, mm, tsk); @@ -1374,12 +1379,9 @@ void kthread_use_mm(struct mm_struct *mm) * memory barrier after storing to tsk->mm, before accessing * user-space memory. A full memory barrier for membarrier * {PRIVATE,GLOBAL}_EXPEDITED is implicitly provided by - * mmdrop(), or explicitly with smp_mb(). + * mmdrop_lazy_tlb(). */ - if (active_mm != mm) - mmdrop(active_mm); - else - smp_mb(); + mmdrop_lazy_tlb(active_mm); to_kthread(tsk)->oldfs = force_uaccess_begin(); } @@ -1411,10 +1413,13 @@ void kthread_unuse_mm(struct mm_struct *mm) local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); + mmgrab_lazy_tlb(mm); /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); local_irq_enable(); task_unlock(tsk); + + mmdrop(mm); } EXPORT_SYMBOL_GPL(kthread_unuse_mm); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bba4128a3e68..480205b6a1883 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4831,13 +4831,14 @@ static struct rq *finish_task_switch(struct task_struct *prev) * rq->curr, before returning to userspace, so provide them here: * * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly - * provided by mmdrop(), + * provided by mmdrop_lazy_tlb(), * - a sync_core for SYNC_CORE. 
*/ if (mm) { membarrier_mm_sync_core_before_usermode(mm); - mmdrop(mm); + mmdrop_lazy_tlb(mm); } + if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); @@ -4900,9 +4901,9 @@ context_switch(struct rq *rq, struct task_struct *prev, /* * kernel -> kernel lazy + transfer active - * user -> kernel lazy + mmgrab() active + * user -> kernel lazy + mmgrab_lazy_tlb() active * - * kernel -> user switch + mmdrop() active + * kernel -> user switch + mmdrop_lazy_tlb() active * user -> user switch */ if (!next->mm) { // to kernel @@ -4910,7 +4911,7 @@ context_switch(struct rq *rq, struct task_struct *prev, next->active_mm = prev->active_mm; if (prev->mm) // from user - mmgrab(prev->active_mm); + mmgrab_lazy_tlb(prev->active_mm); else prev->active_mm = NULL; } else { // to user @@ -4926,7 +4927,7 @@ context_switch(struct rq *rq, struct task_struct *prev, switch_mm_irqs_off(prev->active_mm, next->mm, next); if (!prev->mm) { // from kernel - /* will mmdrop() in finish_task_switch(). */ + /* will mmdrop_lazy_tlb() in finish_task_switch(). */ rq->prev_mm = prev->active_mm; prev->active_mm = NULL; } @@ -9441,7 +9442,7 @@ void __init sched_init(void) /* * The boot idle thread does lazy MMU switching as well: */ - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); enter_lazy_tlb(&init_mm, current); /* From abff72ba637bb52d0bc2581b31a139c1f435161e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 21 Oct 2021 15:07:20 +1100 Subject: [PATCH 0569/1202] lazy tlb: allow lazy tlb mm refcounting to be configurable Add CONFIG_MMU_LAZY_TLB_REFCOUNT which enables refcounting of the lazy tlb mm when it is context switched. This can be disabled by architectures that don't require this refcounting if they clean up lazy tlb mms when the last refcount is dropped. Currently this is always enabled, which is what existing code does, so the patch is effectively a no-op. Rename rq->prev_mm to rq->prev_lazy_mm, because that's what it is. [akpm@linux-foundation.org: fix comment] [npiggin@gmail.com: update comments] Link: https://lkml.kernel.org/r/1623121605.j47gdpccep.astroid@bobo.none Link: https://lkml.kernel.org/r/20210605014216.446867-3-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 14 ++++++++++++++ include/linux/sched/mm.h | 14 ++++++++++++-- kernel/sched/core.c | 22 ++++++++++++++++---- kernel/sched/sched.h | 4 +++- 4 files changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 8df1c71026435..c9c0ebd4fc1c3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -428,6 +428,20 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM irqs disabled over activate_mm. Architectures that do IPI based TLB shootdowns should enable this. +# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references. +# MMU_LAZY_TLB_REFCOUNT=n can improve the scalability of context switching +# to/from kernel threads when the same mm is running on a lot of CPUs (a large +# multi-threaded application), by reducing contention on the mm refcount. +# +# This can be disabled if the architecture ensures no CPUs are using an mm as a +# "lazy tlb" beyond its final refcount (i.e., by the time __mmdrop frees the mm +# or its kernel page tables). This could be arranged by arch_exit_mmap(), or +# final exit(2) TLB flush, for example.
Arch code must also ensure the +# _lazy_tlb variants of mmgrab/mmdrop are used when dropping the lazy reference +# to a kthread ->active_mm (non-arch code has been converted already). +config MMU_LAZY_TLB_REFCOUNT + def_bool y + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index f7a0b347fecbd..fd6e4d14f477d 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -52,12 +52,22 @@ static inline void mmdrop(struct mm_struct *mm) /* Helpers for lazy TLB mm refcounting */ static inline void mmgrab_lazy_tlb(struct mm_struct *mm) { - mmgrab(mm); + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) + mmgrab(mm); } static inline void mmdrop_lazy_tlb(struct mm_struct *mm) { - mmdrop(mm); + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) { + mmdrop(mm); + } else { + /* + * mmdrop_lazy_tlb must provide a full memory barrier, see the + * membarrier comment in finish_task_switch which relies on + * this. + */ + smp_mb(); + } } /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 480205b6a1883..8fdc004b4f1f9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4772,7 +4772,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); - struct mm_struct *mm = rq->prev_mm; + struct mm_struct *mm = NULL; long prev_state; /* @@ -4791,7 +4791,10 @@ static struct rq *finish_task_switch(struct task_struct *prev) current->comm, current->pid, preempt_count())) preempt_count_set(FORK_PREEMPT_COUNT); - rq->prev_mm = NULL; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + mm = rq->prev_lazy_mm; + rq->prev_lazy_mm = NULL; +#endif /* * A task struct has one reference for the use as "current". @@ -4927,9 +4930,20 @@ context_switch(struct rq *rq, struct task_struct *prev, switch_mm_irqs_off(prev->active_mm, next->mm, next); if (!prev->mm) { // from kernel - /* Will mmdrop_lazy_tlb() in finish_task_switch(). */ - rq->prev_mm = prev->active_mm; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + /* Will mmdrop_lazy_tlb() in finish_task_switch(). */ + rq->prev_lazy_mm = prev->active_mm; prev->active_mm = NULL; +#else + /* + * Without MMU_LAZY_TLB_REFCOUNT there is no lazy + * tracking (because no rq->prev_lazy_mm) in + * finish_task_switch, so no mmdrop_lazy_tlb(), so no + * memory barrier for membarrier (see the membarrier + * comment in finish_task_switch()). Do it here. + */ + smp_mb(); +#endif } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3d3e5793e1172..43e7fbe065578 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -977,7 +977,9 @@ struct rq { struct task_struct *idle; struct task_struct *stop; unsigned long next_balance; - struct mm_struct *prev_mm; +#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT + struct mm_struct *prev_lazy_mm; +#endif unsigned int clock_update_flags; u64 clock; From 4a5d8dac3df85b619e9ec5417ff63a101d0c229a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 21 Oct 2021 15:07:21 +1100 Subject: [PATCH 0570/1202] lazy tlb: shoot lazies, a non-refcounting lazy tlb option On big systems, the mm refcount can become highly contended when doing a lot of context switching with threaded applications (particularly switching between the idle thread and an application thread). Abandoning lazy tlb slows switching down quite a bit in the important user->idle->user cases, so instead implement a non-refcounted scheme that causes __mmdrop() to IPI all CPUs in the mm_cpumask and shoot down any remaining lazy ones.
Shootdown IPIs are of some concern, but they have not been observed to be a big problem with this scheme (the powerpc implementation generated 314 additional interrupts on a 144 CPU system during a kernel compile). There are a number of strategies that could be employed to reduce IPIs if they turn out to be a problem for some workload. [npiggin@gmail.com: update comments] Link: https://lkml.kernel.org/r/1623121901.mszkmmum0n.astroid@bobo.none Link: https://lkml.kernel.org/r/20210605014216.446867-4-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Anton Blanchard Cc: Andy Lutomirski Cc: Randy Dunlap Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/Kconfig | 14 ++++++++++++++ kernel/fork.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index c9c0ebd4fc1c3..cca27f1b5d0ed 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -441,6 +441,20 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM # to a kthread ->active_mm (non-arch code has been converted already). config MMU_LAZY_TLB_REFCOUNT def_bool y + depends on !MMU_LAZY_TLB_SHOOTDOWN +# This option allows MMU_LAZY_TLB_REFCOUNT=n. It ensures no CPUs are using an +# mm as a lazy tlb beyond its last reference count, by shooting down these +# users before the mm is deallocated. __mmdrop() first IPIs all CPUs that may +# be using the mm as a lazy tlb, so that they may switch themselves to using +# init_mm for their active mm. mm_cpumask(mm) is used to determine which CPUs +# may be using mm as a lazy tlb mm. +# +# To implement this, an arch must ensure mm_cpumask(mm) contains at least all +# possible CPUs in which the mm is lazy, and it must meet the requirements for +# MMU_LAZY_TLB_REFCOUNT=n (see above). +config MMU_LAZY_TLB_SHOOTDOWN + bool config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/kernel/fork.c b/kernel/fork.c index 38681ad44c76b..a7da9b0bc4029 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -686,6 +686,53 @@ static void check_mm(struct mm_struct *mm) #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) +static void do_shoot_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + WARN_ON_ONCE(current->mm); + current->active_mm = &init_mm; + switch_mm(mm, &init_mm, current); + } +} + +static void do_check_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + + WARN_ON_ONCE(current->active_mm == mm); +} + +static void shoot_lazy_tlbs(struct mm_struct *mm) +{ + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { + /* + * IPI overheads have not been found to be expensive, but they + * could be reduced in a number of possible ways, for example + * (in roughly increasing order of complexity): + * - A batch of mms requiring IPIs could be gathered and freed + * at once. + * - CPUs could store their active mm somewhere that can be + * remotely checked without a lock, to filter out + * false-positives in the cpumask. + * - After mm_users or mm_count reaches zero, switching away + * from the mm could clear mm_cpumask to reduce some IPIs + * (some batching or delaying would help). + * - A delayed freeing and RCU-like quiescing sequence based on + * mm switching to avoid IPIs completely.
+ */ + on_each_cpu_mask(mm_cpumask(mm), do_shoot_lazy_tlb, (void *)mm, 1); + if (IS_ENABLED(CONFIG_DEBUG_VM)) + on_each_cpu(do_check_lazy_tlb, (void *)mm, 1); + } else { + /* + * In this case, lazy tlb mms are refcounted and would not reach + * __mmdrop until all CPUs have switched away and mmdrop()ed. + */ + } +} + /* * Called when the last reference to the mm * is dropped: either by a lazy thread or by @@ -695,6 +742,10 @@ void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); WARN_ON_ONCE(mm == current->mm); + + /* Ensure no CPUs are using this as their lazy tlb mm */ + shoot_lazy_tlbs(mm); + WARN_ON_ONCE(mm == current->active_mm); mm_free_pgd(mm); destroy_context(mm); From cba712aa6c5b7e4551de4a8e6303a6cba8a3d008 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 21 Oct 2021 15:07:22 +1100 Subject: [PATCH 0571/1202] powerpc/64s: enable MMU_LAZY_TLB_SHOOTDOWN On a 16-socket 192-core POWER8 system, a context switching benchmark with as many software threads as CPUs (so each switch will go in and out of idle), upstream can achieve a rate of about 1 million context switches per second. After this patch it goes up to 118 million. No real data for real-world workloads, unfortunately. I think it's always been a "known" cacheline, it just showed up badly on will-it-scale tests recently when Anton was doing a sweep of low hanging scalability issues on big systems. We have some very big systems running certain in-memory databases that get into very high contention conditions on mutexes that push context switch rates right up. With idle times pretty high, there would be a lot of parallel context switching between user and idle threads, so we might be getting a bit of this contention there. It's not something at the top of profiles though. And on multi-threaded workloads like this, the normal refcounting of the user mm still has fundamental contention. It's tricky to get the change tested on these workloads (machine time is very limited and I can't drive the software). I suspect it could also show up in things that do high net or disk IO rates (enough to need a lot of cores), and do some user processing steps along the way. You'd potentially get a lot of idle switching. This infrastructure could be beneficial to other architectures. The cacheline is going to bounce in the same situations on other archs, so I would say yes. Rik at one stage had some patches to try to avoid it for x86 some years ago, I don't know what happened to those. The way powerpc has to maintain mm_cpumask for its TLB flushing makes it relatively easy to do this shootdown, and we decided the additional IPIs were less of a concern than the bouncing. Others have different concerns, but I tried to make it generic and add comments explaining what other archs can do, or possibly different ways it might be achieved.
Link: https://lkml.kernel.org/r/20210605014216.446867-5-npiggin@gmail.com Signed-off-by: Nicholas Piggin Cc: Andy Lutomirski Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ba5b661893588..8a584414ef67a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -249,6 +249,7 @@ config PPC select IRQ_FORCED_THREADING select MMU_GATHER_PAGE_SIZE select MMU_GATHER_RCU_TABLE_FREE + select MMU_LAZY_TLB_SHOOTDOWN if PPC_BOOK3S_64 select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH From 6d4d8977d14f6f14461cc39b65cf060065e1d086 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 21 Oct 2021 15:07:22 +1100 Subject: [PATCH 0572/1202] memory: remove unused CONFIG_MEM_BLOCK_SIZE Commit 3947be1969a9 ("[PATCH] memory hotplug: sysfs and add/remove functions") defines CONFIG_MEM_BLOCK_SIZE, but this has never been utilized anywhere. It is a good practice to keep the CONFIG_* defines exclusively for the Kbuild system. So, drop this unused definition. This issue was noticed due to running ./scripts/checkkconfigsymbols.py. Link: https://lkml.kernel.org/r/20211006120354.7468-1-lukas.bulwahn@gmail.com Signed-off-by: Lukas Bulwahn Reviewed-by: David Hildenbrand Cc: Michal Hocko Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/memory.h b/include/linux/memory.h index 182c606adb060..053a530c7bdd3 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -140,7 +140,6 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); -#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) From: Liu Song Date: Thu, 21 Oct 2021 15:07:23 +1100 Subject: [PATCH 0573/1202] mm/mprotect.c: avoid repeated assignment in do_mprotect_pkey() After adjustment, the repeated assignment of "prev" is avoided, and the readability of the code is improved. Link: https://lkml.kernel.org/r/20211012152444.4127-1-fishland@aliyun.com Reviewed-by: Andrew Morton Signed-off-by: Liu Song Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mprotect.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index 883e2cc85cad8..e552f5e0ccbde 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -563,7 +563,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, error = -ENOMEM; if (!vma) goto out; - prev = vma->vm_prev; + if (unlikely(grows & PROT_GROWSDOWN)) { if (vma->vm_start >= end) goto out; @@ -581,8 +581,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len, goto out; } } + if (start > vma->vm_start) prev = vma; + else + prev = vma->vm_prev; for (nstart = start ; ; ) { unsigned long mask_off_old_flags; From 1e744a4c18da7e2a464b290d872ee8ff354b63e4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Oct 2021 15:07:23 +1100 Subject: [PATCH 0574/1202] mm/mremap: don't account pages in vma_to_resize() All this vm_unacct_memory(charged) dance seems to complicate life without a good reason. Furthermore, it seems it is not always done right on error paths in mremap_to().
And worse than that: this `charged' difference is sometimes double-accounted for growing MREMAP_DONTUNMAP mremap()s in move_vma(): if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT)) Let's not do this. Account memory in mremap() fast-path for growing VMAs or in move_vma() for actually moving things. The same simpler way as it's done by vm_stat_account(), but with a difference to call security_vm_enough_memory_mm() before copying/adjusting VMA. Originally noticed by Chen Wandun: https://lkml.kernel.org/r/20210717101942.120607-1-chenwandun@huawei.com Link: https://lkml.kernel.org/r/20210721131320.522061-1-dima@arista.com Fixes: e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()") Signed-off-by: Dmitry Safonov Acked-by: Brian Geffon Cc: Alexander Viro Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Chen Wandun Cc: Dan Carpenter Cc: Dan Williams Cc: Dave Jiang Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: John Hubbard Cc: Kefeng Wang Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Minchan Kim Cc: Ralph Campbell Cc: Russell King Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Vishal Verma Cc: Vlastimil Babka Cc: Wei Yongjun Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mremap.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index badfe17ade1f0..c0b6c41b7b78f 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -565,6 +565,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, bool *locked, unsigned long flags, struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap) { + long to_account = new_len - old_len; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; unsigned long vm_flags = vma->vm_flags; @@ -583,6 +584,9 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (mm->map_count >= sysctl_max_map_count - 3) return -ENOMEM; + if (unlikely(flags & MREMAP_DONTUNMAP)) + to_account = new_len; + if (vma->vm_ops && vma->vm_ops->may_split) { if (vma->vm_start != old_addr) err = vma->vm_ops->may_split(vma, old_addr); @@ -604,8 +608,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (err) return err; - if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) { - if (security_vm_enough_memory_mm(mm, new_len >> PAGE_SHIFT)) + if (vm_flags & VM_ACCOUNT) { + if (security_vm_enough_memory_mm(mm, to_account >> PAGE_SHIFT)) return -ENOMEM; } @@ -613,8 +617,8 @@ static unsigned long move_vma(struct vm_area_struct *vma, new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff, &need_rmap_locks); if (!new_vma) { - if (unlikely(flags & MREMAP_DONTUNMAP && vm_flags & VM_ACCOUNT)) - vm_unacct_memory(new_len >> PAGE_SHIFT); + if (vm_flags & VM_ACCOUNT) + vm_unacct_memory(to_account >> PAGE_SHIFT); return -ENOMEM; } @@ -708,8 +712,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, } static struct vm_area_struct *vma_to_resize(unsigned long addr, - unsigned long old_len, unsigned long new_len, unsigned long flags, - unsigned long *p) + unsigned long old_len, unsigned long new_len, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -768,13 +771,6 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, (new_len - old_len) >> PAGE_SHIFT)) return ERR_PTR(-ENOMEM); - if (vma->vm_flags & VM_ACCOUNT) { - unsigned long charged = (new_len - old_len) >> PAGE_SHIFT; - if (security_vm_enough_memory_mm(mm, charged)) - return ERR_PTR(-ENOMEM); - *p = charged; - } - 
return vma; } @@ -787,7 +783,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; - unsigned long charged = 0; unsigned long map_flags = 0; if (offset_in_page(new_addr)) @@ -830,7 +825,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, old_len = new_len; } - vma = vma_to_resize(addr, old_len, new_len, flags, &charged); + vma = vma_to_resize(addr, old_len, new_len, flags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; @@ -853,7 +848,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, ((addr - vma->vm_start) >> PAGE_SHIFT), map_flags); if (IS_ERR_VALUE(ret)) - goto out1; + goto out; /* We got a new mapping */ if (!(flags & MREMAP_FIXED)) @@ -862,12 +857,6 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf, uf_unmap); - if (!(offset_in_page(ret))) - goto out; - -out1: - vm_unacct_memory(charged); - out: return ret; } @@ -899,7 +888,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; - unsigned long charged = 0; bool locked = false; bool downgraded = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; @@ -981,7 +969,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, /* * Ok, we need to grow.. */ - vma = vma_to_resize(addr, old_len, new_len, flags, &charged); + vma = vma_to_resize(addr, old_len, new_len, flags); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; @@ -992,10 +980,18 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (old_len == vma->vm_end - addr) { /* can we just expand the current mapping? */ if (vma_expandable(vma, new_len - old_len)) { - int pages = (new_len - old_len) >> PAGE_SHIFT; + long pages = (new_len - old_len) >> PAGE_SHIFT; + + if (vma->vm_flags & VM_ACCOUNT) { + if (security_vm_enough_memory_mm(mm, pages)) { + ret = -ENOMEM; + goto out; + } + } if (vma_adjust(vma, vma->vm_start, addr + new_len, vma->vm_pgoff, NULL)) { + vm_unacct_memory(pages); ret = -ENOMEM; goto out; } @@ -1034,10 +1030,8 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, &locked, flags, &uf, &uf_unmap); } out: - if (offset_in_page(ret)) { - vm_unacct_memory(charged); + if (offset_in_page(ret)) locked = false; - } if (downgraded) mmap_read_unlock(current->mm); else From 43a552d71281fb04fb5b01f995cb9e3b6ac7823d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 21 Oct 2021 15:07:24 +1100 Subject: [PATCH 0575/1202] include/linux/io-mapping.h: remove fallback for writecombine The fallback was introduced in commit 80c33624e472 ("io-mapping: Fixup for different names of writecombine") to fix the build on microblaze. 5 years later, it seems all archs now provide a pgprot_writecombine(), so just remove the other possible fallbacks. For microblaze, pgprot_writecombine() is available since commit 97ccedd793ac ("microblaze: Provide pgprot_device/writecombine macros for nommu"). 
This is build-tested on microblaze with a hack to always build mm/io-mapping.o and without DIYing on an x86-only macro (_PAGE_CACHE_MASK). Link: https://lkml.kernel.org/r/20211020204838.1142908-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Cc: Chris Wilson Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/io-mapping.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index e9743cfd85852..66a774d2710e6 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -132,13 +132,7 @@ io_mapping_init_wc(struct io_mapping *iomap, iomap->base = base; iomap->size = size; -#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ - iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); -#elif defined(pgprot_writecombine) iomap->prot = pgprot_writecombine(PAGE_KERNEL); -#else - iomap->prot = pgprot_noncached(PAGE_KERNEL); -#endif return iomap; } From 28aaf4e10e2d8d838ac170aa81ca4a06e3055aee Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 21 Oct 2021 15:07:25 +1100 Subject: [PATCH 0576/1202] mm: mmap_lock: remove redundant newline in TP_printk Ftrace core will add a newline automatically on printing, so using one in TP_printk creates a blank line. Link: https://lkml.kernel.org/r/20211009071105.69544-1-ligang.bdlg@bytedance.com Signed-off-by: Gang Li Acked-by: Vlastimil Babka Reviewed-by: Steven Rostedt (VMware) Cc: Ingo Molnar Cc: Axel Rasmussen Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/trace/events/mmap_lock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h index 0abff67b96f09..5f980c92e3e93 100644 --- a/include/trace/events/mmap_lock.h +++ b/include/trace/events/mmap_lock.h @@ -32,7 +32,7 @@ TRACE_EVENT_FN(mmap_lock_start_locking, ), TP_printk( - "mm=%p memcg_path=%s write=%s\n", + "mm=%p memcg_path=%s write=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false" @@ -63,7 +63,7 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, ), TP_printk( - "mm=%p memcg_path=%s write=%s success=%s\n", + "mm=%p memcg_path=%s write=%s success=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false", @@ -92,7 +92,7 @@ TRACE_EVENT_FN(mmap_lock_released, ), TP_printk( - "mm=%p memcg_path=%s write=%s\n", + "mm=%p memcg_path=%s write=%s", __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false" From 2039d5c6b34fa0989cf51e631b07046a999087f1 Mon Sep 17 00:00:00 2001 From: Gang Li Date: Thu, 21 Oct 2021 15:07:26 +1100 Subject: [PATCH 0577/1202] mm: mmap_lock: use DECLARE_EVENT_CLASS and DEFINE_EVENT_FN By using DECLARE_EVENT_CLASS and DEFINE_EVENT_FN, we can save a lot of space from duplicate code.
Link: https://lkml.kernel.org/r/20211009071243.70286-1-ligang.bdlg@bytedance.com Signed-off-by: Gang Li Acked-by: Vlastimil Babka Reviewed-by: Steven Rostedt (VMware) Cc: Axel Rasmussen Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/trace/events/mmap_lock.h | 44 +++++++++----------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/include/trace/events/mmap_lock.h b/include/trace/events/mmap_lock.h index 5f980c92e3e93..14db8044c1ff2 100644 --- a/include/trace/events/mmap_lock.h +++ b/include/trace/events/mmap_lock.h @@ -13,7 +13,7 @@ struct mm_struct; extern int trace_mmap_lock_reg(void); extern void trace_mmap_lock_unreg(void); -TRACE_EVENT_FN(mmap_lock_start_locking, +DECLARE_EVENT_CLASS(mmap_lock, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), @@ -36,11 +36,19 @@ TRACE_EVENT_FN(mmap_lock_start_locking, __entry->mm, __get_str(memcg_path), __entry->write ? "true" : "false" - ), - - trace_mmap_lock_reg, trace_mmap_lock_unreg + ) ); +#define DEFINE_MMAP_LOCK_EVENT(name) \ + DEFINE_EVENT_FN(mmap_lock, name, \ + TP_PROTO(struct mm_struct *mm, const char *memcg_path, \ + bool write), \ + TP_ARGS(mm, memcg_path, write), \ + trace_mmap_lock_reg, trace_mmap_lock_unreg) + +DEFINE_MMAP_LOCK_EVENT(mmap_lock_start_locking); +DEFINE_MMAP_LOCK_EVENT(mmap_lock_released); + TRACE_EVENT_FN(mmap_lock_acquire_returned, TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write, @@ -73,34 +81,6 @@ TRACE_EVENT_FN(mmap_lock_acquire_returned, trace_mmap_lock_reg, trace_mmap_lock_unreg ); -TRACE_EVENT_FN(mmap_lock_released, - - TP_PROTO(struct mm_struct *mm, const char *memcg_path, bool write), - - TP_ARGS(mm, memcg_path, write), - - TP_STRUCT__entry( - __field(struct mm_struct *, mm) - __string(memcg_path, memcg_path) - __field(bool, write) - ), - - TP_fast_assign( - __entry->mm = mm; - __assign_str(memcg_path, memcg_path); - __entry->write = write; - ), - - TP_printk( - "mm=%p memcg_path=%s write=%s", - __entry->mm, - __get_str(memcg_path), - __entry->write ? "true" : "false" - ), - - trace_mmap_lock_reg, trace_mmap_lock_unreg -); - #endif /* _TRACE_MMAP_LOCK_H */ /* This part must be outside protection */ From a824d2a01893a572e70d8b259abd4c7aa1e91e03 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 21 Oct 2021 15:07:26 +1100 Subject: [PATCH 0578/1202] mm/vmalloc: repair warn_alloc()s in __vmalloc_area_node() Commit f255935b9767 ("mm: cleanup the gfp_mask handling in __vmalloc_area_node") added __GFP_NOWARN to gfp_mask unconditionally however it disabled all output inside warn_alloc() call. This patch saves original gfp_mask and provides it to all warn_alloc() calls. 
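A minimal sketch of the failure mode being fixed (illustration only, simplified from the patch context; identifiers mirror the diff below):

	gfp_t orig_gfp_mask = gfp_mask;	/* keep the caller's mask */

	gfp_mask |= __GFP_NOWARN;	/* wanted only inside the allocation loop */
	/* ... allocation fails ... */
	/* warn_alloc(gfp_mask, ...) would be silenced by __GFP_NOWARN;
	 * passing the saved orig_gfp_mask keeps the diagnostic visible. */
	warn_alloc(orig_gfp_mask, NULL, "vmalloc error: size %lu", size);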
Link: https://lkml.kernel.org/r/f4f3187b-9684-e426-565d-827c2a9bbb0e@virtuozzo.com Fixes: f255935b9767 ("mm: cleanup the gfp_mask handling in __vmalloc_area_node") Signed-off-by: Vasily Averin Reviewed-by: Christoph Hellwig Reviewed-by: Muchun Song Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9e37c828a1f0e..34b03dc8cfb6e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2903,6 +2903,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, int node) { const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; + const gfp_t orig_gfp_mask = gfp_mask; unsigned long addr = (unsigned long)area->addr; unsigned long size = get_vm_area_size(area); unsigned long array_size; @@ -2923,7 +2924,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } if (!area->pages) { - warn_alloc(gfp_mask, NULL, + warn_alloc(orig_gfp_mask, NULL, "vmalloc error: size %lu, failed to allocated page array size %lu", nr_small_pages * PAGE_SIZE, array_size); free_vm_area(area); @@ -2943,7 +2944,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, * allocation request, free them via __vfree() if any. */ if (area->nr_pages != nr_small_pages) { - warn_alloc(gfp_mask, NULL, + warn_alloc(orig_gfp_mask, NULL, "vmalloc error: size %lu, page order %u, failed to allocate pages", area->nr_pages * PAGE_SIZE, page_order); goto fail; @@ -2951,7 +2952,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (vmap_pages_range(addr, addr + size, prot, area->pages, page_shift) < 0) { - warn_alloc(gfp_mask, NULL, + warn_alloc(orig_gfp_mask, NULL, "vmalloc error: size %lu, failed to map pages", area->nr_pages * PAGE_SIZE); goto fail; From 5aaa853b58ae75ebe5cd4c3c65deaf25785f35e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Oct 2021 15:07:27 +1100 Subject: [PATCH 0579/1202] mm/vmalloc: don't allow VM_NO_GUARD on vmap() The vmalloc guard pages are added on top of each allocation, thereby isolating any two allocations from one another. The top guard of the lower allocation is the bottom guard guard of the higher allocation etc. Therefore VM_NO_GUARD is dangerous; it breaks the basic premise of isolating separate allocations. There are only two in-tree users of this flag, neither of which use it through the exported interface. Ensure it stays this way. 
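As a sketch of that premise (illustration only, kernel context assumed): by default every vmalloc area carries one trailing guard page, so the usable size reported by get_vm_area_size() is one page less than the area's total size.

	struct vm_struct *area = get_vm_area(PAGE_SIZE, VM_ALLOC);

	if (area) {
		/* area->size == 2 * PAGE_SIZE: the requested page plus its
		 * guard page; VM_NO_GUARD would remove the guard, and with
		 * it the isolation from the next allocation. */
		WARN_ON(get_vm_area_size(area) != PAGE_SIZE);
		free_vm_area(area);
	}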
Link: https://lkml.kernel.org/r/YUMfdA36fuyZ+/xt@hirez.programming.kicks-ass.net Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Christoph Hellwig Reviewed-by: David Hildenbrand Acked-by: Will Deacon Acked-by: Kees Cook Cc: Andrey Konovalov Cc: Mel Gorman Cc: Uladzislau Rezki Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/vmalloc.h | 2 +- mm/vmalloc.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0ed56fc10c11d..6e022cc712e61 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -22,7 +22,7 @@ struct notifier_block; /* in notifier.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ -#define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 34b03dc8cfb6e..48e717626e94f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2743,6 +2743,13 @@ void *vmap(struct page **pages, unsigned int count, might_sleep(); + /* + * Your top guard is someone else's bottom guard. Not having a top + * guard compromises someone else's mappings too. + */ + if (WARN_ON_ONCE(flags & VM_NO_GUARD)) + flags &= ~VM_NO_GUARD; + if (count > totalram_pages()) return NULL; From 3974aecebe85cf8f19a0971ea32e33960f432259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Oct 2021 15:07:28 +1100 Subject: [PATCH 0580/1202] mm/vmalloc: make show_numa_info() aware of hugepage mappings show_numa_info() can be slightly faster, by skipping over hugepages directly. Link: https://lkml.kernel.org/r/20211001172725.105824-1-eric.dumazet@gmail.com Signed-off-by: Eric Dumazet Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 48e717626e94f..21a37c104bb09 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3880,6 +3880,7 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) { if (IS_ENABLED(CONFIG_NUMA)) { unsigned int nr, *counters = m->private; + unsigned int step = 1U << vm_area_page_order(v); if (!counters) return; @@ -3891,9 +3892,8 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v) memset(counters, 0, nr_node_ids * sizeof(unsigned int)); - for (nr = 0; nr < v->nr_pages; nr++) - counters[page_to_nid(v->pages[nr])]++; - + for (nr = 0; nr < v->nr_pages; nr += step) + counters[page_to_nid(v->pages[nr])] += step; for_each_node_state(nr, N_HIGH_MEMORY) if (counters[nr]) seq_printf(m, " N%u=%u", nr, counters[nr]); From 5945c5272dc3b7815ad79ffb07d1c0faab600c2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Oct 2021 15:07:29 +1100 Subject: [PATCH 0581/1202] mm/vmalloc: make sure to dump unpurged areas in /proc/vmallocinfo If last va found in vmap_area_list does not have a vm pointer, vmallocinfo.s_show() returns 0, and show_purge_info() is not called as it should. 
Link: https://lkml.kernel.org/r/20211001170815.73321-1-eric.dumazet@gmail.com Fixes: dd3b8353bae7 ("mm/vmalloc: do not keep unpurged areas in the busy tree") Signed-off-by: Eric Dumazet Cc: Uladzislau Rezki (Sony) Cc: Pengfei Li Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 21a37c104bb09..fa8388562401f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3929,7 +3929,7 @@ static int s_show(struct seq_file *m, void *p) (void *)va->va_start, (void *)va->va_end, va->va_end - va->va_start); - return 0; + goto final; } v = va->vm; @@ -3970,6 +3970,7 @@ static int s_show(struct seq_file *m, void *p) /* * As a final step, dump "unpurged" areas. */ +final: if (list_is_last(&va->list, &vmap_area_list)) show_purge_info(m); From f99df218ac97765d57edd36f7929c67552b1933c Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 21 Oct 2021 15:07:29 +1100 Subject: [PATCH 0582/1202] mm/vmalloc: do not adjust the search size for alignment overhead We used to include an alignment overhead into the search length; in that case we guaranteed that a found area would definitely fit after applying the specific alignment that the user specifies. On the other hand, we did not guarantee that the area has the lowest address if the alignment is >= PAGE_SIZE. It means that when a user specifies a special alignment together with a range that corresponds to the exact requested size, the allocation will fail. This is what happens to KASAN; it wants a free block that exactly matches a specified range during onlining of memory banks: [root@vm-0 fedora]# echo online > /sys/devices/system/memory/memory82/state [root@vm-0 fedora]# echo online > /sys/devices/system/memory/memory83/state [root@vm-0 fedora]# echo online > /sys/devices/system/memory/memory85/state [root@vm-0 fedora]# echo online > /sys/devices/system/memory/memory84/state [ 223.858115] vmap allocation for size 16777216 failed: use vmalloc= to increase size [ 223.859415] bash: vmalloc: allocation failure: 16777216 bytes, mode:0x6000c0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0 [ 223.860992] CPU: 4 PID: 1644 Comm: bash Kdump: loaded Not tainted 4.18.0-339.el8.x86_64+debug #1 [ 223.862149] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 223.863580] Call Trace: [ 223.863946] dump_stack+0x8e/0xd0 [ 223.864420] warn_alloc.cold.90+0x8a/0x1b2 [ 223.864990] ? zone_watermark_ok_safe+0x300/0x300 [ 223.865626] ? slab_free_freelist_hook+0x85/0x1a0 [ 223.866264] ? __get_vm_area_node+0x240/0x2c0 [ 223.866858] ? kfree+0xdd/0x570 [ 223.867309] ? kmem_cache_alloc_node_trace+0x157/0x230 [ 223.868028] ? notifier_call_chain+0x90/0x160 [ 223.868625] __vmalloc_node_range+0x465/0x840 [ 223.869230] ? mark_held_locks+0xb7/0x120 Fix it by making sure that find_vmap_lowest_match() returns the lowest start address for any given alignment value, i.e. for alignments bigger than PAGE_SIZE the algorithm rolls back toward parent nodes, checking right sub-trees if the leftmost free block did not fit due to alignment overhead.
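A worked example of the old failure mode (numbers chosen for illustration): with a single free block of exactly 16 MiB whose start is already 2 MiB aligned, a request of size = 16 MiB with align = 2 MiB used to be searched as

	length = size + align - 1 = 16 MiB + 2 MiB - 1

so the exactly-fitting block was rejected as too small and the allocation failed, even though no alignment adjustment was actually needed. After the fix the tree is searched with size alone, and the walk rolls back up the tree only when the aligned start really overflows a candidate block.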
Link: https://lkml.kernel.org/r/20211004142829.22222-1-urezki@gmail.com Fixes: 68ad4a330433 ("mm/vmalloc.c: keep track of free blocks for vmap allocation") Signed-off-by: Uladzislau Rezki (Sony) Reported-by: Ping Fang Tested-by: David Hildenbrand Reviewed-by: David Hildenbrand Cc: Mel Gorman Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Nicholas Piggin Cc: Hillf Danton Cc: Michal Hocko Cc: Oleksiy Avramchenko Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index fa8388562401f..9abc26874cbf1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1195,18 +1195,14 @@ find_vmap_lowest_match(unsigned long size, { struct vmap_area *va; struct rb_node *node; - unsigned long length; /* Start from the root. */ node = free_vmap_area_root.rb_node; - /* Adjust the search size for alignment overhead. */ - length = size + align - 1; - while (node) { va = rb_entry(node, struct vmap_area, rb_node); - if (get_subtree_max_size(node->rb_left) >= length && + if (get_subtree_max_size(node->rb_left) >= size && vstart < va->va_start) { node = node->rb_left; } else { @@ -1216,9 +1212,9 @@ find_vmap_lowest_match(unsigned long size, /* * Does not make sense to go deeper towards the right * sub-tree if it does not have a free block that is - * equal or bigger to the requested search length. + * equal or bigger to the requested search size. */ - if (get_subtree_max_size(node->rb_right) >= length) { + if (get_subtree_max_size(node->rb_right) >= size) { node = node->rb_right; continue; } @@ -1226,15 +1222,23 @@ find_vmap_lowest_match(unsigned long size, /* * OK. We roll back and find the first right sub-tree, * that will satisfy the search criteria. It can happen - * only once due to "vstart" restriction. + * due to "vstart" restriction or an alignment overhead + * that is bigger than PAGE_SIZE. */ while ((node = rb_parent(node))) { va = rb_entry(node, struct vmap_area, rb_node); if (is_within_this_va(va, size, align, vstart)) return va; - if (get_subtree_max_size(node->rb_right) >= length && + if (get_subtree_max_size(node->rb_right) >= size && vstart <= va->va_start) { + /* + * Shift the vstart forward. Please note, we update it with + * parent's start address adding "1" because we do not want + * to enter same sub-tree after it has already been checked + * and no suitable free block found there. + */ + vstart = va->va_start + 1; node = node->rb_right; break; } From 1db72c04dd2cb53de3e759bb325ec6be4e5622a8 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Thu, 21 Oct 2021 15:07:30 +1100 Subject: [PATCH 0583/1202] mm/vmalloc: check various alignments when debugging Before, we did not guarantee a free block with the lowest start address for allocations with alignment >= PAGE_SIZE, because an alignment overhead was included in the search length like below: length = size + align - 1; Doing so made sure that a bigger block would fit after applying an alignment adjustment. Now there is no such limitation, i.e. any alignment that the user wants to apply will result in the lowest address of the returned free area.
Link: https://lkml.kernel.org/r/20211004142829.22222-2-urezki@gmail.com Signed-off-by: Uladzislau Rezki (Sony) Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Hillf Danton Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Nicholas Piggin Cc: Oleksiy Avramchenko Cc: Ping Fang Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9abc26874cbf1..69006f0b0ea79 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1269,7 +1269,7 @@ find_vmap_lowest_linear_match(unsigned long size, } static void -find_vmap_lowest_match_check(unsigned long size) +find_vmap_lowest_match_check(unsigned long size, unsigned long align) { struct vmap_area *va_1, *va_2; unsigned long vstart; @@ -1278,8 +1278,8 @@ find_vmap_lowest_match_check(unsigned long size) get_random_bytes(&rnd, sizeof(rnd)); vstart = VMALLOC_START + rnd; - va_1 = find_vmap_lowest_match(size, 1, vstart); - va_2 = find_vmap_lowest_linear_match(size, 1, vstart); + va_1 = find_vmap_lowest_match(size, align, vstart); + va_2 = find_vmap_lowest_linear_match(size, align, vstart); if (va_1 != va_2) pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n", @@ -1458,7 +1458,7 @@ __alloc_vmap_area(unsigned long size, unsigned long align, return vend; #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK - find_vmap_lowest_match_check(size); + find_vmap_lowest_match_check(size, align); #endif return nva_start_addr; From 4b4acf42f2cea1d4008458b469d1cd04a073d1ed Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 21 Oct 2021 15:07:31 +1100 Subject: [PATCH 0584/1202] vmalloc: back off when the current task is OOM-killed A huge vmalloc allocation on a heavily loaded node can lead to a global memory shortage. A task that called vmalloc can have the worst badness and be selected by the OOM-killer, but a pending fatal signal does not interrupt the allocation cycle. Vmalloc repeats page allocations again and again, exacerbating the crisis and consuming the memory freed up by other killed tasks. After successful completion of the allocation procedure, the fatal signal will be processed and the task finally destroyed. However, that may not release the consumed memory, since the allocated object may have a lifetime unrelated to the completed task. In the worst case, this can lead to a host panic due to "Out of memory and no killable processes..." This patch allows the OOM-killer to break the vmalloc cycle, makes OOM handling more effective, and avoids a host panic. It does not check the OOM condition directly, however; it breaks the page allocation cycle when a fatal signal has been received. This may trigger some hidden problems, when a caller does not handle vmalloc failures, or when a rollback after a failed vmalloc itself calls vmalloc internally. However, all of these scenarios are incorrect: vmalloc does not guarantee successful allocation, it has never been called with __GFP_NOFAIL, and therefore it either should not be used for any rollbacks or such errors should be handled correctly and not lead to critical failures.
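A minimal sketch of the caller contract the message refers to (illustration only): every vmalloc caller must tolerate failure, which after this patch also covers backing off because of a pending fatal signal.

	void *buf = vmalloc(size);

	if (!buf)
		return -ENOMEM;	/* vmalloc may legitimately fail; never assume success */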
Link: https://lkml.kernel.org/r/83efc664-3a65-2adb-d7c4-2885784cf109@virtuozzo.com Signed-off-by: Vasily Averin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Tetsuo Handa Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 69006f0b0ea79..4dd44f510c272 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2887,6 +2887,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = NULL; while (nr_allocated < nr_pages) { + if (fatal_signal_pending(current)) + break; + if (nid == NUMA_NO_NODE) page = alloc_pages(gfp, order); else From 5fc56aee24e7b486eff9fce3da9b3adf2a149022 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:07:31 +1100 Subject: [PATCH 0585/1202] vmalloc: choose a better start address in vm_area_register_early() The percpu embedded first chunk allocator is the first option, but it could fail on ARM64, e.g., "percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000" "percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000" "percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000" then we could hit "WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838" and the system cannot boot successfully. Let's implement the page mapping percpu first chunk allocator as a fallback to the embedding allocator to increase the robustness of the system. Also fix a crash when both NEED_PER_CPU_PAGE_FIRST_CHUNK and KASAN_VMALLOC are enabled. Tested on ARM64 qemu with cmdline "percpu_alloc=page". This patch (of 3): There are some fixed locations in the vmalloc area that are reserved in ARM (see iotable_init()) and ARM64 (see map_kernel()), but pcpu_page_first_chunk() calls vm_area_register_early() and chooses VMALLOC_START as the start address of the vmap area, which could conflict with the addresses above and then trigger a BUG_ON in vm_area_add_early(). Let's choose a suitable start address by traversing the vmlist.
Link: https://lkml.kernel.org/r/20210910053354.26721-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20210910053354.26721-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Catalin Marinas Cc: Will Deacon Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4dd44f510c272..0bffb93a41725 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2276,15 +2276,21 @@ void __init vm_area_add_early(struct vm_struct *vm) */ void __init vm_area_register_early(struct vm_struct *vm, size_t align) { - static size_t vm_init_off __initdata; - unsigned long addr; + unsigned long addr = ALIGN(VMALLOC_START, align); + struct vm_struct *cur, **p; - addr = ALIGN(VMALLOC_START + vm_init_off, align); - vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; + BUG_ON(vmap_initialized); - vm->addr = (void *)addr; + for (p = &vmlist; (cur = *p) != NULL; p = &cur->next) { + if ((unsigned long)cur->addr - addr >= vm->size) + break; + addr = ALIGN((unsigned long)cur->addr + cur->size, align); + } - vm_area_add_early(vm); + BUG_ON(addr > VMALLOC_END - vm->size); + vm->addr = (void *)addr; + vm->next = *p; + *p = vm; } static void vmap_init_free_space(void) From 23472977d78a0089927254179730bc605d9cfaa1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:07:32 +1100 Subject: [PATCH 0586/1202] arm64: support page mapping percpu first chunk allocator The percpu embedded first chunk allocator is the first option, but it could fail on ARM64, e.g., "percpu: max_distance=0x5fcfdc640000 too large for vmalloc space 0x781fefff0000" "percpu: max_distance=0x600000540000 too large for vmalloc space 0x7dffb7ff0000" "percpu: max_distance=0x5fff9adb0000 too large for vmalloc space 0x5dffb7ff0000" then we could hit "WARNING: CPU: 15 PID: 461 at vmalloc.c:3087 pcpu_get_vm_areas+0x488/0x838" and the system could not boot successfully. Let's implement the page mapping percpu first chunk allocator as a fallback to the embedding allocator to increase the robustness of the system.
Link: https://lkml.kernel.org/r/20210910053354.26721-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Catalin Marinas Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Marco Elver Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm64/Kconfig | 4 ++ drivers/base/arch_numa.c | 82 +++++++++++++++++++++++++++++++++++----- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fee914c716aa2..67f24ee6cf572 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1042,6 +1042,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK def_bool y depends on NUMA +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + depends on NUMA + source "kernel/Kconfig.hz" config ARCH_SPARSEMEM_ENABLE diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 00fb4120a5b3a..28222688ace79 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -14,6 +14,7 @@ #include #include +#include struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -168,22 +169,83 @@ static void __init pcpu_fc_free(void *ptr, size_t size) memblock_free_early(__pa(ptr), size); } +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +static void __init pcpu_populate_pte(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + pud_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!new) + goto err_alloc; + p4d_populate(&init_mm, p4d, new); + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!new) + goto err_alloc; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!new) + goto err_alloc; + pmd_populate_kernel(&init_mm, pmd, new); + } + + return; + +err_alloc: + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); +} +#endif + void __init setup_per_cpu_areas(void) { unsigned long delta; unsigned int cpu; - int rc; + int rc = -EINVAL; + + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_fc_alloc, pcpu_fc_free); +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + if (rc < 0) + pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n", + pcpu_fc_names[pcpu_chosen_fc], rc); +#endif + } - /* - * Always reserve area for module percpu variables. That's - * what the legacy allocator did. 
- */ - rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, - pcpu_cpu_distance, - pcpu_fc_alloc, pcpu_fc_free); +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + if (rc < 0) + rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, + pcpu_fc_alloc, + pcpu_fc_free, + pcpu_populate_pte); +#endif if (rc < 0) - panic("Failed to initialize percpu areas."); + panic("Failed to initialize percpu areas (err=%d).", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu)

From 4b453aab41a2f04eae6c64fc40a13c57a14eb6a9 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:07:33 +1100 Subject: [PATCH 0587/1202] kasan: arm64: fix pcpu_page_first_chunk crash with KASAN_VMALLOC

With KASAN_VMALLOC and NEED_PER_CPU_PAGE_FIRST_CHUNK enabled, it crashes: Unable to handle kernel paging request at virtual address ffff7000028f2000 ... swapper pgtable: 64k pages, 48-bit VAs, pgdp=0000000042440000 [ffff7000028f2000] pgd=000000063e7c0003, p4d=000000063e7c0003, pud=000000063e7c0003, pmd=000000063e7b0003, pte=0000000000000000 Internal error: Oops: 96000007 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.0-rc4-00003-gc6e6e28f3f30-dirty #62 Hardware name: linux,dummy-virt (DT) pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO BTYPE=--) pc : kasan_check_range+0x90/0x1a0 lr : memcpy+0x88/0xf4 sp : ffff80001378fe20 ... Call trace: kasan_check_range+0x90/0x1a0 pcpu_page_first_chunk+0x3f0/0x568 setup_per_cpu_areas+0xb8/0x184 start_kernel+0x8c/0x328 The vm area used in vm_area_register_early() has no kasan shadow memory. Let's add a new kasan_populate_early_vm_area_shadow() function to populate the vm area shadow memory to fix the issue. Link: https://lkml.kernel.org/r/20210910053354.26721-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Marco Elver [KASAN] Acked-by: Andrey Konovalov [KASAN] Acked-by: Catalin Marinas Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Greg Kroah-Hartman Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm64/mm/kasan_init.c | 16 ++++++++++++++++ include/linux/kasan.h | 6 ++++++ mm/kasan/init.c | 5 +++++ mm/vmalloc.c | 1 + 4 files changed, 28 insertions(+) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 61b52a92b8b68..5b996ca4d9960 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -287,6 +287,22 @@ static void __init kasan_init_depth(void) init_task.kasan_depth = 0; } +#ifdef CONFIG_KASAN_VMALLOC +void __init kasan_populate_early_vm_area_shadow(void *start, unsigned long size) +{ + unsigned long shadow_start, shadow_end; + + if (!is_vmalloc_or_module_addr(start)) + return; + + shadow_start = (unsigned long)kasan_mem_to_shadow(start); + shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(start + size); + shadow_end = ALIGN(shadow_end, PAGE_SIZE); + kasan_map_populate(shadow_start, shadow_end, NUMA_NO_NODE); +} +#endif + void __init kasan_init(void) { kasan_init_shadow(); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 736d7b458996d..65487a3b2a711 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -436,6 +436,8 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); +void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); + #else /* CONFIG_KASAN_VMALLOC */ static inline int kasan_populate_vmalloc(unsigned long start, @@
-453,6 +455,10 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_start, unsigned long free_region_end) {} +static inline void kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ } + #endif /* CONFIG_KASAN_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ diff --git a/mm/kasan/init.c b/mm/kasan/init.c index cc64ed6858c66..d39577d088a1c 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -279,6 +279,11 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, return 0; } +void __init __weak kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ +} + static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd) { pte_t *pte; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0bffb93a41725..b7ac4a8fe2b3a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2291,6 +2291,7 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align) vm->addr = (void *)addr; vm->next = *p; *p = vm; + kasan_populate_early_vm_area_shadow(vm->addr, vm->size); } static void vmap_init_free_space(void)

From 92526f49bbafcbb90b87dc90827b654821110931 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:07:33 +1100 Subject: [PATCH 0588/1202] mm: kasan: fix redefinition of 'kasan_populate_early_vm_area_shadow'

Move kasan_populate_early_vm_area_shadow() from mm/kasan/init.c to mm/kasan/shadow.c and place it under CONFIG_KASAN_VMALLOC to fix the redefinition issue. Link: https://lkml.kernel.org/r/20211011123211.3936196-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reported-by: Linux Kernel Functional Testing Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Dmitry Vyukov Cc: Naresh Kamboju Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kasan/init.c | 5 ----- mm/kasan/shadow.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/kasan/init.c b/mm/kasan/init.c index d39577d088a1c..cc64ed6858c66 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -279,11 +279,6 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, return 0; } -void __init __weak kasan_populate_early_vm_area_shadow(void *start, - unsigned long size) -{ -} - static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd) { pte_t *pte; diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 8d95ee52d0194..4a4929b29a237 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -254,6 +254,11 @@ core_initcall(kasan_memhotplug_init); #ifdef CONFIG_KASAN_VMALLOC +void __init __weak kasan_populate_early_vm_area_shadow(void *start, + unsigned long size) +{ +} + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) {

From fb5e732ffbbec5720c72f27152c60d649cfc0f59 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 21 Oct 2021 15:07:34 +1100 Subject: [PATCH 0589/1202] mm/vmalloc: introduce alloc_pages_bulk_array_mempolicy to accelerate memory allocation

Allocating pages one at a time causes significant performance regressions in some situations, as Andrew mentioned in [1]. The main case is vmalloc: it allocates pages with NUMA_NO_NODE by default, which results in pages being allocated one by one. To solve this, __alloc_pages_bulk and mempolicy should be considered at the same time: 1) If a node is specified in the memory allocation request, allocate all pages with __alloc_pages_bulk. 2) For interleaved allocation, calculate how many pages should be allocated on each node (as sketched below), and use __alloc_pages_bulk on each node.
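For point 2 the split is plain integer division, with the remainder handed out one extra page at a time to the first nodes; a sketch with illustrative numbers (variables as in the patch below):

	nodes = nodes_weight(pol->nodes);             /* e.g. 4 nodes       */
	nr_pages_per_node = nr_pages / nodes;         /* 10 / 4 = 2         */
	delta = nr_pages - nodes * nr_pages_per_node; /* 10 - 4*2 = 2 left  */
	/* the first 'delta' nodes request nr_pages_per_node + 1 pages,
	 * the rest request nr_pages_per_node: 3 + 3 + 2 + 2 = 10           */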
Based on "[PATCH] mm/vmalloc: fix numa spreading for large hash tables". [1]: https://lore.kernel.org/lkml/CALvZod4G3SzP3kWxQYn0fj+VgG-G3yWXz=gz17+3N57ru1iajw@mail.gmail.com/t/#m750c8e3231206134293b089feaa090590afa0f60 Link: https://lkml.kernel.org/r/20211014092952.1500982-1-chenwandun@huawei.com Signed-off-by: Chen Wandun Reviewed-by: Uladzislau Rezki (Sony) Cc: Nicholas Piggin Cc: Eric Dumazet Cc: Kefeng Wang Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/gfp.h | 4 +++ mm/mempolicy.c | 76 +++++++++++++++++++++++++++++++++++++++++++++ mm/vmalloc.c | 19 +++--------- 3 files changed, 85 insertions(+), 14 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fbd4abc33f242..c1b262725dca9 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -535,6 +535,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, struct list_head *page_list, struct page **page_array); +unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, + unsigned long nr_pages, + struct page **page_array); + /* Bulk allocate order-0 pages */ static inline unsigned long alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d12e0608fced2..a57d97e4ee8b6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2196,6 +2196,82 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); +unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, + struct mempolicy *pol, unsigned long nr_pages, + struct page **page_array) +{ + int nodes; + unsigned long nr_pages_per_node; + int delta; + int i; + unsigned long nr_allocated; + unsigned long total_allocated = 0; + + nodes = nodes_weight(pol->nodes); + nr_pages_per_node = nr_pages / nodes; + delta = nr_pages - nodes * nr_pages_per_node; + + for (i = 0; i < nodes; i++) { + if (delta) { + nr_allocated = __alloc_pages_bulk(gfp, + interleave_nodes(pol), NULL, + nr_pages_per_node + 1, NULL, + page_array); + delta--; + } else { + nr_allocated = __alloc_pages_bulk(gfp, + interleave_nodes(pol), NULL, + nr_pages_per_node, NULL, page_array); + } + + page_array += nr_allocated; + total_allocated += nr_allocated; + } + + return total_allocated; +} + +unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, + struct mempolicy *pol, unsigned long nr_pages, + struct page **page_array) +{ + gfp_t preferred_gfp; + unsigned long nr_allocated = 0; + + preferred_gfp = gfp | __GFP_NOWARN; + preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); + + nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes, + nr_pages, NULL, page_array); + + if (nr_allocated < nr_pages) + nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL, + nr_pages - nr_allocated, NULL, + page_array + nr_allocated); + return nr_allocated; +} + +unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, + unsigned long nr_pages, struct page **page_array) +{ + struct mempolicy *pol = &default_policy; + + if (!in_interrupt() && !(gfp & __GFP_THISNODE)) + pol = get_task_policy(current); + + if (pol->mode == MPOL_INTERLEAVE) + return alloc_pages_bulk_array_interleave(gfp, pol, + nr_pages, page_array); + + if (pol->mode == MPOL_PREFERRED_MANY) + return alloc_pages_bulk_array_preferred_many(gfp, + numa_node_id(), pol, nr_pages, page_array); + + return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()), + policy_nodemask(gfp, pol), nr_pages, NULL, + page_array); +} + int vma_dup_policy(struct vm_area_struct *src, 
struct vm_area_struct *dst) { struct mempolicy *pol = mpol_dup(vma_policy(src)); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b7ac4a8fe2b3a..49adba793f3c1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2856,23 +2856,14 @@ vm_area_alloc_pages(gfp_t gfp, int nid, */ nr_pages_request = min(100U, nr_pages - nr_allocated); - if (nid == NUMA_NO_NODE) { - for (i = 0; i < nr_pages_request; i++) { - page = alloc_page(gfp); - if (page) - pages[nr_allocated + i] = page; - else { - nr = i; - break; - } - } - if (i >= nr_pages_request) - nr = nr_pages_request; - } else { + if (nid == NUMA_NO_NODE) + nr = alloc_pages_bulk_array_mempolicy(gfp, + nr_pages_request, + pages + nr_allocated); + else nr = alloc_pages_bulk_array_node(gfp, nid, nr_pages_request, pages + nr_allocated); - } nr_allocated += nr; cond_resched(); From 0b9d802b14e6defb2224275879b45f86fbace1eb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:07:35 +1100 Subject: [PATCH 0590/1202] mm-vmalloc-introduce-alloc_pages_bulk_array_mempolicy-to-accelerate-memory-allocation-fix make two functions static Cc: Chen Wandun Cc: Eric Dumazet Cc: Hanjun Guo Cc: Kefeng Wang Cc: Nicholas Piggin Cc: Shakeel Butt Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/mempolicy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a57d97e4ee8b6..e18c91d301c53 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2196,7 +2196,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) } EXPORT_SYMBOL(alloc_pages); -unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, +static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { @@ -2231,7 +2231,7 @@ unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp, return total_allocated; } -unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, +static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid, struct mempolicy *pol, unsigned long nr_pages, struct page **page_array) { From f39ae67ac474cb9db7ad063c59ec24058c63b9de Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:07:35 +1100 Subject: [PATCH 0591/1202] mm-vmalloc-introduce-alloc_pages_bulk_array_mempolicy-to-accelerate-memory-allocation-fix-2 fix CONFIG_NUMA=n build. alloc_pages_bulk_array_mempolicy() was undefined Cc: Chen Wandun Cc: Eric Dumazet Cc: Hanjun Guo Cc: Kefeng Wang Cc: Nicholas Piggin Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 49adba793f3c1..b2df76893c128 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2856,7 +2856,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid, */ nr_pages_request = min(100U, nr_pages - nr_allocated); - if (nid == NUMA_NO_NODE) + if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE) nr = alloc_pages_bulk_array_mempolicy(gfp, nr_pages_request, pages + nr_allocated); From 49b112b17196804813a45a34b2dccd8aac9980f9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 21 Oct 2021 15:07:36 +1100 Subject: [PATCH 0592/1202] mm/vmalloc: be more explicit about supported gfp flags The core of the vmalloc allocator __vmalloc_area_node doesn't say anything about gfp mask argument. Not all gfp flags are supported though. Be more explicit about constraints. 
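Per the kernel-doc comment added below, the supported call patterns boil down to the following (an illustrative sketch only, not taken from the patch):

	p = __vmalloc(size, GFP_KERNEL);                 /* preferred          */
	p = __vmalloc(size, GFP_NOFS | __GFP_NOWARN);    /* supported          */
	p = __vmalloc(size, GFP_KERNEL | __GFP_NOFAIL);  /* supported          */
	p = __vmalloc(size, GFP_NOWAIT);          /* no: lacks __GFP_DIRECT_RECLAIM */
	p = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM); /* no: zone modifier  */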
Link: https://lkml.kernel.org/r/20211020082545.4830-1-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Dave Chinner Cc: Neil Brown Cc: Christoph Hellwig Cc: Uladzislau Rezki Cc: Ilya Dryomov Cc: Jeff Layton Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmalloc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b2df76893c128..de21457cd3f5f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2990,8 +2990,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, * @caller: caller's return address * * Allocate enough pages to cover @size from the page level - * allocator with @gfp_mask flags. Map them into contiguous - * kernel virtual space, using a pagetable protection of @prot. + * allocator with @gfp_mask flags. Please note that the full set of gfp + * flags are not supported. GFP_KERNEL would be a preferred allocation mode + * but GFP_NOFS and GFP_NOIO are supported as well. Zone modifiers are not + * supported. From the reclaim modifiers __GFP_DIRECT_RECLAIM is required (aka + * GFP_NOWAIT is not supported) and only __GFP_NOFAIL is supported (aka + * __GFP_NORETRY and __GFP_RETRY_MAYFAIL are not supported). + * __GFP_NOWARN can be used to suppress error messages about failures. + * + * Map them into contiguous kernel virtual space, using a pagetable + * protection of @prot. * * Return: the address of the area or %NULL on failure */

From 45589500a3d292ca80918d30d4131c13e4e2ec6c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Oct 2021 15:07:36 +1100 Subject: [PATCH 0593/1202] mm/large system hash: avoid possible NULL deref in alloc_large_system_hash

If __vmalloc() returned NULL, is_vm_area_hugepages(NULL) will fault if CONFIG_HAVE_ARCH_HUGE_VMALLOC=y. Link: https://lkml.kernel.org/r/20210915212530.2321545-1-eric.dumazet@gmail.com Fixes: 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings") Signed-off-by: Eric Dumazet Reviewed-by: Andrew Morton Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23d3339ac4e8e..133dad9f46699 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8762,7 +8762,8 @@ void *__init alloc_large_system_hash(const char *tablename, } else if (get_order(size) >= MAX_ORDER || hashdist) { table = __vmalloc(size, gfp_flags); virt = true; - huge = is_vm_area_hugepages(table); + if (table) + huge = is_vm_area_hugepages(table); } else { /* * If bucketsize is not a power-of-two, we may free

From b5abe1fe0c97fccc36296dce3f9321cc744b1ae6 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:07:37 +1100 Subject: [PATCH 0594/1202] mm/page_alloc.c: remove meaningless VM_BUG_ON() in pindex_to_order()

Patch series "Cleanups and fixup for page_alloc", v2. This series contains cleanups to remove meaningless VM_BUG_ON(), use helpers to simplify the code and remove an obsolete comment. Also we avoid allocating highmem pages via alloc_pages_exact[_nid]. More details can be found in the respective changelogs. This patch (of 5): It's meaningless to VM_BUG_ON() order != pageblock_order just after setting order to pageblock_order. Remove it.
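A worked example of why, assuming MIGRATE_PCPTYPES == 3 and PAGE_ALLOC_COSTLY_ORDER == 3:

	int order = pindex / MIGRATE_PCPTYPES;  /* e.g. pindex 12 -> order 4 */

	if (order > PAGE_ALLOC_COSTLY_ORDER)    /* 4 > 3                     */
		order = pageblock_order;        /* order == pageblock_order
						 * by construction, so the
						 * removed VM_BUG_ON could
						 * never fire               */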
Link: https://lkml.kernel.org/r/20210902121242.41607-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20210902121242.41607-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Mel Gorman Reviewed-by: David Hildenbrand Cc: Vlastimil Babka Cc: Stephen Rothwell Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 133dad9f46699..afaa544f129f6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -677,10 +677,8 @@ static inline int pindex_to_order(unsigned int pindex) int order = pindex / MIGRATE_PCPTYPES; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (order > PAGE_ALLOC_COSTLY_ORDER) { + if (order > PAGE_ALLOC_COSTLY_ORDER) order = pageblock_order; - VM_BUG_ON(order != pageblock_order); - } #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); #endif

From 41ab0e4c7b8dce3148cc450bdea7bf93ceb5dc0a Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:07:38 +1100 Subject: [PATCH 0595/1202] mm/page_alloc.c: simplify the code by using macro K()

Use the helper macro K() to convert page counts to the corresponding sizes in kilobytes. Minor readability improvement. Link: https://lkml.kernel.org/r/20210902121242.41607-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Mel Gorman Reviewed-by: David Hildenbrand Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index afaa544f129f6..8224c6c302dd2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8130,8 +8130,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char } if (pages && s) - pr_info("Freeing %s memory: %ldK\n", - s, pages << (PAGE_SHIFT - 10)); + pr_info("Freeing %s memory: %ldK\n", s, K(pages)); return pages; } @@ -8176,14 +8175,13 @@ void __init mem_init_print_info(void) ", %luK highmem" #endif ")\n", - nr_free_pages() << (PAGE_SHIFT - 10), - physpages << (PAGE_SHIFT - 10), + K(nr_free_pages()), K(physpages), codesize >> 10, datasize >> 10, rosize >> 10, (init_data_size + init_code_size) >> 10, bss_size >> 10, - (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10), - totalcma_pages << (PAGE_SHIFT - 10) + K(physpages - totalram_pages() - totalcma_pages), + K(totalcma_pages) #ifdef CONFIG_HIGHMEM - , totalhigh_pages() << (PAGE_SHIFT - 10) + , K(totalhigh_pages()) #endif ); }

From fddba69e962f596cac85bae21b4763749fe13d69 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:07:38 +1100 Subject: [PATCH 0596/1202] mm/page_alloc.c: fix obsolete comment in free_pcppages_bulk()

The last two paragraphs, about "all pages pinned" and pages_scanned, are obsolete. And there are PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP orders in pcp, so the same-order assumption no longer holds.
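A quick count, assuming the v5.15-era definitions with PAGE_ALLOC_COSTLY_ORDER == 3 and NR_PCP_THP == 1: the pcp lists then cover orders 0, 1, 2, 3 plus pageblock_order, i.e.

	/* one list per migratetype and per pcp-held order */
	NR_PCP_LISTS = MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP);

so a single free_pcppages_bulk() call can see pages of several different orders.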
Link: https://lkml.kernel.org/r/20210902121242.41607-4-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Mel Gorman Cc: David Hildenbrand Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8224c6c302dd2..c494c9c66f4f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1428,14 +1428,8 @@ static inline void prefetch_buddy(struct page *page) /* * Frees a number of pages from the PCP lists - * Assumes all pages on list are in same zone, and of same order. + * Assumes all pages on list are in same zone. * count is the number of pages to free. - * - * If the zone was previously in an "all pages pinned" state then look to - * see if this freeing clears that state. - * - * And clear the zone's pages_scanned counter, to hold off the "all pages are - * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, struct per_cpu_pages *pcp)

From cbf7a1fdc14de88149712145b2a4f09293c92237 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:07:39 +1100 Subject: [PATCH 0597/1202] mm/page_alloc.c: use helper function zone_spans_pfn()

Use the helper function zone_spans_pfn() to check whether a pfn is within a zone, to simplify the code slightly. Link: https://lkml.kernel.org/r/20210902121242.41607-5-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Acked-by: Mel Gorman Reviewed-by: David Hildenbrand Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c494c9c66f4f0..26aebbbf1f666 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1583,7 +1583,7 @@ static void __meminit init_reserved_page(unsigned long pfn) for (zid = 0; zid < MAX_NR_ZONES; zid++) { struct zone *zone = &pgdat->node_zones[zid]; - if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) + if (zone_spans_pfn(zone, pfn)) break; } __init_single_page(pfn_to_page(pfn), pfn, zid, nid);

From c55472659ca57f9cd100cb46522be5d19a845b79 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:07:39 +1100 Subject: [PATCH 0598/1202] mm/page_alloc.c: avoid allocating highmem pages via alloc_pages_exact[_nid]

Don't allow alloc_pages_exact[_nid]() to be used with __GFP_HIGHMEM, because page_address() cannot represent highmem pages without kmap(). Newly allocated pages would leak, as page_address() will return NULL for highmem pages here. It works today only because the callers do not specify __GFP_HIGHMEM.
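A minimal sketch of the leak being guarded against, with a hypothetical caller (no in-tree caller passes __GFP_HIGHMEM today) on a 32-bit highmem system:

	p = alloc_pages_exact(size, GFP_KERNEL | __GFP_HIGHMEM); /* hypothetical */
	/* the highmem pages do get allocated, but page_address() inside
	 * make_alloc_exact() returns NULL for them, so 'p' is unusable
	 * and the pages can never be freed again */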
Link: https://lkml.kernel.org/r/20210902121242.41607-6-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Cc: Mel Gorman Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 26aebbbf1f666..307ad23cb9e9b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5610,8 +5610,8 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned int order = get_order(size); unsigned long addr; - if (WARN_ON_ONCE(gfp_mask & __GFP_COMP)) - gfp_mask &= ~__GFP_COMP; + if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) + gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); addr = __get_free_pages(gfp_mask, order); return make_alloc_exact(addr, order, size); @@ -5635,8 +5635,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) unsigned int order = get_order(size); struct page *p; - if (WARN_ON_ONCE(gfp_mask & __GFP_COMP)) - gfp_mask &= ~__GFP_COMP; + if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) + gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); p = alloc_pages_node(nid, gfp_mask, order); if (!p)

From 1fd0fd3e7065015fb8becaec3fc35f235d69a8a2 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Thu, 21 Oct 2021 15:07:40 +1100 Subject: [PATCH 0599/1202] mm/page_alloc: print node fallback order

Patch series "Fix NUMA nodes fallback list ordering". For a NUMA system that has multiple nodes at the same distance from other nodes, the fallback list generation prefers the same node order for them instead of round-robin, thereby penalizing one node over the others. This series fixes it. More description of the problem and the fix is present in the patch description. This patch (of 2): Print an information message about the allocation fallback order for each NUMA node during boot. No functional changes here. This makes it easier to illustrate the problem in the node fallback list generation, which the next patch fixes. Link: https://lkml.kernel.org/r/20210830121603.1081-1-bharata@amd.com Link: https://lkml.kernel.org/r/20210830121603.1081-2-bharata@amd.com Signed-off-by: Bharata B Rao Acked-by: Mel Gorman Reviewed-by: Anshuman Khandual Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Krupa Ramakrishnan Cc: Sadagopan Srinivasan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 307ad23cb9e9b..163f60dcac47f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6262,6 +6262,10 @@ static void build_zonelists(pg_data_t *pgdat) build_zonelists_in_node_order(pgdat, node_order, nr_nodes); build_thisnode_zonelists(pgdat); + pr_info("Fallback order for Node %d: ", local_node); + for (node = 0; node < nr_nodes; node++) + pr_cont("%d ", node_order[node]); + pr_cont("\n"); } #ifdef CONFIG_HAVE_MEMORYLESS_NODES

From fc1201f57c9257db5bcc803d081e33ff9faa6606 Mon Sep 17 00:00:00 2001 From: Krupa Ramakrishnan Date: Thu, 21 Oct 2021 15:07:41 +1100 Subject: [PATCH 0600/1202] mm/page_alloc: use accumulated load when building node fallback list

In build_zonelists(), when the fallback list is built for the nodes, the node load gets reinitialized during each iteration. This results in nodes with the same distance occupying the same slot in different node fallback lists rather than appearing in the intended round-robin manner.
This results in one node being picked for allocation more often than other nodes with the same distance. As an example, consider a 4-node system with the following distance matrix:

	Node   0   1   2   3
	--------------------
	 0    10  12  32  32
	 1    12  10  32  32
	 2    32  32  10  12
	 3    32  32  12  10

For this case, the node fallback list gets built like this:

	Node  Fallback list
	---------------------
	 0    0 1 2 3
	 1    1 0 3 2
	 2    2 3 0 1
	 3    3 2 0 1 <-- Unexpected fallback order

In the fallback list for nodes 2 and 3, the nodes 0 and 1 appear in the same order, which results in more allocations getting satisfied from node 0 compared to node 1. The effect of this on remote memory bandwidth as seen by the stream benchmark is shown below: Case 1: Bandwidth from cores on nodes 2 & 3 to memory on nodes 0 & 1 (numactl -m 0,1 ./stream_lowOverhead ... --cores ) Case 2: Bandwidth from cores on nodes 0 & 1 to memory on nodes 2 & 3 (numactl -m 2,3 ./stream_lowOverhead ... --cores )

	----------------------------------------
	            BANDWIDTH (MB/s)
	 TEST       Case 1       Case 2
	----------------------------------------
	 COPY       57479.6      110791.8
	 SCALE      55372.9      105685.9
	 ADD        50460.6       96734.2
	 TRIADD     50397.6       97119.1
	----------------------------------------

The bandwidth drop in Case 1 occurs because most of the allocations get satisfied by node 0, as it appears first in the fallback order for both nodes 2 and 3. This can be fixed by accumulating the node load in build_zonelists() rather than reinitializing it during each iteration. With this, nodes with the same distance rightly get assigned in round-robin manner. In fact, this was how it was originally, until commit f0c0b2b808f2 ("change zonelist order: zonelist order selection logic") dropped the load accumulation and resorted to initializing the load during each iteration. While zonelist ordering was removed by commit c9bff3eebc09 ("mm, page_alloc: rip out ZONELIST_ORDER_ZONE"), the change to the node load accumulation in build_zonelists() remained. So essentially, this patch reverts to the accumulated node load logic. After this fix, the fallback order gets built like this:

	Node  Fallback list
	------------------
	 0    0 1 2 3
	 1    1 0 3 2
	 2    2 3 0 1
	 3    3 2 1 0 <-- Note the change here

The bandwidth in Case 1 improves and matches Case 2, as shown below.

	----------------------------------------
	            BANDWIDTH (MB/s)
	 TEST       Case 1       Case 2
	----------------------------------------
	 COPY       110438.9     110107.2
	 SCALE      105930.5     105817.5
	 ADD         97005.1      96159.8
	 TRIADD      97441.5      96757.1
	----------------------------------------

The correctness of the fallback list generation has been verified for the above node configuration, where node 3 starts as a memory-less node and comes up online only during memory hotplug.
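A simplified sketch of the loop in build_zonelists() with the fix applied (helper names as in mm/page_alloc.c, error paths elided):

	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
		if (node_distance(local_node, node) !=
		    node_distance(local_node, prev_node))
			node_load[node] += load; /* was '= load': equal-distance
						  * ties got replayed identically
						  * for every local node        */
		node_order[nr_nodes++] = node;
		prev_node = node;
		load--;
	}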
[bharata@amd.com: Added changelog, review, test validation] Link: https://lkml.kernel.org/r/20210830121603.1081-3-bharata@amd.com Fixes: f0c0b2b808f2 ("change zonelist order: zonelist order selection logic") Signed-off-by: Krupa Ramakrishnan Co-developed-by: Sadagopan Srinivasan Signed-off-by: Sadagopan Srinivasan Signed-off-by: Bharata B Rao Acked-by: Mel Gorman Reviewed-by: Anshuman Khandual Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 163f60dcac47f..1eab16b4006d8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6253,7 +6253,7 @@ static void build_zonelists(pg_data_t *pgdat) */ if (node_distance(local_node, node) != node_distance(local_node, prev_node)) - node_load[node] = load; + node_load[node] += load; node_order[nr_nodes++] = node; prev_node = node;

From feafef649a212860241b8ea45207e058f733e21d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Oct 2021 15:07:41 +1100 Subject: [PATCH 0601/1202] mm: move node_reclaim_distance to fix NUMA without SMP

Patch series "Fix NUMA without SMP". SuperH is the only architecture which still supports NUMA without SMP, for good reasons (various memories scattered around the address space, each with varying latencies). This series fixes two build errors due to variables and functions used by the NUMA code being provided by SMP-only source files or sections. This patch (of 2): If CONFIG_NUMA=y, but CONFIG_SMP=n (e.g. sh/migor_defconfig): sh4-linux-gnu-ld: mm/page_alloc.o: in function `get_page_from_freelist': page_alloc.c:(.text+0x2c24): undefined reference to `node_reclaim_distance' Fix this by moving the definition of node_reclaim_distance from an SMP-only file to a generic one.
Link: https://lkml.kernel.org/r/cover.1631781495.git.geert+renesas@glider.be Link: https://lkml.kernel.org/r/6432666a648dde85635341e6c918cee97c97d264.1631781495.git.geert+renesas@glider.be Fixes: a55c7454a8c887b2 ("sched/topology: Improve load balancing on AMD EPYC systems") Signed-off-by: Geert Uytterhoeven Suggested-by: Matt Fleming Acked-by: Mel Gorman Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Juri Lelli Cc: Vincent Guittot Cc: Vlastimil Babka Cc: Yoshinori Sato Cc: Rich Felker Cc: Gon Solo Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/sched/topology.c | 1 - mm/page_alloc.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 4e8698e62f075..738ee7fa79724 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1481,7 +1481,6 @@ static int sched_domains_curr_level; int sched_max_numa_distance; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; -int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; static unsigned long __read_mostly *sched_numa_onlined_nodes; #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1eab16b4006d8..e4aef907abe9d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3960,6 +3960,8 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, } #ifdef CONFIG_NUMA +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; + static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) { return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= From fe4cc694510964179c166792e059558d072d3aac Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Oct 2021 15:07:42 +1100 Subject: [PATCH 0602/1202] mm: move fold_vm_numa_events() to fix NUMA without SMP If CONFIG_NUMA=y, but CONFIG_SMP=n (e.g. sh/migor_defconfig): sh4-linux-gnu-ld: mm/vmstat.o: in function `vmstat_start': vmstat.c:(.text+0x97c): undefined reference to `fold_vm_numa_events' sh4-linux-gnu-ld: drivers/base/node.o: in function `node_read_vmstat': node.c:(.text+0x140): undefined reference to `fold_vm_numa_events' sh4-linux-gnu-ld: drivers/base/node.o: in function `node_read_numastat': node.c:(.text+0x1d0): undefined reference to `fold_vm_numa_events' Fix this by moving fold_vm_numa_events() outside the SMP-only section. 
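A minimal sketch of the failure mode both patches in this series fix, assuming CONFIG_NUMA=y and CONFIG_SMP=n:

	/* mm/vmstat.c (before): definition only built with CONFIG_SMP */
	#ifdef CONFIG_SMP
	void fold_vm_numa_events(void) { /* ... */ }
	#endif

	/* drivers/base/node.c: caller guarded only by CONFIG_NUMA, so
	 * the reference survives and the link fails when !SMP        */
	fold_vm_numa_events();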
Link: https://lkml.kernel.org/r/9d16ccdd9ef32803d7100c84f737de6a749314fb.1631781495.git.geert+renesas@glider.be Fixes: f19298b9516c1a03 ("mm/vmstat: convert NUMA statistics to basic NUMA counters") Signed-off-by: Geert Uytterhoeven Acked-by: Mel Gorman Cc: Gon Solo Cc: Ingo Molnar Cc: Juri Lelli Cc: Matt Fleming Cc: Peter Zijlstra Cc: Rich Felker Cc: Vincent Guittot Cc: Vlastimil Babka Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 56 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 8ce2620344b2f..5db54581feab9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -165,6 +165,34 @@ atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_zone_stat); EXPORT_SYMBOL(vm_node_stat); +#ifdef CONFIG_NUMA +static void fold_vm_zone_numa_events(struct zone *zone) +{ + unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; + int cpu; + enum numa_stat_item item; + + for_each_online_cpu(cpu) { + struct per_cpu_zonestat *pzstats; + + pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); + for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) + zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); + } + + for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) + zone_numa_event_add(zone_numa_events[item], zone, item); +} + +void fold_vm_numa_events(void) +{ + struct zone *zone; + + for_each_populated_zone(zone) + fold_vm_zone_numa_events(zone); +} +#endif + #ifdef CONFIG_SMP int calculate_pressure_threshold(struct zone *zone) @@ -771,34 +799,6 @@ static int fold_diff(int *zone_diff, int *node_diff) return changes; } -#ifdef CONFIG_NUMA -static void fold_vm_zone_numa_events(struct zone *zone) -{ - unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, }; - int cpu; - enum numa_stat_item item; - - for_each_online_cpu(cpu) { - struct per_cpu_zonestat *pzstats; - - pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu); - for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) - zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0); - } - - for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) - zone_numa_event_add(zone_numa_events[item], zone, item); -} - -void fold_vm_numa_events(void) -{ - struct zone *zone; - - for_each_populated_zone(zone) - fold_vm_zone_numa_events(zone); -} -#endif - /* * Update the zone counters for the current cpu. *

From 4659cb5b96e6105f28443827790f8c349e719462 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Oct 2021 15:07:42 +1100 Subject: [PATCH 0603/1202] mm/page_alloc.c: do not acquire zone lock in is_free_buddy_page()

Grabbing the zone lock in is_free_buddy_page() gives a false sense of safety, and has potential performance implications when the zone is experiencing lock contention. In any case, if a caller needs a stable result, it should grab the zone lock before calling this function.
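A caller that does need a stable answer would take the lock itself, e.g. (illustrative):

	spin_lock_irqsave(&zone->lock, flags);
	free = is_free_buddy_page(page); /* stable while zone->lock is held */
	spin_unlock_irqrestore(&zone->lock, flags);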
Link: https://lkml.kernel.org/r/20210922152833.4023972-1-eric.dumazet@gmail.com Signed-off-by: Eric Dumazet Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e4aef907abe9d..e891560e0a80c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -9356,21 +9356,21 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) } #endif +/* + * This function returns a stable result only if called under zone lock. + */ bool is_free_buddy_page(struct page *page) { - struct zone *zone = page_zone(page); unsigned long pfn = page_to_pfn(page); - unsigned long flags; unsigned int order; - spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); - if (PageBuddy(page_head) && buddy_order(page_head) >= order) + if (PageBuddy(page_head) && + buddy_order_unsafe(page_head) >= order) break; } - spin_unlock_irqrestore(&zone->lock, flags); return order < MAX_ORDER; }

From a38ac52eb3c4ccb3ede0fce55acfe504feac8b71 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 21 Oct 2021 15:07:43 +1100 Subject: [PATCH 0604/1202] mm/page_alloc: detect allocation forbidden by cpuset and bail out early

There was a report that starting an Ubuntu container in Docker, while using cpuset to bind it to movable nodes (nodes that only have a movable zone, like a node meant for hotplug or a Persistent Memory node in normal usage), fails due to a memory allocation failure; the OOM killer then gets involved and many other innocent processes get killed. It can be reproduced with the command: $docker run -it --rm --cpuset-mems 4 ubuntu:latest bash -c "grep Mems_allowed /proc/self/status" (node 4 is a movable node) runc:[2:INIT] invoked oom-killer: gfp_mask=0x500cc2(GFP_HIGHUSER|__GFP_ACCOUNT), order=0, oom_score_adj=0 CPU: 8 PID: 8291 Comm: runc:[2:INIT] Tainted: G W I E 5.8.2-0.g71b519a-default #1 openSUSE Tumbleweed (unreleased) Hardware name: Dell Inc. PowerEdge R640/0PHYDR, BIOS 2.6.4 04/09/2020 Call Trace: dump_stack+0x6b/0x88 dump_header+0x4a/0x1e2 oom_kill_process.cold+0xb/0x10 out_of_memory.part.0+0xaf/0x230 out_of_memory+0x3d/0x80 __alloc_pages_slowpath.constprop.0+0x954/0xa20 __alloc_pages_nodemask+0x2d3/0x300 pipe_write+0x322/0x590 new_sync_write+0x196/0x1b0 vfs_write+0x1c3/0x1f0 ksys_write+0xa7/0xe0 do_syscall_64+0x52/0xd0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Mem-Info: active_anon:392832 inactive_anon:182 isolated_anon:0 active_file:68130 inactive_file:151527 isolated_file:0 unevictable:2701 dirty:0 writeback:7 slab_reclaimable:51418 slab_unreclaimable:116300 mapped:45825 shmem:735 pagetables:2540 bounce:0 free:159849484 free_pcp:73 free_cma:0 Node 4 active_anon:1448kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:0kB dirty:0kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB all_unreclaimable?
no Node 4 Movable free:130021408kB min:9140kB low:139160kB high:269180kB reserved_highatomic:0KB active_anon:1448kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:130023424kB managed:130023424kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:292kB local_pcp:84kB free_cma:0kB lowmem_reserve[]: 0 0 0 0 0 Node 4 Movable: 1*4kB (M) 0*8kB 0*16kB 1*32kB (M) 0*64kB 0*128kB 1*256kB (M) 1*512kB (M) 1*1024kB (M) 0*2048kB 31743*4096kB (M) = 130021156kB oom-kill:constraint=CONSTRAINT_CPUSET,nodemask=(null),cpuset=docker-9976a269caec812c134fa317f27487ee36e1129beba7278a463dd53e5fb9997b.scope,mems_allowed=4,global_oom,task_memcg=/system.slice/containerd.service,task=containerd,pid=4100,uid=0 Out of memory: Killed process 4100 (containerd) total-vm:4077036kB, anon-rss:51184kB, file-rss:26016kB, shmem-rss:0kB, UID:0 pgtables:676kB oom_score_adj:0 oom_reaper: reaped process 8248 (docker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 2054 (node_exporter), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 1452 (systemd-journal), now anon-rss:0kB, file-rss:8564kB, shmem-rss:4kB oom_reaper: reaped process 2146 (munin-node), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB oom_reaper: reaped process 8291 (runc:[2:INIT]), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB

The reason is that in this case the target cpuset nodes only have a movable zone, while creating an OS in Docker sometimes needs to allocate memory in non-movable zones (dma/dma32/normal), e.g. GFP_HIGHUSER allocations, and the cpuset limit forbids that. Out-of-memory killing then gets involved even though both the normal nodes and the movable nodes have plenty of free memory. The OOM killer cannot help to resolve the situation, as there is no usable memory for the request within the cpuset scope. The only reasonable measure to take is to fail the allocation right away and have the caller deal with it. So add a check for cases like this in the slow path of allocation, and bail out early, returning NULL for the allocation. As page allocation is one of the hottest paths in the kernel, this check would hurt all users with a sane cpuset configuration, so add a static branch and detect the abnormal config in the cpuset memory-binding setup so that the extra check cost in page allocation is not paid by everyone.
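The static key follows the usual pattern: pay a one-time cost on the (rare) configuration path so the hot path stays a patched no-op. A sketch of the shape used by the patch below:

	DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key);

	/* cpuset setup path: flip the key once when a movable-only
	 * mems_allowed is configured */
	if (movable_only_nodes(nodes))
		static_branch_enable(&cpusets_insane_config_key);

	/* page allocator slowpath: compiles to a no-op until enabled */
	if (static_branch_unlikely(&cpusets_insane_config_key))
		/* ... recheck the zonelist and bail out early ... */;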
[thanks to Michal Hocko and David Rientjes for suggesting not handling it inside OOM code, adding cpuset check, refining comments] Link: https://lkml.kernel.org/r/1632481657-68112-1-git-send-email-feng.tang@intel.com Signed-off-by: Feng Tang Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: David Rientjes Cc: Tejun Heo Cc: Zefan Li Cc: Johannes Weiner Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/cpuset.h | 17 +++++++++++++++++ include/linux/mmzone.h | 22 ++++++++++++++++++++++ kernel/cgroup/cpuset.c | 23 +++++++++++++++++++++++ mm/page_alloc.c | 13 +++++++++++++ 4 files changed, 75 insertions(+) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index d2b9c41c8edf5..d58e0476ee8e3 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -34,6 +34,8 @@ */ extern struct static_key_false cpusets_pre_enable_key; extern struct static_key_false cpusets_enabled_key; +extern struct static_key_false cpusets_insane_config_key; + static inline bool cpusets_enabled(void) { return static_branch_unlikely(&cpusets_enabled_key); @@ -51,6 +53,19 @@ static inline void cpuset_dec(void) static_branch_dec_cpuslocked(&cpusets_pre_enable_key); } +/* + * This will get enabled whenever a cpuset configuration is considered + * unsupportable in general. E.g. movable only node which cannot satisfy + * any non movable allocations (see update_nodemask). Page allocator + * needs to make additional checks for those configurations and this + * check is meant to guard those checks without any overhead for sane + * configurations. + */ +static inline bool cpusets_insane_config(void) +{ + return static_branch_unlikely(&cpusets_insane_config_key); +} + extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); @@ -167,6 +182,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) static inline bool cpusets_enabled(void) { return false; } +static inline bool cpusets_insane_config(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 832aa49d0d8eb..fb36a29e3aaeb 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1220,6 +1220,28 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #define for_each_zone_zonelist(zone, z, zlist, highidx) \ for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL) +/* Whether the 'nodes' are all movable nodes */ +static inline bool movable_only_nodes(nodemask_t *nodes) +{ + struct zonelist *zonelist; + struct zoneref *z; + int nid; + + if (nodes_empty(*nodes)) + return false; + + /* + * We can chose arbitrary node from the nodemask to get a + * zonelist as they are interlinked. We just need to find + * at least one zone that can satisfy kernel allocations. + */ + nid = first_node(*nodes); + zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; + z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); + return (!z->zone) ?
true : false; +} + + #ifdef CONFIG_SPARSEMEM #include #endif diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 2a9695ccb65f5..d0e163a020997 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -69,6 +69,13 @@ DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); +/* + * There could be abnormal cpuset configurations for cpu or memory + * node binding, add this key to provide a quick low-cost judgement + * of the situation. + */ +DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key); + /* See "Frequency meter" comments, below. */ struct fmeter { @@ -372,6 +379,17 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); +static inline void check_insane_mems_config(nodemask_t *nodes) +{ + if (!cpusets_insane_config() && + movable_only_nodes(nodes)) { + static_branch_enable(&cpusets_insane_config_key); + pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n" + "Cpuset allocations might fail even with a lot of memory available.\n", + nodemask_pr_args(nodes)); + } +} + /* * Cgroup v2 behavior is used on the "cpus" and "mems" control files when * on default hierarchy or when the cpuset_v2_mode flag is set by mounting @@ -1870,6 +1888,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; + check_insane_mems_config(&trialcs->mems_allowed); + spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; spin_unlock_irq(&callback_lock); @@ -3173,6 +3193,9 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); mems_updated = !nodes_equal(new_mems, cs->effective_mems); + if (mems_updated) + check_insane_mems_config(&new_mems); + if (is_in_v2_mode()) hotplug_update_tasks(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e891560e0a80c..e493d7da26140 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4910,6 +4910,19 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (!ac->preferred_zoneref->zone) goto nopage; + /* + * Check for insane configurations where the cpuset doesn't contain + * any suitable zone to satisfy the request - e.g. non-movable + * GFP_HIGHUSER allocations from MOVABLE nodes only. + */ + if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) { + struct zoneref *z = first_zones_zonelist(ac->zonelist, + ac->highest_zoneidx, + &cpuset_current_mems_allowed); + if (!z->zone) + goto nopage; + } + if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac); From 9ecdaea977d6c2cabd754fe646a2d49bfe989d0c Mon Sep 17 00:00:00 2001 From: Liangcai Fan Date: Thu, 21 Oct 2021 15:07:44 +1100 Subject: [PATCH 0605/1202] mm/page_alloc.c: show watermark_boost of zone in zoneinfo min/low/high_wmark_pages(z) is defined as (z->_watermark[WMARK_MIN/LOW/HIGH] + z->watermark_boost). If kswapd is frequently waked up due to the increase of min/low/high_wmark_pages, printing watermark_boost can quickly locate whether watermark_boost or _watermark[WMARK_MIN/LOW/HIGH] caused min/low/high_wmark_pages to increase. 
Link: https://lkml.kernel.org/r/1632472566-12246-1-git-send-email-liangcaifan19@gmail.com Signed-off-by: Liangcai Fan Cc: Chunyan Zhang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 2 ++ mm/vmstat.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e493d7da26140..6714039cddf4a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5993,6 +5993,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) printk(KERN_CONT "%s" " free:%lukB" + " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" @@ -6013,6 +6014,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/vmstat.c b/mm/vmstat.c index 5db54581feab9..ae87c90e0b4e9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1656,6 +1656,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, } seq_printf(m, "\n pages free %lu" + "\n boost %lu" "\n min %lu" "\n low %lu" "\n high %lu" @@ -1664,6 +1665,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n managed %lu" "\n cma %lu", zone_page_state(zone, NR_FREE_PAGES), + zone->watermark_boost, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone),

From 1ac9b2f6850cf63972a77cc8074a830d81711b9b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Oct 2021 15:07:45 +1100 Subject: [PATCH 0606/1202] mm: create a new system state and fix core_kernel_text()

core_kernel_text() considers that until system_state is at least SYSTEM_RUNNING, init memory is valid. But init memory is freed a few lines before setting SYSTEM_RUNNING, so we have a small period of time when core_kernel_text() is wrong. Create an intermediate system state called SYSTEM_FREEING_INITMEM that is set before init memory starts being freed, and use it in core_kernel_text() to report init memory as invalid earlier.
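As a timeline of the states in include/linux/kernel.h, the window being closed looks like this:

	SYSTEM_BOOTING -> SYSTEM_SCHEDULING -> SYSTEM_FREEING_INITMEM -> SYSTEM_RUNNING
	                                       ^ free_initmem() runs here; a check
	                                         against SYSTEM_RUNNING still treated
	                                         init text as valid during this stage

so core_kernel_text() now tests system_state < SYSTEM_FREEING_INITMEM instead.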
Link: https://lkml.kernel.org/r/9ecfdee7dd4d741d172cb93ff1d87f1c58127c9a.1633001016.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Gerald Schaefer Cc: Kefeng Wang Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/kernel.h | 1 + init/main.c | 2 ++ kernel/extable.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2776423a587e4..471bc05936796 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -248,6 +248,7 @@ extern bool early_boot_irqs_disabled; extern enum system_states { SYSTEM_BOOTING, SYSTEM_SCHEDULING, + SYSTEM_FREEING_INITMEM, SYSTEM_RUNNING, SYSTEM_HALT, SYSTEM_POWER_OFF, diff --git a/init/main.c b/init/main.c index 3c4054a955458..767ee26721760 100644 --- a/init/main.c +++ b/init/main.c @@ -1506,6 +1506,8 @@ static int __ref kernel_init(void *unused) kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); + + system_state = SYSTEM_FREEING_INITMEM; kprobe_free_init_mem(); ftrace_free_init_mem(); kgdb_free_init_mem(); diff --git a/kernel/extable.c b/kernel/extable.c index b0ea5eb0c3b43..290661f68e6b3 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -76,7 +76,7 @@ int notrace core_kernel_text(unsigned long addr) addr < (unsigned long)_etext) return 1; - if (system_state < SYSTEM_RUNNING && + if (system_state < SYSTEM_FREEING_INITMEM && init_kernel_text(addr)) return 1; return 0;

From 51978f92c9fa7abcfe2e9b42db36b77f28d9b2be Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Oct 2021 15:07:46 +1100 Subject: [PATCH 0607/1202] mm: make generic arch_is_kernel_initmem_freed() do what it says

Commit 7a5da02de8d6 ("locking/lockdep: check for freed initmem in static_obj()") added arch_is_kernel_initmem_freed() which is supposed to report whether an object is part of already freed init memory. For the time being, the generic version of arch_is_kernel_initmem_freed() always reports 'false', although free_initmem() is generically called on all architectures. Therefore, change the generic version of arch_is_kernel_initmem_freed() to check whether free_initmem() has been called. If so, then check if a given address falls into init memory. To ease the use of system_state, move it out of line into its only caller, which is lockdep.c. Link: https://lkml.kernel.org/r/1d40783e676e07858be97d881f449ee7ea8adfb1.1633001016.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Gerald Schaefer Cc: Kefeng Wang Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/asm-generic/sections.h | 14 -------------- kernel/locking/lockdep.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index d16302d3eb597..596ab20922892 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -80,20 +80,6 @@ static inline int arch_is_kernel_data(unsigned long addr) } #endif -/* - * Check if an address is part of freed initmem. This is needed on architectures - * with virt == phys kernel mapping, for code that wants to check if an address - * is part of a static object within [_stext, _end]. After initmem is freed, - * memory can be allocated from it, and such allocations would then have - * addresses within the range [_stext, _end].
- */ -#ifndef arch_is_kernel_initmem_freed -static inline int arch_is_kernel_initmem_freed(unsigned long addr) -{ - return 0; -} -#endif - /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bf1c00c881e48..8e118caf835e0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -788,6 +788,21 @@ static int very_verbose(struct lock_class *class) * Is this the address of a static object: */ #ifdef __KERNEL__ +/* + * Check if an address is part of freed initmem. After initmem is freed, + * memory can be allocated from it, and such allocations would then have + * addresses within the range [_stext, _end]. + */ +#ifndef arch_is_kernel_initmem_freed +static int arch_is_kernel_initmem_freed(unsigned long addr) +{ + if (system_state < SYSTEM_FREEING_INITMEM) + return 0; + + return init_section_contains((void *)addr, 1); +} +#endif + static int static_obj(const void *obj) { unsigned long start = (unsigned long) &_stext, From 42c2b6da3b01b39de24eca63a8e1bf27b1c23caf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Oct 2021 15:07:46 +1100 Subject: [PATCH 0608/1202] powerpc: use generic version of arch_is_kernel_initmem_freed() Generic version of arch_is_kernel_initmem_freed() now does the same as powerpc version. Remove the powerpc version. Link: https://lkml.kernel.org/r/c53764eb45d41491e2b21da2e7812239897dbebb.1633001016.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Cc: Kefeng Wang Cc: Benjamin Herrenschmidt Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/include/asm/sections.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 6e4af4492a144..79cb7a25a5fb6 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -6,21 +6,8 @@ #include #include -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include -extern bool init_mem_is_free; - -static inline int arch_is_kernel_initmem_freed(unsigned long addr) -{ - if (!init_mem_is_free) - return 0; - - return addr >= (unsigned long)__init_begin && - addr < (unsigned long)__init_end; -} - extern char __head_end[]; #ifdef __powerpc64__ From 999591db3b7a43fe7ba7a7a66bc940f9c0880c23 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 21 Oct 2021 15:07:47 +1100 Subject: [PATCH 0609/1202] s390: use generic version of arch_is_kernel_initmem_freed() Generic version of arch_is_kernel_initmem_freed() now does the same as s390 version. Remove the s390 version. 
Link: https://lkml.kernel.org/r/b6feb5dfe611a322de482762fc2df3a9eece70c7.1633001016.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Acked-by: Heiko Carstens Cc: Gerald Schaefer Cc: Kefeng Wang Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/s390/include/asm/sections.h | 12 ------------ arch/s390/mm/init.c | 3 --- 2 files changed, 15 deletions(-) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 85881dd48022a..3fecaa4e8b74d 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -2,20 +2,8 @@ #ifndef _S390_SECTIONS_H #define _S390_SECTIONS_H -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include -extern bool initmem_freed; - -static inline int arch_is_kernel_initmem_freed(unsigned long addr) -{ - if (!initmem_freed) - return 0; - return addr >= (unsigned long)__init_begin && - addr < (unsigned long)__init_end; -} - /* * .boot.data section contains variables "shared" between the decompressor and * the decompressed kernel. The decompressor will store values in them, and diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a04faf49001ac..8c6f258a61832 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -58,8 +58,6 @@ unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); -bool initmem_freed; - static void __init setup_zero_pages(void) { unsigned int order; @@ -214,7 +212,6 @@ void __init mem_init(void) void free_initmem(void) { - initmem_freed = true; __set_memory((unsigned long)_sinittext, (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT, SET_MEMORY_RW | SET_MEMORY_NX); From 5bba9b4ba8615a957d6b595e30867d4b318025cd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:07:48 +1100 Subject: [PATCH 0610/1202] mm: page_alloc: use migrate_disable() in drain_local_pages_wq() drain_local_pages_wq() disables preemption to avoid CPU migration during CPU hotplug and can't use cpus_read_lock(). Using migrate_disable() works here, too. The scheduler won't take the CPU offline until the task has left the migrate-disable section. The problem with disabled preemption here is that drain_local_pages() acquires locks which are turned into sleeping locks on PREEMPT_RT and can't be acquired with disabled preemption. Use migrate_disable() in drain_local_pages_wq(). Link: https://lkml.kernel.org/r/20211015210933.viw6rjvo64qtqxn4@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6714039cddf4a..2aafc337d0684 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3141,9 +3141,9 @@ static void drain_local_pages_wq(struct work_struct *work) * cpu which is alright but we also have to make sure to not move to * a different one.
*/ - preempt_disable(); + migrate_disable(); drain_local_pages(drain->zone); - preempt_enable(); + migrate_enable(); } /* From f90aeb89348df86c74680e8a0f7aa4d87a05f229 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:07:49 +1100 Subject: [PATCH 0611/1202] mm: fix data race in PagePoisoned() PagePoisoned() accesses page->flags which can be updated concurrently: | BUG: KCSAN: data-race in next_uptodate_page / unlock_page | | write (marked) to 0xffffea00050f37c0 of 8 bytes by task 1872 on cpu 1: | instrument_atomic_write include/linux/instrumented.h:87 [inline] | clear_bit_unlock_is_negative_byte include/asm-generic/bitops/instrumented-lock.h:74 [inline] | unlock_page+0x102/0x1b0 mm/filemap.c:1465 | filemap_map_pages+0x6c6/0x890 mm/filemap.c:3057 | ... | read to 0xffffea00050f37c0 of 8 bytes by task 1873 on cpu 0: | PagePoisoned include/linux/page-flags.h:204 [inline] | PageReadahead include/linux/page-flags.h:382 [inline] | next_uptodate_page+0x456/0x830 mm/filemap.c:2975 | ... | CPU: 0 PID: 1873 Comm: systemd-udevd Not tainted 5.11.0-rc4-00001-gf9ce0be71d1f #1 To avoid the compiler tearing or otherwise optimizing the access, use READ_ONCE() to access flags. Link: https://lore.kernel.org/all/20210826144157.GA26950@xsang-OptiPlex-9020/ Link: https://lkml.kernel.org/r/20210913113542.2658064-1-elver@google.com Reported-by: kernel test robot Signed-off-by: Marco Elver Acked-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Marco Elver Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index fbfd3fad48f21..dd80cf0bb579b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -215,7 +215,7 @@ static __always_inline int PageCompound(struct page *page) #define PAGE_POISON_PATTERN -1l static inline int PagePoisoned(const struct page *page) { - return page->flags == PAGE_POISON_PATTERN; + return READ_ONCE(page->flags) == PAGE_POISON_PATTERN; } #ifdef CONFIG_DEBUG_VM From c1aef86f7e002a29398e0a1ca9221549996994b0 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Thu, 21 Oct 2021 15:07:49 +1100 Subject: [PATCH 0612/1202] mm/memory_failure: constify static mm_walk_ops The only usage of hwp_walk_ops is to pass its address to walk_page_range() which takes a pointer to const mm_walk_ops as argument. Make it const to allow the compiler to put it in read-only memory. Link: https://lkml.kernel.org/r/20211014075042.17174-3-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8376cfe0efce7..54a5d28ea6017 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -674,7 +674,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, #define hwpoison_hugetlb_range NULL #endif -static struct mm_walk_ops hwp_walk_ops = { +static const struct mm_walk_ops hwp_walk_ops = { .pmd_entry = hwpoison_pte_range, .hugetlb_entry = hwpoison_hugetlb_range, }; From 89e00b7191a644ffd89757f2280f798d19207065 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:07:50 +1100 Subject: [PATCH 0613/1202] mm: filemap: coding style cleanup for filemap_map_pmd() Patch series "Solve silent data loss caused by poisoned page cache (shmem/tmpfs)", v5. 
When discussing the patch that splits page cache THP in order to offline the poisoned page, Naoya mentioned there is a bigger problem [1] that prevents this from working: the page cache page will be truncated if uncorrectable errors happen. Looking into this more deeply, it turns out this approach (truncating the poisoned page) may incur silent data loss for all non-readonly filesystems if the page is dirty. It is even worse for in-memory filesystems, e.g. shmem/tmpfs, since the data blocks are actually gone. To solve this problem we could keep the poisoned dirty page in the page cache and then notify users on any later access, e.g. page fault, read/write, etc. Clean pages can be truncated as-is since they can be reread from disk later on. The consequence is that filesystems may find a poisoned page and manipulate it as a healthy page, since the filesystems don't actually check whether the page is poisoned in any of the relevant paths except page fault. In general, we need to make the filesystems aware of poisoned pages before we can keep such pages in the page cache in order to solve the data loss problem. To make filesystems aware of poisoned pages we should consider:
- The page should not be written back: clearing the dirty flag prevents writeback.
- The page should not be dropped (it shows up as a clean page) by drop caches or other callers: the refcount pin from hwpoison prevents invalidation (called by cache drop, inode cache shrinking, etc), but it doesn't avoid invalidation in the DIO path.
- The page should still be able to be truncated/hole punched/unlinked: this works as-is.
- Users should be notified when the page is accessed, e.g. read/write, page fault and other paths (compression, encryption, etc).
The scope of the last one is huge since almost all filesystems need to do it once a page is returned from a page cache lookup. There are a couple of options to do it:
1. Check the hwpoison flag on every path; the most straightforward way (a minimal sketch of this check is shown below).
2. Return NULL for a poisoned page from the page cache lookup; most callsites already check for NULL, so this should be the least work. But the error handling in filesystems would just return -ENOMEM, and that error code would obviously confuse users.
3. To improve on #2, we could return an error pointer, e.g. ERR_PTR(-EIO), but this involves a significant amount of code change as well, since all the paths need to check whether the pointer is an ERR, just like option #1.
I prototyped both #1 and #3, and #3 seems to require more changes than #1. With #3, an ERR_PTR is returned, so every caller needs to check the return value or an invalid pointer may be dereferenced; but not all callers really care about the content of the page, for example, a partial truncate, which just zeroes the truncated range in one page. Such paths would need additional modification if ERR_PTR were returned. And if callers have their own way to handle problematic pages, we would need to add a new FGP flag to tell the FGP functions to return the pointer to the page. This may happen very rarely, but once it happens the consequence (data corruption) could be very bad and it is very hard to debug. It seems this problem was briefly discussed before, but no action was taken at that time. [2] As the above investigation shows, it takes a huge amount of work to solve the potential data loss for all filesystems. But it is much easier for in-memory filesystems, and such filesystems actually suffer more than others since even the data blocks are gone due to truncation.
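To make option #1 concrete, the per-callsite check is roughly the following shape. This is a minimal sketch only; the exact lookup primitive and error code vary by path, and this series only wires the check into the shmem paths:

	struct page *page = find_lock_page(mapping, index);	/* any page cache lookup */

	if (page && PageHWPoison(page)) {
		/* Surface the poison instead of serving stale or zeroed data. */
		unlock_page(page);
		put_page(page);
		return -EIO;
	}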
So this patchset starts from shmem/tmpfs by taking option #1. TODO:
* Unpoisoning has been broken since commit 0ed950d1f281 ("mm,hwpoison: make get_hwpoison_page() call get_any_page()"), and this patch series makes the refcount check for unpoisoning a shmem page fail.
* Expand to other filesystems. But I haven't heard feedback from filesystem developers yet.
Patch breakdown:
Patch #1: cleanup, depended on by patch #2
Patch #2: fix THP with hwpoisoned subpage(s) PMD map bug
Patch #3: coding style cleanup
Patch #4: refactor and preparation
Patch #5: keep the poisoned page in the page cache and handle such cases on all the paths
Patch #6: the previous patches unblock page cache THP split, so this patch adds page cache THP split support
This patch (of 4): A minor cleanup of the indentation. Link: https://lkml.kernel.org/r/20211020210755.23964-1-shy828301@gmail.com Link: https://lkml.kernel.org/r/20211020210755.23964-4-shy828301@gmail.com Signed-off-by: Yang Shi Reviewed-by: Naoya Horiguchi Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/filemap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index c762a36242c6a..b0254cf793732 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3205,12 +3205,12 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) } if (pmd_none(*vmf->pmd) && PageTransHuge(page)) { - vm_fault_t ret = do_set_pmd(vmf, page); - if (!ret) { - /* The page is mapped successfully, reference consumed. */ - unlock_page(page); - return true; - } + vm_fault_t ret = do_set_pmd(vmf, page); + if (!ret) { + /* The page is mapped successfully, reference consumed. */ + unlock_page(page); + return true; + } } if (pmd_none(*vmf->pmd)) From 14f27ba3f059878910c2413c58d2fd2d7937a026 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:07:50 +1100 Subject: [PATCH 0614/1202] mm: hwpoison: refactor refcount check handling Memory failure will report failure if the page still has an extra pinned refcount other than the one from hwpoison after the handler is done. Actually the check is not necessary for all handlers, so move the check into the specific handlers. This makes the following patch, which keeps shmem pages in the page cache, easier. There may be an expected extra pin in some cases, for example, when the page is dirty and in the swapcache. Link: https://lkml.kernel.org/r/20211020210755.23964-5-shy828301@gmail.com Signed-off-by: Yang Shi Signed-off-by: Naoya Horiguchi Suggested-by: Naoya Horiguchi Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 93 +++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 54a5d28ea6017..bbd433e967908 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -807,12 +807,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn, return ret; } +struct page_state { + unsigned long mask; + unsigned long res; + enum mf_action_page_type type; + + /* Callback ->action() has to unlock the relevant page inside it. */ + int (*action)(struct page_state *ps, struct page *p); +}; + +/* + * Return true if page is still referenced by others, otherwise return + * false. + * + * The extra_pins is true when one extra refcount is expected.
+ */ +static bool has_extra_refcount(struct page_state *ps, struct page *p, + bool extra_pins) +{ + int count = page_count(p) - 1; + + if (extra_pins) + count -= 1; + + if (count > 0) { + pr_err("Memory failure: %#lx: %s still referenced by %d users\n", + page_to_pfn(p), action_page_types[ps->type], count); + return true; + } + + return false; +} + /* * Error hit kernel page. * Do nothing, try to be lucky and not touch this instead. For a few cases we * could be more sophisticated. */ -static int me_kernel(struct page *p, unsigned long pfn) +static int me_kernel(struct page_state *ps, struct page *p) { unlock_page(p); return MF_IGNORED; @@ -821,9 +853,9 @@ static int me_kernel(struct page *p, unsigned long pfn) /* * Page in unknown state. Do nothing. */ -static int me_unknown(struct page *p, unsigned long pfn) +static int me_unknown(struct page_state *ps, struct page *p) { - pr_err("Memory failure: %#lx: Unknown page state\n", pfn); + pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p)); unlock_page(p); return MF_FAILED; } @@ -831,7 +863,7 @@ static int me_unknown(struct page *p, unsigned long pfn) /* * Clean (or cleaned) page cache page. */ -static int me_pagecache_clean(struct page *p, unsigned long pfn) +static int me_pagecache_clean(struct page_state *ps, struct page *p) { int ret; struct address_space *mapping; @@ -868,9 +900,13 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_rwsem or not for this? Right now we don't. */ - ret = truncate_error_page(p, pfn, mapping); + ret = truncate_error_page(p, page_to_pfn(p), mapping); out: unlock_page(p); + + if (has_extra_refcount(ps, p, false)) + ret = MF_FAILED; + return ret; } @@ -879,7 +915,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * Issues: when the error hit a hole page the error is not properly * propagated. */ -static int me_pagecache_dirty(struct page *p, unsigned long pfn) +static int me_pagecache_dirty(struct page_state *ps, struct page *p) { struct address_space *mapping = page_mapping(p); @@ -923,7 +959,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) mapping_set_error(mapping, -EIO); } - return me_pagecache_clean(p, pfn); + return me_pagecache_clean(ps, p); } /* @@ -945,9 +981,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) * Clean swap cache pages can be directly isolated. A later page fault will * bring in the known good data from disk. */ -static int me_swapcache_dirty(struct page *p, unsigned long pfn) +static int me_swapcache_dirty(struct page_state *ps, struct page *p) { int ret; + bool extra_pins = false; ClearPageDirty(p); /* Trigger EIO in shmem: */ @@ -955,10 +992,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; unlock_page(p); + + if (ret == MF_DELAYED) + extra_pins = true; + + if (has_extra_refcount(ps, p, extra_pins)) + ret = MF_FAILED; + return ret; } -static int me_swapcache_clean(struct page *p, unsigned long pfn) +static int me_swapcache_clean(struct page_state *ps, struct page *p) { int ret; @@ -966,6 +1010,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; unlock_page(p); + + if (has_extra_refcount(ps, p, false)) + ret = MF_FAILED; + return ret; } @@ -975,7 +1023,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) * - Error on hugepage is contained in hugepage unit (not in raw page unit.) 
* To narrow down kill region to one page, we need to break up pmd. */ -static int me_huge_page(struct page *p, unsigned long pfn) +static int me_huge_page(struct page_state *ps, struct page *p) { int res; struct page *hpage = compound_head(p); @@ -986,7 +1034,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) mapping = page_mapping(hpage); if (mapping) { - res = truncate_error_page(hpage, pfn, mapping); + res = truncate_error_page(hpage, page_to_pfn(p), mapping); unlock_page(hpage); } else { res = MF_FAILED; @@ -1004,6 +1052,9 @@ static int me_huge_page(struct page *p, unsigned long pfn) } } + if (has_extra_refcount(ps, p, false)) + res = MF_FAILED; + return res; } @@ -1029,14 +1080,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) -static struct page_state { - unsigned long mask; - unsigned long res; - enum mf_action_page_type type; - - /* Callback ->action() has to unlock the relevant page inside it. */ - int (*action)(struct page *p, unsigned long pfn); -} error_states[] = { +static struct page_state error_states[] = { { reserved, reserved, MF_MSG_KERNEL, me_kernel }, /* * free pages are specially detected outside this table: @@ -1096,19 +1140,10 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn) { int result; - int count; /* page p should be unlocked after returning from ps->action(). */ - result = ps->action(p, pfn); + result = ps->action(ps, p); - count = page_count(p) - 1; - if (ps->action == me_swapcache_dirty && result == MF_DELAYED) - count--; - if (count > 0) { - pr_err("Memory failure: %#lx: %s still referenced by %d users\n", - pfn, action_page_types[ps->type], count); - result = MF_FAILED; - } action_result(pfn, ps->type, result); /* Could do more checks here if page looks ok */ From 118a23ecf4e3892fba88da01f3ae68f5f4a5eaca Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:07:51 +1100 Subject: [PATCH 0615/1202] mm: shmem: don't truncate page if memory failure happens The current behavior of memory failure is to truncate the page cache page regardless of whether it is dirty or clean. If the page is dirty, a later access will get obsolete data from disk without any notification to the user. This may cause silent data loss. It is even worse for shmem: since shmem is an in-memory filesystem, truncating the page cache means discarding the data blocks. A later read would return all zeros. The right approach is to keep the corrupted page in the page cache; any later access would return an error for syscalls or SIGBUS for a page fault, until the file is truncated, hole punched or removed. Regular storage-backed filesystems would be more complicated, so this patch focuses on shmem. This also unblocks support for soft offlining shmem THPs. Link: https://lkml.kernel.org/r/20211020210755.23964-6-shy828301@gmail.com Signed-off-by: Yang Shi Cc: Hugh Dickins Cc: Kirill A.
Shutemov Cc: Matthew Wilcox Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 10 +++++++++- mm/shmem.c | 38 +++++++++++++++++++++++++++++++++++--- mm/userfaultfd.c | 5 +++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bbd433e967908..28e7959d3c5c9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "internal.h" #include "ras/ras_event.h" @@ -867,6 +868,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) { int ret; struct address_space *mapping; + bool extra_pins; delete_from_lru_cache(p); @@ -895,6 +897,12 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) goto out; } + /* + * The shmem page is kept in page cache instead of truncating + * so is expected to have an extra refcount after error-handling. + */ + extra_pins = shmem_mapping(mapping); + /* * Truncation is a bit tricky. Enable it per file system for now. * @@ -904,7 +912,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) out: unlock_page(p); - if (has_extra_refcount(ps, p, false)) + if (has_extra_refcount(ps, p, extra_pins)) ret = MF_FAILED; return ret; diff --git a/mm/shmem.c b/mm/shmem.c index df2f0288b9caa..6a7bb46019e2c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2454,6 +2454,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); pgoff_t index = pos >> PAGE_SHIFT; + int ret = 0; /* i_rwsem is held by caller */ if (unlikely(info->seals & (F_SEAL_GROW | @@ -2464,7 +2465,15 @@ shmem_write_begin(struct file *file, struct address_space *mapping, return -EPERM; } - return shmem_getpage(inode, index, pagep, SGP_WRITE); + ret = shmem_getpage(inode, index, pagep, SGP_WRITE); + + if (*pagep && PageHWPoison(*pagep)) { + unlock_page(*pagep); + put_page(*pagep); + ret = -EIO; + } + + return ret; } static int @@ -2551,6 +2560,12 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (sgp == SGP_CACHE) set_page_dirty(page); unlock_page(page); + + if (PageHWPoison(page)) { + put_page(page); + error = -EIO; + break; + } } /* @@ -3112,7 +3127,8 @@ static const char *shmem_get_link(struct dentry *dentry, page = find_get_page(inode->i_mapping, 0); if (!page) return ERR_PTR(-ECHILD); - if (!PageUptodate(page)) { + if (PageHWPoison(page) || + !PageUptodate(page)) { put_page(page); return ERR_PTR(-ECHILD); } @@ -3120,6 +3136,11 @@ static const char *shmem_get_link(struct dentry *dentry, error = shmem_getpage(inode, 0, &page, SGP_READ); if (error) return ERR_PTR(error); + if (page && PageHWPoison(page)) { + unlock_page(page); + put_page(page); + return ERR_PTR(-ECHILD); + } unlock_page(page); } set_delayed_call(done, shmem_put_link, page); @@ -3770,6 +3791,13 @@ static void shmem_destroy_inodecache(void) kmem_cache_destroy(shmem_inode_cachep); } +/* Keep the page in page cache instead of truncating it */ +static int shmem_error_remove_page(struct address_space *mapping, + struct page *page) +{ + return 0; +} + const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, .set_page_dirty = __set_page_dirty_no_writeback, @@ -3780,7 +3808,7 @@ const struct address_space_operations shmem_aops = { #ifdef CONFIG_MIGRATION .migratepage = migrate_page, #endif - .error_remove_page = generic_error_remove_page, + 
.error_remove_page = shmem_error_remove_page, }; EXPORT_SYMBOL(shmem_aops); @@ -4191,6 +4219,10 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, page = ERR_PTR(error); else unlock_page(page); + + if (PageHWPoison(page)) + page = ERR_PTR(-EIO); + return page; #else /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index caf6dfff2a60f..77fce86371a95 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -232,6 +232,11 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm, goto out; } + if (PageHWPoison(page)) { + ret = -EIO; + goto out_release; + } + ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr, page, false, wp_copy); if (ret) From 496d11af1b15ba2edc2c990cf81bbe83b74320f2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 9 Oct 2021 10:52:50 -0400 Subject: [PATCH 0616/1202] virtio_net: clarify tailroom logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the tailroom math follow the same logic as everything else, subtracting values in the order in which things are laid out in the buffer. Tested-by: Corentin Noël Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4ad25a8b0870c..6d8c8745bf242 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -406,12 +406,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, * add_recvbuf_mergeable() + get_mergeable_buf_len() */ truesize = headroom ? PAGE_SIZE : truesize; - tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len); + tailroom = truesize - headroom; buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; + tailroom -= hdr_padded_len + len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); From b2c5221fd074fbb0e57d6707bed5b7386bf430ed Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 1 Sep 2021 16:14:34 +0300 Subject: [PATCH 0617/1202] virtio-blk: avoid preallocating big SGL for data No need to pre-allocate a big buffer for the IO SGL anymore. If a device has lots of deep queues, preallocation for the sg list can consume substantial amounts of memory. For a HW virtio-blk device, nr_hw_queues can be 64 or 128, and each queue's depth might be 128. This means the resulting preallocation for the data SGLs is big. Switch to runtime allocation of the SGL for lists longer than 2 entries. This is the approach used by NVMe drivers, so it should be reasonable for virtio block as well. Runtime SGL allocation has always been the case for the legacy I/O path, so this is nothing new. The preallocated small SGL depends on SG_CHAIN, so if the ARCH doesn't support SG_CHAIN, use only runtime allocation for the SGL. Re-organize the setup of the IO request to fit the new sg chain mechanism. No performance degradation was seen (fio libaio engine with 16 jobs and 128 iodepth):
IO size   IOPs Rand Read (before/after)   IOPs Rand Write (before/after)
--------  ------------------------------  -------------------------------
512B      318K/316K                       329K/325K
4KB       323K/321K                       353K/349K
16KB      199K/208K                       250K/275K
128KB     36K/36.1K                       39.2K/41.7K
Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Link: https://lore.kernel.org/r/20210901131434.31158-1-mgurtovoy@nvidia.com Signed-off-by: Michael S.
Tsirkin Reviewed-by: Feng Li Reviewed-by: Stefan Hajnoczi Tested-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig --- drivers/block/virtio_blk.c | 156 ++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 56 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 303caf2d17d0c..6767fb709d253 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -24,6 +24,12 @@ /* The maximum number of sg elements that fit into a virtqueue */ #define VIRTIO_BLK_MAX_SG_ELEMS 32768 +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define VIRTIO_BLK_INLINE_SG_CNT 0 +#else +#define VIRTIO_BLK_INLINE_SG_CNT 2 +#endif + static int major; static DEFINE_IDA(vd_index_ida); @@ -77,6 +83,7 @@ struct virtio_blk { struct virtblk_req { struct virtio_blk_outhdr out_hdr; u8 status; + struct sg_table sg_table; struct scatterlist sg[]; }; @@ -162,12 +169,92 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) return 0; } -static inline void virtblk_request_done(struct request *req) +static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr) { - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + if (blk_rq_nr_phys_segments(req)) + sg_free_table_chained(&vbr->sg_table, + VIRTIO_BLK_INLINE_SG_CNT); +} + +static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, + struct virtblk_req *vbr) +{ + int err; + + if (!blk_rq_nr_phys_segments(req)) + return 0; + + vbr->sg_table.sgl = vbr->sg; + err = sg_alloc_table_chained(&vbr->sg_table, + blk_rq_nr_phys_segments(req), + vbr->sg_table.sgl, + VIRTIO_BLK_INLINE_SG_CNT); + if (unlikely(err)) + return -ENOMEM; + return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); +} + +static void virtblk_cleanup_cmd(struct request *req) +{ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) kfree(bvec_virt(&req->special_vec)); +} + +static int virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, + struct virtblk_req *vbr) +{ + bool unmap = false; + u32 type; + + vbr->out_hdr.sector = 0; + + switch (req_op(req)) { + case REQ_OP_READ: + type = VIRTIO_BLK_T_IN; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_WRITE: + type = VIRTIO_BLK_T_OUT; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_DISCARD: + type = VIRTIO_BLK_T_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + type = VIRTIO_BLK_T_WRITE_ZEROES; + unmap = !(req->cmd_flags & REQ_NOUNMAP); + break; + case REQ_OP_DRV_IN: + type = VIRTIO_BLK_T_GET_ID; + break; + default: + WARN_ON_ONCE(1); + return BLK_STS_IOERR; + } + + vbr->out_hdr.type = cpu_to_virtio32(vdev, type); + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (virtblk_setup_discard_write_zeroes(req, unmap)) + return BLK_STS_RESOURCE; + } + + return 0; +} + +static inline void virtblk_request_done(struct request *req) +{ + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); blk_mq_end_request(req, virtblk_result(vbr)); } @@ -225,57 +312,23 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; - bool unmap = false; - u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - switch (req_op(req)) { - case REQ_OP_READ: - case REQ_OP_WRITE: - type = 0; - break; - case REQ_OP_FLUSH: - type = 
VIRTIO_BLK_T_FLUSH; - break; - case REQ_OP_DISCARD: - type = VIRTIO_BLK_T_DISCARD; - break; - case REQ_OP_WRITE_ZEROES: - type = VIRTIO_BLK_T_WRITE_ZEROES; - unmap = !(req->cmd_flags & REQ_NOUNMAP); - break; - case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; - break; - default: - WARN_ON_ONCE(1); - return BLK_STS_IOERR; - } - - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); - vbr->out_hdr.sector = type ? - 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); + err = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(err)) + return err; blk_mq_start_request(req); - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { - err = virtblk_setup_discard_write_zeroes(req, unmap); - if (err) - return BLK_STS_RESOURCE; - } - - num = blk_rq_map_sg(hctx->queue, req, vbr->sg); - if (num) { - if (rq_data_dir(req) == WRITE) - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); - else - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) { + virtblk_cleanup_cmd(req); + return BLK_STS_RESOURCE; } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); /* Don't stop the queue if -ENOMEM: we may have failed to @@ -284,6 +337,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, if (err == -ENOSPC) blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); switch (err) { case -ENOSPC: return BLK_STS_DEV_RESOURCE; @@ -660,16 +715,6 @@ static const struct attribute_group *virtblk_attr_groups[] = { NULL, }; -static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct virtio_blk *vblk = set->driver_data; - struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); - - sg_init_table(vbr->sg, vblk->sg_elems); - return 0; -} - static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; @@ -682,7 +727,6 @@ static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, - .init_request = virtblk_init_request, .map_queues = virtblk_map_queues, }; @@ -762,7 +806,7 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; vblk->tag_set.cmd_size = sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * sg_elems; + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; vblk->tag_set.driver_data = vblk; vblk->tag_set.nr_hw_queues = vblk->num_vqs; From 1a662cf6cb9a6c3f53bfd7ec5d07dcddf15c6df8 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 2 Sep 2021 23:46:22 +0300 Subject: [PATCH 0618/1202] virtio-blk: add num_request_queues module parameter Sometimes a user would like to control the number of request queues to be created for a block device, for example, to limit the memory footprint of virtio-blk devices. Reviewed-by: Christoph Hellwig Reviewed-by: Stefan Hajnoczi Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20210902204622.54354-1-mgurtovoy@nvidia.com Signed-off-by: Michael S.
Tsirkin --- drivers/block/virtio_blk.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6767fb709d253..a33fe07436722 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -30,6 +30,25 @@ #define VIRTIO_BLK_INLINE_SG_CNT 2 #endif +static int virtblk_queue_count_set(const char *val, + const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 1, nr_cpu_ids); +} + +static const struct kernel_param_ops queue_count_ops = { + .set = virtblk_queue_count_set, + .get = param_get_uint, +}; + +static unsigned int num_request_queues; +module_param_cb(num_request_queues, &queue_count_ops, &num_request_queues, + 0644); +MODULE_PARM_DESC(num_request_queues, + "Limit the number of request queues to use for blk device. " + "0 for no limit. " + "Values > nr_cpu_ids truncated to nr_cpu_ids."); + static int major; static DEFINE_IDA(vd_index_ida); @@ -553,7 +572,9 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; - num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); + num_vqs = min_t(unsigned int, + min_not_zero(num_request_queues, nr_cpu_ids), + num_vqs); vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) From c3ca8a3eeb54e2fcac3d8454cec9b33e0766b226 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:16 +0800 Subject: [PATCH 0619/1202] virtio-pci: introduce legacy device module Split common code out of virtio-pci-legacy so the vDPA driver can reuse it later. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/7bb4834a0638fabaf3ba1a585b607830392a088f.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 10 ++ drivers/virtio/Makefile | 1 + drivers/virtio/virtio_pci_common.c | 10 +- drivers/virtio/virtio_pci_common.h | 9 +- drivers/virtio/virtio_pci_legacy.c | 101 +++--------- drivers/virtio/virtio_pci_legacy_dev.c | 220 +++++++++++++++++++++++++ include/linux/virtio_pci_legacy.h | 42 +++++ 7 files changed, 310 insertions(+), 83 deletions(-) create mode 100644 drivers/virtio/virtio_pci_legacy_dev.c create mode 100644 include/linux/virtio_pci_legacy.h diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index ce1b3f6ec325d..8fcf94cd2c965 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB PCI device with possible vendor specific extensions. Any module that selects this module must depend on PCI. +config VIRTIO_PCI_LIB_LEGACY + tristate + help + Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device) + implementation. + This module implements the basic probe and control for devices + which are based on legacy PCI device. Any module that selects this + module must depend on PCI. + menuconfig VIRTIO_MENU bool "Virtio drivers" default y @@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY bool "Support for legacy virtio draft 0.9.X and older devices" default y depends on VIRTIO_PCI + select VIRTIO_PCI_LIB_LEGACY help Virtio PCI Card 0.9.X Draft (circa 2014) and older device support.
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 699bbea0465f9..0a82d08732484 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o +obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b35bb2d57f62c..d724f676608ba 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -549,6 +549,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, pci_set_master(pci_dev); + vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; + rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) @@ -557,10 +559,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, return 0; err_register: - if (vp_dev->ioaddr) - virtio_pci_legacy_remove(vp_dev); + if (vp_dev->is_legacy) + virtio_pci_legacy_remove(vp_dev); else - virtio_pci_modern_remove(vp_dev); + virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: @@ -587,7 +589,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) unregister_virtio_device(&vp_dev->vdev); - if (vp_dev->ioaddr) + if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index beec047a8f8d9..eb17a29fc7ef1 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -44,16 +45,14 @@ struct virtio_pci_vq_info { struct virtio_pci_device { struct virtio_device vdev; struct pci_dev *pci_dev; + struct virtio_pci_legacy_device ldev; struct virtio_pci_modern_device mdev; - /* In legacy mode, these two point to within ->legacy. */ + bool is_legacy; + /* Where to read and clear interrupt */ u8 __iomem *isr; - /* Legacy only field */ - /* the IO mapping for the PCI config space */ - void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ spinlock_t lock; struct list_head virtqueues; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index d62e9835aeeca..82eb437ad9205 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -14,6 +14,7 @@ * Michael S. Tsirkin */ +#include "linux/virtio_pci_legacy.h" #include "virtio_pci_common.h" /* virtio config->get_features() implementation */ @@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev) /* When someone needs more than 32 feature bits, we'll need to * steal a bit to indicate that the rest are somewhere else. */ - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); + return vp_legacy_get_features(&vp_dev->ldev); } /* virtio config->finalize_features() implementation */ @@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev) BUG_ON((u32)vdev->features != vdev->features); /* We only support 32 feature bits. 
*/ - iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + vp_legacy_set_features(&vp_dev->ldev, vdev->features); return 0; } @@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; u8 *ptr = buf; @@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - void __iomem *ioaddr = vp_dev->ioaddr + + void __iomem *ioaddr = vp_dev->ldev.ioaddr + VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + offset; const u8 *ptr = buf; @@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset, static u8 vp_get_status(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + return vp_legacy_get_status(&vp_dev->ldev); } static void vp_set_status(struct virtio_device *vdev, u8 status) @@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status) struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* We should never be setting status to 0. */ BUG_ON(status == 0); - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, status); } static void vp_reset(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* 0 status means a reset. */ - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_set_status(&vp_dev->ldev, 0); /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); + vp_legacy_get_status(&vp_dev->ldev); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) { - /* Setup the vector used for configuration events */ - iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - /* Will also flush the write out to device */ - return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + return vp_legacy_config_vector(&vp_dev->ldev, vector); } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, @@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, int err; u64 q_pfn; - /* Select the queue we're interested in */ - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - /* Check if queue is either not available or already active. 
*/ - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) + num = vp_legacy_get_queue_size(&vp_dev->ldev, index); + if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) return ERR_PTR(-ENOENT); info->msix_vector = msix_vec; @@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } /* activate the queue */ - iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn); - vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; + vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; if (msix_vec != VIRTIO_MSI_NO_VECTOR) { - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec); if (msix_vec == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto out_deactivate; @@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return vq; out_deactivate: - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, index, 0); out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); @@ -178,17 +171,15 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { - iowrite16(VIRTIO_MSI_NO_VECTOR, - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + vp_legacy_queue_vector(&vp_dev->ldev, vq->index, + VIRTIO_MSI_NO_VECTOR); /* Flush the write out to device */ - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); + ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR); } /* Select and deactivate the queue */ - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0); vring_del_virtqueue(vq); } @@ -211,51 +202,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = { /* the PCI probing function */ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) { + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; struct pci_dev *pci_dev = vp_dev->pci_dev; int rc; - /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ - if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) - return -ENODEV; - - if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", - VIRTIO_PCI_ABI_VERSION, pci_dev->revision); - return -ENODEV; - } - - rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); - if (rc) { - rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - } else { - /* - * The virtio ring base address is expressed as a 32-bit PFN, - * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. - */ - dma_set_coherent_mask(&pci_dev->dev, - DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); - } - - if (rc) - dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); + ldev->pci_dev = pci_dev; - rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + rc = vp_legacy_probe(ldev); if (rc) return rc; - rc = -ENOMEM; - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); - if (!vp_dev->ioaddr) - goto err_iomap; - - vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; - - /* we use the subsystem vendor/device id as the virtio vendor/device - * id. 
this allows us to use the same PCI vendor/device id for all - * virtio devices and to identify the particular virtio driver by - * the subsystem ids */ - vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; - vp_dev->vdev.id.device = pci_dev->subsystem_device; + vp_dev->isr = ldev->isr; + vp_dev->vdev.id = ldev->id; vp_dev->vdev.config = &virtio_pci_config_ops; @@ -264,16 +222,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) vp_dev->del_vq = del_vq; return 0; - -err_iomap: - pci_release_region(pci_dev, 0); - return rc; } void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) { - struct pci_dev *pci_dev = vp_dev->pci_dev; + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_region(pci_dev, 0); + vp_legacy_remove(ldev); } diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c new file mode 100644 index 0000000000000..9b97680dd02b5 --- /dev/null +++ b/drivers/virtio/virtio_pci_legacy_dev.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "linux/virtio_pci.h" +#include +#include +#include + + +/* + * vp_legacy_probe: probe the legacy virtio pci device, note that the + * caller is required to enable the PCI device before calling this function. + * @ldev: the legacy virtio-pci device + * + * Return 0 on success, otherwise fail + */ +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + int rc; + + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) + return -ENODEV; + + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) + return -ENODEV; + + rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); + if (rc) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + } else { + /* + * The virtio ring base address is expressed as a 32-bit PFN, + * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. + */ + dma_set_coherent_mask(&pci_dev->dev, + DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); + } + + if (rc) + dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.
Trying to continue, but this might not work.\n"); + + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); + if (rc) + return rc; + + ldev->ioaddr = pci_iomap(pci_dev, 0, 0); + if (!ldev->ioaddr) { + rc = -ENOMEM; + goto err_iomap; + } + + ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR; + + ldev->id.vendor = pci_dev->subsystem_vendor; + ldev->id.device = pci_dev->subsystem_device; + + return 0; +err_iomap: + pci_release_region(pci_dev, 0); + return rc; +} +EXPORT_SYMBOL_GPL(vp_legacy_probe); + +/* + * vp_legacy_remove: remove and clean up the legacy virtio pci device + * @ldev: the legacy virtio-pci device + */ +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev) +{ + struct pci_dev *pci_dev = ldev->pci_dev; + + pci_iounmap(pci_dev, ldev->ioaddr); + pci_release_region(pci_dev, 0); +} +EXPORT_SYMBOL_GPL(vp_legacy_remove); + +/* + * vp_legacy_get_features - get features from device + * @ldev: the legacy virtio-pci device + * + * Returns the features read from the device + */ +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev) +{ + + return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_features); + +/* + * vp_legacy_get_driver_features - get driver features from device + * @ldev: the legacy virtio-pci device + * + * Returns the driver features read from the device + */ +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev) +{ + return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features); + +/* + * vp_legacy_set_features - set features to device + * @ldev: the legacy virtio-pci device + * @features: the features set to device + */ +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features) +{ + iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_features); + +/* + * vp_legacy_get_status - get the device status + * @ldev: the legacy virtio-pci device + * + * Returns the status read from device + */ +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev) +{ + return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_status); + +/* + * vp_legacy_set_status - set status to device + * @ldev: the legacy virtio-pci device + * @status: the status set to device + */ +void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status) +{ + iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_status); + +/* + * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue + * @ldev: the legacy virtio-pci device + * @index: queue index + * @vector: the MSI-X vector to use + * + * Returns the vector read back from the device + */ +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 index, u16 vector) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); + /* Flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_queue_vector); + +/* + * vp_legacy_config_vector - set the vector for config interrupt + * @ldev: the legacy virtio-pci device + * @vector: the config vector + * + * Returns the config vector read from the device + */ +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector) +{ + /* Setup the vector used for configuration events */ + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); + /* Verify we had enough resources to assign the vector */ + /*
Will also flush the write out to device */ + return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); +} +EXPORT_SYMBOL_GPL(vp_legacy_config_vector); + +/* + * vp_legacy_set_queue_address - set the virtqueue address + * @ldev: the legacy virtio-pci device + * @index: the queue index + * @queue_pfn: pfn of the virtqueue + */ +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address); + +/* + * vp_legacy_get_queue_enable - get whether a virtqueue is enabled + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns whether a virtqueue is enabled or not + */ +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable); + +/* + * vp_legacy_get_queue_size - get size for a virtqueue + * @ldev: the legacy virtio-pci device + * @index: the queue index + * + * Returns the size of the virtqueue + */ +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 index) +{ + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); + return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM); +} +EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size); + +MODULE_VERSION("0.1"); +MODULE_DESCRIPTION("Legacy Virtio PCI Device"); +MODULE_AUTHOR("Wu Zongyong "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h new file mode 100644 index 0000000000000..e5d665faf00e1 --- /dev/null +++ b/include/linux/virtio_pci_legacy.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_PCI_LEGACY_H +#define _LINUX_VIRTIO_PCI_LEGACY_H + +#include "linux/mod_devicetable.h" +#include +#include + +struct virtio_pci_legacy_device { + struct pci_dev *pci_dev; + + /* Where to read and clear interrupt */ + u8 __iomem *isr; + /* The IO mapping for the PCI config space (legacy mode only) */ + void __iomem *ioaddr; + + struct virtio_device_id id; +}; + +u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev); +u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev); +void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, + u32 features); +u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev); +void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, + u8 status); +u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 vector); +u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, + u16 vector); +void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, + u16 index, u32 queue_pfn); +bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, + u16 idx); +void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx, u16 size); +u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, + u16 idx); +int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); +void vp_legacy_remove(struct virtio_pci_legacy_device *ldev); + +#endif From 95a7e74c5ab93d65e13e20f21cb057851240a59a Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:17 +0800 Subject: [PATCH 0620/1202] vdpa: fix typo Signed-off-by: Wu Zongyong Acked-by: Jason Wang Link:
https://lore.kernel.org/r/a07bce647df93aa029e1993245b9b7990d4b0177.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 3972ab765de18..a896ee021e5f9 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -257,7 +257,7 @@ struct vdpa_config_ops { struct vdpa_notification_area (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ - int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); + int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); From e600e08659add18f943cfd9e84428d050e568053 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:18 +0800 Subject: [PATCH 0621/1202] vp_vdpa: add vq irq offloading support This patch implements the get_vq_irq() callback for virtio pci devices to allow irq offloading. Signed-off-by: Wu Zongyong Acked-by: Jason Wang Link: https://lore.kernel.org/r/d3cf8ff3fd485874c168a2077fe4cee672377cca.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/virtio_pci/vp_vdpa.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 5bcd00246d2e8..e3ff7875e1234 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -76,6 +76,17 @@ static u8 vp_vdpa_get_status(struct vdpa_device *vdpa) return vp_modern_get_status(mdev); } +static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx) +{ + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); + int irq = vp_vdpa->vring[idx].irq; + + if (irq == VIRTIO_MSI_NO_VECTOR) + return -EINVAL; + + return irq; +} + static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) { struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; @@ -427,6 +438,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = { .get_config = vp_vdpa_get_config, .set_config = vp_vdpa_set_config, .set_config_cb = vp_vdpa_set_config_cb, + .get_vq_irq = vp_vdpa_get_vq_irq, }; static void vp_vdpa_free_irq_vectors(void *data) From 2a1ba6f27409bb5a444fcbf06d737b0cd7e3e0f6 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:19 +0800 Subject: [PATCH 0622/1202] vdpa: add new callback get_vq_num_min in vdpa_config_ops This callback is optional. For vdpa devices that do not support changing the virtqueue size, get_vq_num_min and get_vq_num_max will return the same value, so that users can choose a correct value for that device. Suggested-by: Jason Wang Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/7e4d859949fa37fce2289a4b287843cdeffcaf8a.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S.
Suggested-by: Jason Wang Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/7e4d859949fa37fce2289a4b287843cdeffcaf8a.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a896ee021e5f9..30864848950b7 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -171,6 +171,9 @@ struct vdpa_map_file { * @get_vq_num_max: Get the max size of virtqueue * @vdev: vdpa device * Returns u16: max size of virtqueue + * @get_vq_num_min: Get the min size of virtqueue (optional) + * @vdev: vdpa device + * Returns u16: min size of virtqueue * @get_device_id: Get virtio device id * @vdev: vdpa device * Returns u32: virtio device id @@ -266,6 +269,7 @@ struct vdpa_config_ops { void (*set_config_cb)(struct vdpa_device *vdev, struct vdpa_callback *cb); u16 (*get_vq_num_max)(struct vdpa_device *vdev); + u16 (*get_vq_num_min)(struct vdpa_device *vdev); u32 (*get_device_id)(struct vdpa_device *vdev); u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); From 371f108ca2dffe3f1552e8a8f95d281f1a8d742b Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:20 +0800 Subject: [PATCH 0623/1202] vdpa: min vq num of vdpa device cannot be greater than max vq num Just fail to probe the vdpa device if the min virtqueue num returned by get_vq_num_min is greater than the max virtqueue num returned by get_vq_num_max. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/41e5d4e7d9e6f46429ce45a80f81f40fdb8e11cb.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 1dc121a07a934..fd014ecec7111 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -26,8 +26,16 @@ static int vdpa_dev_probe(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + const struct vdpa_config_ops *ops = vdev->config; + u32 max_num, min_num = 0; int ret = 0; + max_num = ops->get_vq_num_max(vdev); + if (ops->get_vq_num_min) + min_num = ops->get_vq_num_min(vdev); + if (max_num < min_num) + return -EINVAL; + if (drv && drv->probe) ret = drv->probe(vdev); From 289359bc52711d9f5e388e4ad264270f29b7e050 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:21 +0800 Subject: [PATCH 0624/1202] virtio_vdpa: setup correct vq size with callbacks get_vq_num_{max,min} For devices that implement the get_vq_num_min callback, the driver should not negotiate the virtqueue size with the backend vdpa device if the value returned by get_vq_num_min equals the value returned by get_vq_num_max. This is useful for vdpa devices based on the legacy virtio specification. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/c75b4499f7ead922daa19bf67b32eed6f185d260.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S.
Tsirkin --- drivers/virtio/virtio_vdpa.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 72eaef2caeb14..e42ace29daa1d 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -145,7 +145,8 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, /* Assume split virtqueue, switch to packed if necessary */ struct vdpa_vq_state state = {0}; unsigned long flags; - u32 align, num; + u32 align, max_num, min_num = 0; + bool may_reduce_num = true; int err; if (!name) @@ -163,22 +164,32 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!info) return ERR_PTR(-ENOMEM); - num = ops->get_vq_num_max(vdpa); - if (num == 0) { + max_num = ops->get_vq_num_max(vdpa); + if (max_num == 0) { err = -ENOENT; goto error_new_virtqueue; } + if (ops->get_vq_num_min) + min_num = ops->get_vq_num_min(vdpa); + + may_reduce_num = (max_num == min_num) ? false : true; + /* Create the vring */ align = ops->get_vq_align(vdpa); - vq = vring_create_virtqueue(index, num, align, vdev, - true, true, ctx, + vq = vring_create_virtqueue(index, max_num, align, vdev, + true, may_reduce_num, ctx, virtio_vdpa_notify, callback, name); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; } + if (virtqueue_get_vring_size(vq) < min_num) { + err = -EINVAL; + goto err_vq; + } + /* Setup virtqueue callback */ cb.callback = virtio_vdpa_virtqueue_cb; cb.private = info; From b0f7d34b5074482796ff24c4b16941240e0728b6 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:22 +0800 Subject: [PATCH 0625/1202] vdpa: add new attribute VDPA_ATTR_DEV_MIN_VQ_SIZE This attribute advertises the min value of virtqueue size. The value is 0 by default. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/f3d36f0d7c9588aefaf6eeaa235b0a22fee23d56.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa.c | 5 +++++ include/uapi/linux/vdpa.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index fd014ecec7111..4aeb1458b924b 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -500,6 +500,7 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq int flags, struct netlink_ext_ack *extack) { u16 max_vq_size; + u16 min_vq_size = 0; u32 device_id; u32 vendor_id; void *hdr; @@ -516,6 +517,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq device_id = vdev->config->get_device_id(vdev); vendor_id = vdev->config->get_vendor_id(vdev); max_vq_size = vdev->config->get_vq_num_max(vdev); + if (vdev->config->get_vq_num_min) + min_vq_size = vdev->config->get_vq_num_min(vdev); err = -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) @@ -528,6 +531,8 @@ vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq goto msg_err; if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size)) goto msg_err; + if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size)) + goto msg_err; genlmsg_end(msg, hdr); return 0; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 66a41e4ec163a..e3b87879514c3 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -32,6 +32,7 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ VDPA_ATTR_DEV_MAX_VQS, /* u32 */ VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ /* new attributes must be added above here */ VDPA_ATTR_MAX, From 52e437b2b222dfa6a67b3076b9ae2fcf8e6ee042 Mon Sep 17 00:00:00 2001 From: Wu Zongyong Date: Fri, 22 Oct 2021 10:44:23 +0800 Subject: [PATCH 0626/1202] eni_vdpa: add vDPA driver for Alibaba ENI This patch adds a new vDPA driver for Alibaba ENI (Elastic Network Interface), which is built upon the virtio 0.9.5 specification. This driver does not support running on a big-endian (BE) host. Signed-off-by: Wu Zongyong Link: https://lore.kernel.org/r/6496b76a64303a3e23ea19e3e279644608de36fb.1634870456.git.wuzongyong@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 8 + drivers/vdpa/Makefile | 1 + drivers/vdpa/alibaba/Makefile | 3 + drivers/vdpa/alibaba/eni_vdpa.c | 553 ++++++++++++++++++++++++++++++++ 4 files changed, 565 insertions(+) create mode 100644 drivers/vdpa/alibaba/Makefile create mode 100644 drivers/vdpa/alibaba/eni_vdpa.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 3d91982d83717..c0232a2148a70 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -78,4 +78,12 @@ config VP_VDPA help This kernel module bridges virtio PCI device to vDPA bus. +config ALIBABA_ENI_VDPA + tristate "vDPA driver for Alibaba ENI" + select VIRTIO_PCI_LEGACY_LIB + depends on PCI_MSI && !CPU_BIG_ENDIAN + help + VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon + the virtio 0.9.5 specification.
+ endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index f02ebed33f193..15665563a7f44 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_VDPA_USER) += vdpa_user/ obj-$(CONFIG_IFCVF) += ifcvf/ obj-$(CONFIG_MLX5_VDPA) += mlx5/ obj-$(CONFIG_VP_VDPA) += virtio_pci/ +obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/ diff --git a/drivers/vdpa/alibaba/Makefile b/drivers/vdpa/alibaba/Makefile new file mode 100644 index 0000000000000..ef4aae69f87aa --- /dev/null +++ b/drivers/vdpa/alibaba/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ALIBABA_ENI_VDPA) += eni_vdpa.o + diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c new file mode 100644 index 0000000000000..6a09f157d8102 --- /dev/null +++ b/drivers/vdpa/alibaba/eni_vdpa.c @@ -0,0 +1,553 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bridge driver for Alibaba ENI (Elastic Network Interface) + * + * Copyright (c) 2021, Alibaba Inc. All rights reserved. + * Author: Wu Zongyong <wuzongyong@linux.alibaba.com> + * + */ + +#include "linux/bits.h" +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/vdpa.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/virtio_pci.h> +#include <linux/virtio_pci_legacy.h> + +#define ENI_MSIX_NAME_SIZE 256 + +#define ENI_ERR(pdev, fmt, ...) \ + dev_err(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) +#define ENI_DBG(pdev, fmt, ...) \ + dev_dbg(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) +#define ENI_INFO(pdev, fmt, ...) \ + dev_info(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) + +struct eni_vring { + void __iomem *notify; + char msix_name[ENI_MSIX_NAME_SIZE]; + struct vdpa_callback cb; + int irq; +}; + +struct eni_vdpa { + struct vdpa_device vdpa; + struct virtio_pci_legacy_device ldev; + struct eni_vring *vring; + struct vdpa_callback config_cb; + char msix_name[ENI_MSIX_NAME_SIZE]; + int config_irq; + int queues; + int vectors; +}; + +static struct eni_vdpa *vdpa_to_eni(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct eni_vdpa, vdpa); +} + +static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + + return &eni_vdpa->ldev; +} + +static u64 eni_vdpa_get_features(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + u64 features = vp_legacy_get_features(ldev); + + features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); + features |= BIT_ULL(VIRTIO_F_ORDER_PLATFORM); + + return features; +} + +static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + if (!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) && features) { + ENI_ERR(ldev->pci_dev, + "VIRTIO_NET_F_MRG_RXBUF is not negotiated\n"); + return -EINVAL; + } + + vp_legacy_set_features(ldev, (u32)features); + + return 0; +} + +static u8 eni_vdpa_get_status(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_status(ldev); +} + +static int eni_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + int irq = eni_vdpa->vring[idx].irq; + + if (irq == VIRTIO_MSI_NO_VECTOR) + return -EINVAL; + + return irq; +} + +static void eni_vdpa_free_irq(struct eni_vdpa *eni_vdpa) +{ + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + struct pci_dev *pdev = ldev->pci_dev; + int i; + + for (i = 0; i < eni_vdpa->queues; i++) { + if (eni_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) { + vp_legacy_queue_vector(ldev, i,
VIRTIO_MSI_NO_VECTOR); + devm_free_irq(&pdev->dev, eni_vdpa->vring[i].irq, + &eni_vdpa->vring[i]); + eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; + } + } + + if (eni_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) { + vp_legacy_config_vector(ldev, VIRTIO_MSI_NO_VECTOR); + devm_free_irq(&pdev->dev, eni_vdpa->config_irq, eni_vdpa); + eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; + } + + if (eni_vdpa->vectors) { + pci_free_irq_vectors(pdev); + eni_vdpa->vectors = 0; + } +} + +static irqreturn_t eni_vdpa_vq_handler(int irq, void *arg) +{ + struct eni_vring *vring = arg; + + if (vring->cb.callback) + return vring->cb.callback(vring->cb.private); + + return IRQ_HANDLED; +} + +static irqreturn_t eni_vdpa_config_handler(int irq, void *arg) +{ + struct eni_vdpa *eni_vdpa = arg; + + if (eni_vdpa->config_cb.callback) + return eni_vdpa->config_cb.callback(eni_vdpa->config_cb.private); + + return IRQ_HANDLED; +} + +static int eni_vdpa_request_irq(struct eni_vdpa *eni_vdpa) +{ + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + struct pci_dev *pdev = ldev->pci_dev; + int i, ret, irq; + int queues = eni_vdpa->queues; + int vectors = queues + 1; + + ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX); + if (ret != vectors) { + ENI_ERR(pdev, + "failed to allocate irq vectors: want %d but got %d\n", + vectors, ret); + return ret; + } + + eni_vdpa->vectors = vectors; + + for (i = 0; i < queues; i++) { + snprintf(eni_vdpa->vring[i].msix_name, ENI_MSIX_NAME_SIZE, + "eni-vdpa[%s]-%d", pci_name(pdev), i); + irq = pci_irq_vector(pdev, i); + ret = devm_request_irq(&pdev->dev, irq, + eni_vdpa_vq_handler, + 0, eni_vdpa->vring[i].msix_name, + &eni_vdpa->vring[i]); + if (ret) { + ENI_ERR(pdev, "failed to request irq for vq %d\n", i); + goto err; + } + vp_legacy_queue_vector(ldev, i, i); + eni_vdpa->vring[i].irq = irq; + } + + snprintf(eni_vdpa->msix_name, ENI_MSIX_NAME_SIZE, "eni-vdpa[%s]-config", + pci_name(pdev)); + irq = pci_irq_vector(pdev, queues); + ret = devm_request_irq(&pdev->dev, irq, eni_vdpa_config_handler, 0, + eni_vdpa->msix_name, eni_vdpa); + if (ret) { + ENI_ERR(pdev, "failed to request irq for config vq %d\n", i); + goto err; + } + vp_legacy_config_vector(ldev, queues); + eni_vdpa->config_irq = irq; + + return 0; +err: + eni_vdpa_free_irq(eni_vdpa); + return ret; +} + +static void eni_vdpa_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + u8 s = eni_vdpa_get_status(vdpa); + + if (status & VIRTIO_CONFIG_S_DRIVER_OK && + !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { + eni_vdpa_request_irq(eni_vdpa); + } + + vp_legacy_set_status(ldev, status); + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) && + (s & VIRTIO_CONFIG_S_DRIVER_OK)) + eni_vdpa_free_irq(eni_vdpa); +} + +static int eni_vdpa_reset(struct vdpa_device *vdpa) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + u8 s = eni_vdpa_get_status(vdpa); + + vp_legacy_set_status(ldev, 0); + + if (s & VIRTIO_CONFIG_S_DRIVER_OK) + eni_vdpa_free_irq(eni_vdpa); + + return 0; +} + +static u16 eni_vdpa_get_vq_num_max(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_queue_size(ldev, 0); +} + +static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_queue_size(ldev, 0); +} + +static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa,
u16 qid, + struct vdpa_vq_state *state) +{ + return -EOPNOTSUPP; +} + +static int eni_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid, + const struct vdpa_vq_state *state) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + const struct vdpa_vq_state_split *split = &state->split; + + /* ENI is built upon the virtio-pci specification, which does not + * support setting the state of a virtqueue. But if the state happens + * to equal the device's initial state, we can let it go. + */ + if (!vp_legacy_get_queue_enable(ldev, qid) + && split->avail_index == 0) + return 0; + + return -EOPNOTSUPP; +} + + +static void eni_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid, + struct vdpa_callback *cb) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + + eni_vdpa->vring[qid].cb = *cb; +} + +static void eni_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 qid, + bool ready) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + /* ENI is a legacy virtio-pci device. Disabling a virtqueue is not + * supported by the specification, but we can achieve it by setting + * the queue address to 0. + */ + if (!ready) + vp_legacy_set_queue_address(ldev, qid, 0); +} + +static bool eni_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return vp_legacy_get_queue_enable(ldev, qid); +} + +static void eni_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid, + u32 num) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + struct pci_dev *pdev = ldev->pci_dev; + u16 n = vp_legacy_get_queue_size(ldev, qid); + + /* ENI is a legacy virtio-pci device which does not allow changing + * the virtqueue size. Just report an error if someone tries to + * change it. + */ + if (num != n) + ENI_ERR(pdev, + "not supported to set vq %u fixed num %u to %u\n", + qid, n, num); +} + +static int eni_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + u32 pfn = desc_area >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + + vp_legacy_set_queue_address(ldev, qid, pfn); + + return 0; +} + +static void eni_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + + iowrite16(qid, eni_vdpa->vring[qid].notify); +} + +static u32 eni_vdpa_get_device_id(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return ldev->id.device; +} + +static u32 eni_vdpa_get_vendor_id(struct vdpa_device *vdpa) +{ + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); + + return ldev->id.vendor; +} + +static u32 eni_vdpa_get_vq_align(struct vdpa_device *vdpa) +{ + return PAGE_SIZE; +} + +static size_t eni_vdpa_get_config_size(struct vdpa_device *vdpa) +{ + return sizeof(struct virtio_net_config); +} + + +static void eni_vdpa_get_config(struct vdpa_device *vdpa, + unsigned int offset, + void *buf, unsigned int len) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + void __iomem *ioaddr = ldev->ioaddr + + VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) + + offset; + u8 *p = buf; + int i; + + for (i = 0; i < len; i++) + *p++ = ioread8(ioaddr + i); +} + +static void eni_vdpa_set_config(struct vdpa_device *vdpa, + unsigned int offset, const void *buf, + unsigned int len) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + void __iomem *ioaddr = ldev->ioaddr + +
VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) + + offset; + const u8 *p = buf; + int i; + + for (i = 0; i < len; i++) + iowrite8(*p++, ioaddr + i); +} + +static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); + + eni_vdpa->config_cb = *cb; +} + +static const struct vdpa_config_ops eni_vdpa_ops = { + .get_features = eni_vdpa_get_features, + .set_features = eni_vdpa_set_features, + .get_status = eni_vdpa_get_status, + .set_status = eni_vdpa_set_status, + .reset = eni_vdpa_reset, + .get_vq_num_max = eni_vdpa_get_vq_num_max, + .get_vq_num_min = eni_vdpa_get_vq_num_min, + .get_vq_state = eni_vdpa_get_vq_state, + .set_vq_state = eni_vdpa_set_vq_state, + .set_vq_cb = eni_vdpa_set_vq_cb, + .set_vq_ready = eni_vdpa_set_vq_ready, + .get_vq_ready = eni_vdpa_get_vq_ready, + .set_vq_num = eni_vdpa_set_vq_num, + .set_vq_address = eni_vdpa_set_vq_address, + .kick_vq = eni_vdpa_kick_vq, + .get_device_id = eni_vdpa_get_device_id, + .get_vendor_id = eni_vdpa_get_vendor_id, + .get_vq_align = eni_vdpa_get_vq_align, + .get_config_size = eni_vdpa_get_config_size, + .get_config = eni_vdpa_get_config, + .set_config = eni_vdpa_set_config, + .set_config_cb = eni_vdpa_set_config_cb, + .get_vq_irq = eni_vdpa_get_vq_irq, +}; + + +static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa) +{ + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; + u32 features = vp_legacy_get_features(ldev); + u16 num = 2; + + if (features & BIT_ULL(VIRTIO_NET_F_MQ)) { + __virtio16 max_virtqueue_pairs; + + eni_vdpa_get_config(&eni_vdpa->vdpa, + offsetof(struct virtio_net_config, max_virtqueue_pairs), + &max_virtqueue_pairs, + sizeof(max_virtqueue_pairs)); + num = 2 * __virtio16_to_cpu(virtio_legacy_is_little_endian(), + max_virtqueue_pairs); + } + + if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + num += 1; + + return num; +} + +static void eni_vdpa_free_irq_vectors(void *data) +{ + pci_free_irq_vectors(data); +} + +static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct eni_vdpa *eni_vdpa; + struct virtio_pci_legacy_device *ldev; + int ret, i; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, + dev, &eni_vdpa_ops, NULL, false); + if (IS_ERR(eni_vdpa)) { + ENI_ERR(pdev, "failed to allocate vDPA structure\n"); + return PTR_ERR(eni_vdpa); + } + + ldev = &eni_vdpa->ldev; + ldev->pci_dev = pdev; + + ret = vp_legacy_probe(ldev); + if (ret) { + ENI_ERR(pdev, "failed to probe legacy PCI device\n"); + goto err; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, eni_vdpa); + + eni_vdpa->vdpa.dma_dev = &pdev->dev; + eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa); + + ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev); + if (ret) { + ENI_ERR(pdev, + "failed for adding devres for freeing irq vectors\n"); + goto err; + } + + eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues, + sizeof(*eni_vdpa->vring), + GFP_KERNEL); + if (!eni_vdpa->vring) { + ret = -ENOMEM; + ENI_ERR(pdev, "failed to allocate virtqueues\n"); + goto err; + } + + for (i = 0; i < eni_vdpa->queues; i++) { + eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; + eni_vdpa->vring[i].notify = ldev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; + } + eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; + + ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues); + if (ret) { + ENI_ERR(pdev, "failed to register to vdpa bus\n"); + goto err; + } + + 
return 0; + +err: + put_device(&eni_vdpa->vdpa.dev); + return ret; +} + +static void eni_vdpa_remove(struct pci_dev *pdev) +{ + struct eni_vdpa *eni_vdpa = pci_get_drvdata(pdev); + + vdpa_unregister_device(&eni_vdpa->vdpa); + vp_legacy_remove(&eni_vdpa->ldev); +} + +static struct pci_device_id eni_pci_ids[] = { + { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, + VIRTIO_TRANS_ID_NET, + PCI_SUBVENDOR_ID_REDHAT_QUMRANET, + VIRTIO_ID_NET) }, + { 0 }, +}; + +static struct pci_driver eni_vdpa_driver = { + .name = "alibaba-eni-vdpa", + .id_table = eni_pci_ids, + .probe = eni_vdpa_probe, + .remove = eni_vdpa_remove, +}; + +module_pci_driver(eni_vdpa_driver); + +MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>"); +MODULE_DESCRIPTION("Alibaba ENI vDPA driver"); +MODULE_LICENSE("GPL v2"); From f5c4da43de8323114c9052dea0388ab9d0fdfe21 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 9 Sep 2021 15:36:33 +0300 Subject: [PATCH 0627/1202] vdpa/mlx5: Remove mtu field from vdpa net device No need to save the mtu in the net device struct. We can save it in the config struct, which cannot be modified. Moreover, move the initialization to mlx5_vdpa_dev_add(); the mlx5_vdpa_set_features() callback is not the right place to put it. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210909123635.30884-2-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index bd56de7484dcb..570bd5a8e068c 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -159,7 +159,6 @@ struct mlx5_vdpa_net { struct mlx5_fc *rx_counter; struct mlx5_flow_handle *rx_rule; bool setup; - u16 mtu; u32 cur_num_vqs; }; @@ -1942,8 +1941,6 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) return err; ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; - ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu); - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); update_cvq_info(mvdev); return err; } @@ -2413,6 +2410,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) struct mlx5_vdpa_net *ndev; struct mlx5_core_dev *mdev; u32 max_vqs; + u16 mtu; int err; if (mgtdev->ndev) @@ -2440,10 +2438,13 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) init_mvqs(ndev); mutex_init(&ndev->reslock); config = &ndev->config; - err = query_mtu(mdev, &ndev->mtu); + err = query_mtu(mdev, &mtu); if (err) goto err_mtu; + ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); if (err) goto err_mtu; From bd78d689b657f89ce6199587eee3747e88262d39 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 9 Sep 2021 15:36:34 +0300 Subject: [PATCH 0628/1202] vdpa/mlx5: Rename control VQ workqueue to vdpa wq A subsequent patch will use the same workqueue for executing other work not related to control VQ. Rename the workqueue and the work queue entry used to convey information to the workqueue.
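To illustrate the generalized naming (a sketch only; the handler name is a placeholder, while the pattern itself matches how the entry is used later in this series), any deferred work now follows the same shape:

	struct mlx5_vdpa_wq_ent *wqent;

	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
	if (!wqent)
		return;
	wqent->mvdev = mvdev;			/* device the work acts on */
	INIT_WORK(&wqent->work, handler);	/* CVQ kick, link update, ... */
	queue_work(mvdev->wq, &wqent->work);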
Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210909123635.30884-3-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 01a848adf5903..81dc3d88d3ddd 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -63,7 +63,7 @@ struct mlx5_control_vq { unsigned short head; }; -struct mlx5_ctrl_wq_ent { +struct mlx5_vdpa_wq_ent { struct work_struct work; struct mlx5_vdpa_dev *mvdev; }; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 570bd5a8e068c..b17310911ba92 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1556,14 +1556,14 @@ static void mlx5_cvq_kick_handler(struct work_struct *work) { virtio_net_ctrl_ack status = VIRTIO_NET_ERR; struct virtio_net_ctrl_hdr ctrl; - struct mlx5_ctrl_wq_ent *wqent; + struct mlx5_vdpa_wq_ent *wqent; struct mlx5_vdpa_dev *mvdev; struct mlx5_control_vq *cvq; struct mlx5_vdpa_net *ndev; size_t read, write; int err; - wqent = container_of(work, struct mlx5_ctrl_wq_ent, work); + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); mvdev = wqent->mvdev; ndev = to_mlx5_vdpa_ndev(mvdev); cvq = &mvdev->cvq; @@ -1615,7 +1615,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); struct mlx5_vdpa_virtqueue *mvq; - struct mlx5_ctrl_wq_ent *wqent; + struct mlx5_vdpa_wq_ent *wqent; if (!is_index_valid(mvdev, idx)) return; @@ -2474,7 +2474,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mr; - mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq"); + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); if (!mvdev->wq) { err = -ENOMEM; goto err_res2; From e4b0c38fb12de8b2ad35add6217b48822f6826eb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 9 Sep 2021 15:36:35 +0300 Subject: [PATCH 0629/1202] vdpa/mlx5: Propagate link status from device to vdpa driver Add code to register for hardware asynchronous events. Use this mechanism to track link status events coming from the device and update the config struct. After a link status change, call the vdpa callback to notify of the link status change. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210909123635.30884-4-elic@nvidia.com Signed-off-by: Michael S.
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 94 ++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b17310911ba92..b5bd1a5532560 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -160,6 +160,8 @@ struct mlx5_vdpa_net { struct mlx5_flow_handle *rx_rule; bool setup; u32 cur_num_vqs; + struct notifier_block nb; + struct vdpa_callback config_cb; }; static void free_resources(struct mlx5_vdpa_net *ndev); @@ -1851,6 +1853,7 @@ static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_STATUS); print_features(mvdev, ndev->mvdev.mlx_features, false); return ndev->mvdev.mlx_features; @@ -1947,8 +1950,10 @@ static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) { - /* not implemented */ - mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n"); + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + ndev->config_cb = *cb; } #define MLX5_VDPA_MAX_VQ_ENTRIES 256 @@ -2401,6 +2406,82 @@ struct mlx5_vdpa_mgmtdev { struct mlx5_vdpa_net *ndev; }; +static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; + int err; + + MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); + + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); + if (err) + return 0; + + return MLX5_GET(query_vport_state_out, out, state); +} + +static bool get_link_state(struct mlx5_vdpa_dev *mvdev) +{ + if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == + VPORT_STATE_UP) + return true; + + return false; +} + +static void update_carrier(struct work_struct *work) +{ + struct mlx5_vdpa_wq_ent *wqent; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); + mvdev = wqent->mvdev; + ndev = to_mlx5_vdpa_ndev(mvdev); + if (get_link_state(mvdev)) + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + else + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + + if (ndev->config_cb.callback) + ndev->config_cb.callback(ndev->config_cb.private); + + kfree(wqent); +} + +static int event_handler(struct notifier_block *nb, unsigned long event, void *param) +{ + struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); + struct mlx5_eqe *eqe = param; + int ret = NOTIFY_DONE; + struct mlx5_vdpa_wq_ent *wqent; + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); + if (!wqent) + return NOTIFY_DONE; + + wqent->mvdev = &ndev->mvdev; + INIT_WORK(&wqent->work, update_carrier); + queue_work(ndev->mvdev.wq, &wqent->work); + ret = NOTIFY_OK; + break; + default: + 
return NOTIFY_DONE; + } + return ret; + } + return ret; +} + static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); @@ -2449,6 +2530,11 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) if (err) goto err_mtu; + if (get_link_state(mvdev)) + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + else + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); + if (!is_zero_ether_addr(config->mac)) { pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); err = mlx5_mpfs_add_mac(pfmdev, config->mac); @@ -2480,6 +2566,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) goto err_res2; } + ndev->nb.notifier_call = event_handler; + mlx5_notifier_register(mdev, &ndev->nb); ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs); mvdev->vdev.mdev = &mgtdev->mgtdev; err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1); @@ -2510,7 +2598,9 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); destroy_workqueue(mvdev->wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; From 3f90786de96ba6700e928739d9aff123a500eb42 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 22 Sep 2021 19:09:00 +0200 Subject: [PATCH 0630/1202] hwrng: virtio - add an internal buffer The hwrng core uses two buffers that can be mixed in the virtio-rng queue. If a buffer is provided with wait=0 it is enqueued in the virtio-rng queue but unused by the caller. On the next call, the core provides another buffer, but the first one is filled instead and the new one is queued. The caller then reads data from the new buffer, which has not been updated, and the data in the first one is lost. To avoid this mix, virtio-rng needs to use its own unique internal buffer, at the cost of a data copy to the caller's buffer.
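Schematically, the new scheme looks like this (a sketch of the idea only, not the diff below): the driver owns the single buffer that is ever placed on the virtqueue, and every caller is served by copying out of it:

	/* queue the driver-owned buffer, never the caller's */
	sg_init_one(&sg, vi->data, sizeof(vi->data));
	virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL);
	virtqueue_kick(vi->vq);

	/* on read, copy out of vi->data instead of handing buf to the queue */
	chunk = min_t(unsigned int, size, vi->data_avail);
	memcpy(buf, vi->data, chunk);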
Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210922170903.577801-2-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Tested-by: Alexander Potapenko --- drivers/char/hw_random/virtio-rng.c | 43 ++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a90001e02bf7a..208c547dcac16 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -18,13 +18,20 @@ static DEFINE_IDA(rng_index_ida); struct virtrng_info { struct hwrng hwrng; struct virtqueue *vq; - struct completion have_data; char name[25]; - unsigned int data_avail; int index; bool busy; bool hwrng_register_done; bool hwrng_removed; + /* data transfer */ + struct completion have_data; + unsigned int data_avail; + /* minimal size returned by rng_buffer_size() */ +#if SMP_CACHE_BYTES < 32 + u8 data[32]; +#else + u8 data[SMP_CACHE_BYTES]; +#endif }; static void random_recv_done(struct virtqueue *vq) @@ -39,14 +46,14 @@ static void random_recv_done(struct virtqueue *vq) } /* The host will fill any buffer we give it with sweet, sweet randomness. */ -static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size) +static void register_buffer(struct virtrng_info *vi) { struct scatterlist sg; - sg_init_one(&sg, buf, size); + sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ - virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL); + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); virtqueue_kick(vi->vq); } @@ -55,6 +62,8 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; + unsigned int chunk; + size_t read; if (vi->hwrng_removed) return -ENODEV; @@ -62,19 +71,33 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (!vi->busy) { vi->busy = true; reinit_completion(&vi->have_data); - register_buffer(vi, buf, size); + register_buffer(vi); } if (!wait) return 0; - ret = wait_for_completion_killable(&vi->have_data); - if (ret < 0) - return ret; + read = 0; + while (size != 0) { + ret = wait_for_completion_killable(&vi->have_data); + if (ret < 0) + return ret; + + chunk = min_t(unsigned int, size, vi->data_avail); + memcpy(buf + read, vi->data, chunk); + read += chunk; + size -= chunk; + vi->data_avail = 0; + + if (size != 0) { + reinit_completion(&vi->have_data); + register_buffer(vi); + } + } vi->busy = false; - return vi->data_avail; + return read; } static void virtio_cleanup(struct hwrng *rng) From 7a0227b8216582633fdd7ad6b45bc43e660b4c59 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 22 Sep 2021 19:09:01 +0200 Subject: [PATCH 0631/1202] hwrng: virtio - don't wait on cleanup When the virtio-rng device was dropped by the hwrng core we were forced to wait for the buffer to come back from the device, so that no ongoing operation could spoil the buffer. But now, as the buffer is internal to virtio-rng, we can release the waiting loop immediately; the buffer will be retrieved and used when the virtio-rng driver is selected again. This avoids hanging on an rng_current write command if the virtio-rng device is blocked by a lack of entropy, and allows selecting another entropy source if the current one is empty. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210922170903.577801-3-lvivier@redhat.com Signed-off-by: Michael S.
Tsirkin Tested-by: Alexander Potapenko --- drivers/char/hw_random/virtio-rng.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 208c547dcac16..173aeea835bb6 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -82,6 +82,11 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; + /* if vi->data_avail is 0, we have been interrupted + * by a cleanup, but buffer stays in the queue + */ + if (vi->data_avail == 0) + return read; chunk = min_t(unsigned int, size, vi->data_avail); memcpy(buf + read, vi->data, chunk); @@ -105,7 +110,7 @@ static void virtio_cleanup(struct hwrng *rng) struct virtrng_info *vi = (struct virtrng_info *)rng->priv; if (vi->busy) - wait_for_completion(&vi->have_data); + complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) From caaf2874ba27b92bca6f0298bf88bad94067ec37 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 22 Sep 2021 19:09:02 +0200 Subject: [PATCH 0632/1202] hwrng: virtio - don't waste entropy if we don't use all the entropy available in the buffer, keep it and use it later. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210922170903.577801-4-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Tested-by: Alexander Potapenko --- drivers/char/hw_random/virtio-rng.c | 52 +++++++++++++++++++---------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 173aeea835bb6..8ba97cf4ca8fb 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -26,6 +26,7 @@ struct virtrng_info { /* data transfer */ struct completion have_data; unsigned int data_avail; + unsigned int data_idx; /* minimal size returned by rng_buffer_size() */ #if SMP_CACHE_BYTES < 32 u8 data[32]; @@ -42,6 +43,9 @@ static void random_recv_done(struct virtqueue *vq) if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) return; + vi->data_idx = 0; + vi->busy = false; + complete(&vi->have_data); } @@ -58,6 +62,16 @@ static void register_buffer(struct virtrng_info *vi) virtqueue_kick(vi->vq); } +static unsigned int copy_data(struct virtrng_info *vi, void *buf, + unsigned int size) +{ + size = min_t(unsigned int, size, vi->data_avail); + memcpy(buf, vi->data + vi->data_idx, size); + vi->data_idx += size; + vi->data_avail -= size; + return size; +} + static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; @@ -68,17 +82,29 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (vi->hwrng_removed) return -ENODEV; - if (!vi->busy) { - vi->busy = true; - reinit_completion(&vi->have_data); - register_buffer(vi); + read = 0; + + /* copy available data */ + if (vi->data_avail) { + chunk = copy_data(vi, buf, size); + size -= chunk; + read += chunk; } if (!wait) - return 0; + return read; - read = 0; + /* We have already copied available entropy, + * so either size is 0 or data_avail is 0 + */ while (size != 0) { + /* data_avail is 0 */ + if (!vi->busy) { + /* no pending request, ask for more */ + vi->busy = true; + reinit_completion(&vi->have_data); + register_buffer(vi); + } ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; @@ -88,20 +114,11 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if 
(vi->data_avail == 0) return read; - chunk = min_t(unsigned int, size, vi->data_avail); - memcpy(buf + read, vi->data, chunk); - read += chunk; + chunk = copy_data(vi, buf + read, size); size -= chunk; - vi->data_avail = 0; - - if (size != 0) { - reinit_completion(&vi->have_data); - register_buffer(vi); - } + read += chunk; } - vi->busy = false; - return read; } @@ -161,6 +178,7 @@ static void remove_common(struct virtio_device *vdev) vi->hwrng_removed = true; vi->data_avail = 0; + vi->data_idx = 0; complete(&vi->have_data); vdev->config->reset(vdev); vi->busy = false; From 4675d126fa40fc0960e3b15f9994c33061747383 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 22 Sep 2021 19:09:03 +0200 Subject: [PATCH 0633/1202] hwrng: virtio - always add a pending request If we ensure some data is already available by enqueuing the buffer again once its data is exhausted, we can return what we have without waiting for the device's answer. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210922170903.577801-5-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Tested-by: Alexander Potapenko --- drivers/char/hw_random/virtio-rng.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 8ba97cf4ca8fb..0b3c9b643495f 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -20,7 +20,6 @@ struct virtrng_info { struct virtqueue *vq; char name[25]; int index; - bool busy; bool hwrng_register_done; bool hwrng_removed; /* data transfer */ @@ -44,16 +43,16 @@ static void random_recv_done(struct virtqueue *vq) return; vi->data_idx = 0; - vi->busy = false; complete(&vi->have_data); } -/* The host will fill any buffer we give it with sweet, sweet randomness. */ -static void register_buffer(struct virtrng_info *vi) +static void request_entropy(struct virtrng_info *vi) { struct scatterlist sg; + reinit_completion(&vi->have_data); + sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer.
*/ @@ -69,6 +68,8 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, memcpy(buf, vi->data + vi->data_idx, size); vi->data_idx += size; vi->data_avail -= size; + if (vi->data_avail == 0) + request_entropy(vi); return size; } @@ -98,13 +99,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) * so either size is 0 or data_avail is 0 */ while (size != 0) { - /* data_avail is 0 */ - if (!vi->busy) { - /* no pending request, ask for more */ - vi->busy = true; - reinit_completion(&vi->have_data); - register_buffer(vi); - } + /* data_avail is 0 but a request is pending */ ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; @@ -126,8 +121,7 @@ static void virtio_cleanup(struct hwrng *rng) { struct virtrng_info *vi = (struct virtrng_info *)rng->priv; - if (vi->busy) - complete(&vi->have_data); + complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) @@ -163,6 +157,9 @@ static int probe_common(struct virtio_device *vdev) goto err_find; } + /* we always have a pending entropy request */ + request_entropy(vi); + return 0; err_find: @@ -181,7 +178,6 @@ static void remove_common(struct virtio_device *vdev) vi->data_idx = 0; complete(&vi->have_data); vdev->config->reset(vdev); - vi->busy = false; if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); vdev->config->del_vqs(vdev); From f5c127ac43fe5ddf8fc85bbdb8cb457a8beb2cda Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 20 Oct 2021 19:23:22 +0800 Subject: [PATCH 0634/1202] virtio_ring: make virtqueue_add_indirect_packed prettier Align the arguments of virtqueue_add_indirect_packed() with the opening parenthesis to make it look prettier. Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211020112323.67466-2-xuanzhuo@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd95dfd85e980..91a46c4da87d3 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1050,12 +1050,12 @@ static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, } static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, - struct scatterlist *sgs[], - unsigned int total_sg, - unsigned int out_sgs, - unsigned int in_sgs, - void *data, - gfp_t gfp) + struct scatterlist *sgs[], + unsigned int total_sg, + unsigned int out_sgs, + unsigned int in_sgs, + void *data, + gfp_t gfp) { struct vring_packed_desc *desc; struct scatterlist *sg; From b9748bf587fd774b5a8b36ea169f22d6ae74bf6a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 19 Oct 2021 17:26:37 -0400 Subject: [PATCH 0635/1202] drm/amdgpu: use generic fb helpers instead of setting up AMD's own. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the shadow buffer support from the generic framebuffer emulation, runtime PM can now kick in when there are no console updates.
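As a rough illustration of what the driver-side setup reduces to (a sketch under the assumption of a device with display hardware and at least one connector; drm_dev_register() and drm_fbdev_generic_setup() are standard DRM core calls, while the low_vram flag is hypothetical):

	ret = drm_dev_register(drm, 0);
	if (ret)
		return ret;
	/* The generic emulation keeps a shadow buffer in system memory and
	 * only touches VRAM on damage flushes, so runtime PM can engage
	 * while the console is idle; 8 bpp on low-VRAM boards, 32 otherwise.
	 */
	drm_fbdev_generic_setup(drm, low_vram ? 8 : 32);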
Signed-off-by: Evan Quan Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 11 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 + drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 388 -------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 30 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 20 - 7 files changed, 50 insertions(+), 426 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 653726588956e..7fedbb725e179 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -45,7 +45,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ - amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \ + amdgpu_gem.o amdgpu_ring.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 69e0eccc00fb3..88b5402786015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3674,8 +3674,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - amdgpu_fbdev_init(adev); - r = amdgpu_pm_sysfs_init(adev); if (r) { adev->pm_sysfs_en = false; @@ -3833,8 +3831,6 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_ucode_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); - amdgpu_fbdev_fini(adev); - amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); @@ -3929,7 +3925,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_kms_helper_poll_disable(dev); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4006,7 +4002,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) flush_delayed_work(&adev->delayed_init_work); if (fbcon) - amdgpu_fbdev_set_suspend(adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); drm_kms_helper_poll_enable(dev); @@ -4638,7 +4634,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; - amdgpu_fbdev_set_suspend(tmp_adev, 0); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); /* * The GPU enters bad state once faulty pages @@ -5025,7 +5021,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - amdgpu_fbdev_set_suspend(tmp_adev, 1); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index dc50c05f23fc2..5faf3ef28080d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1603,13 +1603,10 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - /* 
don't unpin kernel fb objects */ - if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); - } + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c718fb5f3f8a2..9e3d550223f58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2000,6 +2000,19 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, goto err_pci; } + /* + * 1. don't init fbdev on hw without DCE + * 2. don't init fbdev if there are no connectors + */ + if (adev->mode_info.mode_config_initialized && + !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { + /* select 8 bpp console on low vram cards */ + if (adev->gmc.real_vram_size <= (32*1024*1024)) + drm_fbdev_generic_setup(adev_to_drm(adev), 8); + else + drm_fbdev_generic_setup(adev_to_drm(adev), 32); + } + ret = amdgpu_debugfs_init(adev); if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c deleted file mode 100644 index cd0acbea75da6..0000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Copyright © 2007 David Airlie - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * David Airlie - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "amdgpu.h" -#include "cikd.h" -#include "amdgpu_gem.h" - -#include "amdgpu_display.h" - -/* object hierarchy - - this contains a helper + a amdgpu fb - the helper contains a pointer to amdgpu framebuffer baseclass. 
-*/ - -static int -amdgpufb_open(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - int ret = pm_runtime_get_sync(fb_helper->dev->dev); - if (ret < 0 && ret != -EACCES) { - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return ret; - } - return 0; -} - -static int -amdgpufb_release(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - - pm_runtime_mark_last_busy(fb_helper->dev->dev); - pm_runtime_put_autosuspend(fb_helper->dev->dev); - return 0; -} - -static const struct fb_ops amdgpufb_ops = { - .owner = THIS_MODULE, - DRM_FB_HELPER_DEFAULT_OPS, - .fb_open = amdgpufb_open, - .fb_release = amdgpufb_release, - .fb_fillrect = drm_fb_helper_cfb_fillrect, - .fb_copyarea = drm_fb_helper_cfb_copyarea, - .fb_imageblit = drm_fb_helper_cfb_imageblit, -}; - - -int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int cpp, bool tiled) -{ - int aligned = width; - int pitch_mask = 0; - - switch (cpp) { - case 1: - pitch_mask = 255; - break; - case 2: - pitch_mask = 127; - break; - case 3: - case 4: - pitch_mask = 63; - break; - } - - aligned += pitch_mask; - aligned &= ~pitch_mask; - return aligned * cpp; -} - -static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) -{ - struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - int ret; - - ret = amdgpu_bo_reserve(abo, true); - if (likely(ret == 0)) { - amdgpu_bo_kunmap(abo); - amdgpu_bo_unpin(abo); - amdgpu_bo_unreserve(abo); - } - drm_gem_object_put(gobj); -} - -static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object **gobj_p) -{ - const struct drm_format_info *info; - struct amdgpu_device *adev = rfbdev->adev; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0, domain; - int ret; - int aligned_size, size; - int height = mode_cmd->height; - u32 cpp; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; - - info = drm_get_format_info(adev_to_drm(adev), mode_cmd); - cpp = info->cpp[0]; - - /* need to align pitch with crtc limits */ - mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, - fb_tiled); - domain = amdgpu_display_supported_domains(adev, flags); - height = ALIGN(mode_cmd->height, 8); - size = mode_cmd->pitches[0] * height; - aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, - ttm_bo_type_device, NULL, &gobj); - if (ret) { - pr_err("failed to allocate framebuffer (%d)\n", aligned_size); - return -ENOMEM; - } - abo = gem_to_amdgpu_bo(gobj); - - if (fb_tiled) - tiling_flags = AMDGPU_TILING_SET(ARRAY_MODE, GRPH_ARRAY_2D_TILED_THIN1); - - ret = amdgpu_bo_reserve(abo, false); - if (unlikely(ret != 0)) - goto out_unref; - - if (tiling_flags) { - ret = amdgpu_bo_set_tiling_flags(abo, - tiling_flags); - if (ret) - dev_err(adev->dev, "FB failed to set tiling flags\n"); - } - - ret = amdgpu_bo_pin(abo, domain); - if (ret) { - amdgpu_bo_unreserve(abo); - goto out_unref; - } - - ret = amdgpu_ttm_alloc_gart(&abo->tbo); - if (ret) { - amdgpu_bo_unreserve(abo); - dev_err(adev->dev, "%p bind failed\n", abo); - goto out_unref; - } - - ret = amdgpu_bo_kmap(abo, NULL); - amdgpu_bo_unreserve(abo); - if (ret) { - goto out_unref; - } - - *gobj_p = gobj; - return 0; -out_unref: - amdgpufb_destroy_pinned_object(gobj); - 
*gobj_p = NULL; - return ret; -} - -static int amdgpufb_create(struct drm_fb_helper *helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct amdgpu_fbdev *rfbdev = (struct amdgpu_fbdev *)helper; - struct amdgpu_device *adev = rfbdev->adev; - struct fb_info *info; - struct drm_framebuffer *fb = NULL; - struct drm_mode_fb_cmd2 mode_cmd; - struct drm_gem_object *gobj = NULL; - struct amdgpu_bo *abo = NULL; - int ret; - - memset(&mode_cmd, 0, sizeof(mode_cmd)); - mode_cmd.width = sizes->surface_width; - mode_cmd.height = sizes->surface_height; - - if (sizes->surface_bpp == 24) - sizes->surface_bpp = 32; - - mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, - sizes->surface_depth); - - ret = amdgpufb_create_pinned_object(rfbdev, &mode_cmd, &gobj); - if (ret) { - DRM_ERROR("failed to create fbcon object %d\n", ret); - return ret; - } - - abo = gem_to_amdgpu_bo(gobj); - - /* okay we have an object now allocate the framebuffer */ - info = drm_fb_helper_alloc_fbi(helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto out; - } - - ret = amdgpu_display_gem_fb_init(adev_to_drm(adev), &rfbdev->rfb, - &mode_cmd, gobj); - if (ret) { - DRM_ERROR("failed to initialize framebuffer %d\n", ret); - goto out; - } - - fb = &rfbdev->rfb.base; - - /* setup helper */ - rfbdev->helper.fb = fb; - - info->fbops = &amdgpufb_ops; - - info->fix.smem_start = amdgpu_gmc_vram_cpu_pa(adev, abo); - info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = amdgpu_bo_kptr(abo); - info->screen_size = amdgpu_bo_size(abo); - - drm_fb_helper_fill_info(info, &rfbdev->helper, sizes); - - /* setup aperture base/size for vesafb takeover */ - info->apertures->ranges[0].base = adev_to_drm(adev)->mode_config.fb_base; - info->apertures->ranges[0].size = adev->gmc.aper_size; - - /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - - if (info->screen_base == NULL) { - ret = -ENOSPC; - goto out; - } - - DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); - DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base); - DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); - DRM_INFO("fb depth is %d\n", fb->format->depth); - DRM_INFO(" pitch is %d\n", fb->pitches[0]); - - vga_switcheroo_client_fb_set(adev->pdev, info); - return 0; - -out: - if (fb && ret) { - drm_gem_object_put(gobj); - drm_framebuffer_unregister_private(fb); - drm_framebuffer_cleanup(fb); - kfree(fb); - } - return ret; -} - -static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev) -{ - struct amdgpu_framebuffer *rfb = &rfbdev->rfb; - int i; - - drm_fb_helper_unregister_fbi(&rfbdev->helper); - - if (rfb->base.obj[0]) { - for (i = 0; i < rfb->base.format->num_planes; i++) - drm_gem_object_put(rfb->base.obj[0]); - amdgpufb_destroy_pinned_object(rfb->base.obj[0]); - rfb->base.obj[0] = NULL; - drm_framebuffer_unregister_private(&rfb->base); - drm_framebuffer_cleanup(&rfb->base); - } - drm_fb_helper_fini(&rfbdev->helper); - - return 0; -} - -static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .fb_probe = amdgpufb_create, -}; - -int amdgpu_fbdev_init(struct amdgpu_device *adev) -{ - struct amdgpu_fbdev *rfbdev; - int bpp_sel = 32; - int ret; - - /* don't init fbdev on hw without DCE */ - if (!adev->mode_info.mode_config_initialized) - return 0; - - /* don't init fbdev if there are no connectors */ - if (list_empty(&adev_to_drm(adev)->mode_config.connector_list)) - return 0; - - /* select 8 bpp console on low vram cards */ - if (adev->gmc.real_vram_size <= 
(32*1024*1024)) - bpp_sel = 8; - - rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); - if (!rfbdev) - return -ENOMEM; - - rfbdev->adev = adev; - adev->mode_info.rfbdev = rfbdev; - - drm_fb_helper_prepare(adev_to_drm(adev), &rfbdev->helper, - &amdgpu_fb_helper_funcs); - - ret = drm_fb_helper_init(adev_to_drm(adev), &rfbdev->helper); - if (ret) { - kfree(rfbdev); - return ret; - } - - /* disable all the possible outputs/crtcs before entering KMS mode */ - if (!amdgpu_device_has_dc_support(adev) && !amdgpu_virtual_display) - drm_helper_disable_unused_functions(adev_to_drm(adev)); - - drm_fb_helper_initial_config(&rfbdev->helper, bpp_sel); - return 0; -} - -void amdgpu_fbdev_fini(struct amdgpu_device *adev) -{ - if (!adev->mode_info.rfbdev) - return; - - amdgpu_fbdev_destroy(adev_to_drm(adev), adev->mode_info.rfbdev); - kfree(adev->mode_info.rfbdev); - adev->mode_info.rfbdev = NULL; -} - -void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state) -{ - if (adev->mode_info.rfbdev) - drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper, - state); -} - -int amdgpu_fbdev_total_size(struct amdgpu_device *adev) -{ - struct amdgpu_bo *robj; - int size = 0; - - if (!adev->mode_info.rfbdev) - return 0; - - robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); - size += amdgpu_bo_size(robj); - return size; -} - -bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) -{ - if (!adev->mode_info.rfbdev) - return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) - return true; - return false; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index a573424a6e0b4..a224b5295edd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -876,6 +876,32 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, return r; } +static int amdgpu_gem_align_pitch(struct amdgpu_device *adev, + int width, + int cpp, + bool tiled) +{ + int aligned = width; + int pitch_mask = 0; + + switch (cpp) { + case 1: + pitch_mask = 255; + break; + case 2: + pitch_mask = 127; + break; + case 3: + case 4: + pitch_mask = 63; + break; + } + + aligned += pitch_mask; + aligned &= ~pitch_mask; + return aligned * cpp; +} + int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args) @@ -896,8 +922,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, if (adev->mman.buffer_funcs_enabled) flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED; - args->pitch = amdgpu_align_pitch(adev, args->width, - DIV_ROUND_UP(args->bpp, 8), 0); + args->pitch = amdgpu_gem_align_pitch(adev, args->width, + DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); domain = amdgpu_bo_get_preferred_domain(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 89fb372ed49c9..6043bf6fd414d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -232,8 +232,6 @@ struct amdgpu_i2c_chan { struct mutex mutex; }; -struct amdgpu_fbdev; - struct amdgpu_afmt { bool enabled; int offset; @@ -309,13 +307,6 @@ struct amdgpu_framebuffer { uint64_t address; }; -struct amdgpu_fbdev { - struct drm_fb_helper helper; - struct amdgpu_framebuffer rfb; - struct list_head fbdev_list; - struct amdgpu_device *adev; -}; - struct amdgpu_mode_info { struct atom_context *atom_context; struct 
card_info *atom_card_info;
@@ -341,8 +332,6 @@ struct amdgpu_mode_info {
 	struct edid *bios_hardcoded_edid;
 	int bios_hardcoded_edid_size;
 
-	/* pointer to fbdev info structure */
-	struct amdgpu_fbdev *rfbdev;
 	/* firmware flags */
 	u32 firmware_flags;
 	/* pointer to backlight encoder */
@@ -631,15 +620,6 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
 			int *hpos, ktime_t *stime, ktime_t *etime,
 			const struct drm_display_mode *mode);
 
-/* fbdev layer */
-int amdgpu_fbdev_init(struct amdgpu_device *adev);
-void amdgpu_fbdev_fini(struct amdgpu_device *adev);
-void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state);
-int amdgpu_fbdev_total_size(struct amdgpu_device *adev);
-bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj);
-
-int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
-
 /* amdgpu_display.c */
 void amdgpu_display_print_display_setup(struct drm_device *dev);
 int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);

From 98242b658a65e5aa3917f98293d977fafb73f943 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 26 May 2020 13:58:26 -0400
Subject: [PATCH 0636/1202] drm/amdgpu: add another raven1 gfxoff quirk

The HP EliteDesk 705 G4 Micro seems to have issues with gfxoff.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=207899
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 7f944bb11298e..28d3e8e80c889 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1269,6 +1269,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+	/* https://bugzilla.kernel.org/show_bug.cgi?id=207899 */
+	{ 0x1002, 0x15dd, 0x103c, 0x83e9, 0xd6 },
 	{ 0, 0, 0, 0, 0 },
 };

From eb7b12b84cfd97f86ae3c119c2a1ae4b04b83b47 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 2 Mar 2021 10:40:53 -0500
Subject: [PATCH 0637/1202] drm/amdgpu: only check for _PR3 on dGPUs

We don't support runtime pm on APUs. They support more dynamic power
savings using clock and powergating.

Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index d2955ea4a62bf..c5d8565f25fe4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -164,8 +164,10 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 	    !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
 		flags |= AMD_IS_PX;
 
-	parent = pci_upstream_bridge(adev->pdev);
-	adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+	if (!(flags & AMD_IS_APU)) {
+		parent = pci_upstream_bridge(adev->pdev);
+		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+	}
 
 	/* amdgpu_device_init should report only fatal error
 	 * like memory allocation failure or iomapping failure,

From 063244dd5577f094955b0d136343991d9e8cbaa2 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 3 Mar 2021 22:05:09 -0500
Subject: [PATCH 0638/1202] drm/amdgpu/swsmu/vgh: rename MSG_RlcPowerNotify

To match the new definition: MSG_RlcPowerDownNotify.
We only need to tell the SMU when we turn off the RLC; when it powers up it already handshakes with SMU. Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_types.h | 2 +- drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 18b862a90fbe5..f44a67254ef16 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -183,7 +183,7 @@ __SMU_DUMMY_MAP(SET_DRIVER_DUMMY_TABLE_DRAM_ADDR_LOW), \ __SMU_DUMMY_MAP(GET_UMC_FW_WA), \ __SMU_DUMMY_MAP(Mode1Reset), \ - __SMU_DUMMY_MAP(RlcPowerNotify), \ + __SMU_DUMMY_MAP(RlcPowerDownNotify), \ __SMU_DUMMY_MAP(SetHardMinIspiclkByFreq), \ __SMU_DUMMY_MAP(SetHardMinIspxclkByFreq), \ __SMU_DUMMY_MAP(SetSoftMinSocclkByFreq), \ diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h index fe130a497d6c3..27a8fd5bc14b6 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h @@ -41,7 +41,7 @@ #define PPSMC_MSG_PowerUpIspByTile 0x7 #define PPSMC_MSG_PowerDownVcn 0x8 // VCN is power gated by default #define PPSMC_MSG_PowerUpVcn 0x9 -#define PPSMC_MSG_RlcPowerNotify 0xA +#define PPSMC_MSG_RlcPowerDownNotify 0xA #define PPSMC_MSG_SetHardMinVcn 0xB // For wireless display #define PPSMC_MSG_SetSoftMinGfxclk 0xC //Sets SoftMin for GFXCLK. Arg is in MHz #define PPSMC_MSG_ActiveProcessNotify 0xD diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 421f38e8dada0..58053dddb6827 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -69,7 +69,7 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(PowerUpIspByTile, PPSMC_MSG_PowerUpIspByTile, 0), MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 0), MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 0), - MSG_MAP(RlcPowerNotify, PPSMC_MSG_RlcPowerNotify, 0), + MSG_MAP(RlcPowerDownNotify, PPSMC_MSG_RlcPowerDownNotify, 0), MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 0), MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 0), MSG_MAP(ActiveProcessNotify, PPSMC_MSG_ActiveProcessNotify, 0), @@ -1952,7 +1952,7 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en) int ret = 0; if (adev->pm.fw_version >= 0x43f1700 && !en) - ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerDownNotify, RLC_STATUS_OFF, NULL); bitmap_zero(feature->enabled, feature->feature_num); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h index c56d4583dc723..6c8093eefef77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h @@ -54,6 +54,5 @@ extern void vangogh_set_ppt_funcs(struct smu_context *smu); /* RLC Power Status */ #define RLC_STATUS_OFF 0 -#define RLC_STATUS_NORMAL 1 #endif From 0c981154251af1bd2b2589e6ce327e3e8aa86596 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Mar 2021 09:15:26 -0500 Subject: [PATCH 0639/1202] Revert "amd/amdgpu: Disable VCN DPG mode for Picasso" This reverts commit c6d2b0fbb893d5c7dda405aa0e7bcbecf1c75f98. 
This patch is a workaround for a hardware bug, but I don't know that
we've actually seen the hw bug triggered in practice; meanwhile, a
number of people have reported that this causes suspend and resume
issues.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Cc: Veerabadhran Gopalakrishnan
Cc: Leo Liu
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 0c316a2d42ed2..b5d7f21018cb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1114,7 +1114,8 @@ static int soc15_common_early_init(void *handle)
 		adev->pg_flags = AMD_PG_SUPPORT_SDMA |
 			AMD_PG_SUPPORT_MMHUB |
-			AMD_PG_SUPPORT_VCN;
+			AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_VCN_DPG;
 	} else {
 		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |

From ac97a93d6d3cd0e01c11163a16cf0fe0b7ee73b2 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 23 Mar 2021 21:19:55 -0400
Subject: [PATCH 0640/1202] Revert "drm/amd/display: To modify the condition in
 indicating branch device"

This breaks HDMI audio.

This reverts commit 9413b23fadad3861f5afd626ac44ef83ad8068ab.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1536
Signed-off-by: Alex Deucher
Cc: Martin Tsai
Cc: Bindu Ramamurthy
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 296b0defcd1c3..15976fb0ec12a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -4283,7 +4283,13 @@ static void get_active_converter_info(
 	}
 
 	/* DPCD 0x5 bit 0 = 1, it indicate it's branch device */
-	link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+	if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) {
+		link->dpcd_caps.is_branch_dev = false;
+	}
+
+	else {
+		link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+	}
 
 	switch (ds_port.fields.PORT_TYPE) {
 	case DOWNSTREAM_VGA:

From 0b7321c7ca9bdd9999c958b1de213dd0790c4dc8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai
Date: Mon, 22 Jul 2019 16:38:14 +0200
Subject: [PATCH 0641/1202] drm/radeon: Add HD-audio component notifier support
 (v2)

This patch adds support to the radeon driver for HD-audio hotplug
notification via the already existing drm_audio_component framework.
This gives us more reliable hotplug notification and ELD transfer
without accessing the HD-audio bus; it's more efficient, and more
importantly, it works without waking up the runtime PM.

The implementation is rather simplistic: the radeon driver provides the
get_eld ops for HD-audio, and it notifies the audio hotplug via the
pin_eld_notify callback upon each radeon_audio_enable() call. The
pin->id is used as the port number passed to the notifier callback, and
the get_eld callback in turn looks up the corresponding connector. The
bind and unbind callbacks manage the device link, which assures the PM
call order.

v2: fix the logic in radeon_audio_component_get_eld to walk the
connector list, since that is where the EDID lives; we derive the
encoder from the connector because the encoder has not been assigned at
this point (i.e. during monitor probe).
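For reference, a minimal sketch (not part of this patch) of how a bound
HD-audio driver could pull the ELD through the component. The function
name and error policy here are illustrative assumptions; only the
get_eld signature comes from struct drm_audio_component_ops:

static int query_gpu_eld(struct drm_audio_component *acomp, int port,
			 unsigned char *eld, int max_bytes)
{
	bool enabled = false;
	int size;

	if (!acomp || !acomp->ops || !acomp->ops->get_eld)
		return -ENODEV;

	/* radeon ignores the pipe argument, so pass -1 */
	size = acomp->ops->get_eld(acomp->dev, port, -1, &enabled,
				   eld, max_bytes);
	if (size > 0 && !enabled)
		return -ENOTCONN;	/* pin exists but audio is disabled */
	return size;
}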
Acked-by: Jim Qu Acked-by: Alex Deucher Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/radeon/radeon.h | 3 + drivers/gpu/drm/radeon/radeon_audio.c | 95 +++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2a926d0de4238..29e281e017d05 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -235,6 +235,7 @@ config DRM_RADEON select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select SND_HDA_COMPONENT if SND_HDA_CORE help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 895776c421d4d..67d0274e8fce0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -79,6 +79,7 @@ #include #include +#include #include "radeon_family.h" #include "radeon_mode.h" @@ -1797,6 +1798,8 @@ struct r600_audio { struct radeon_audio_funcs *hdmi_funcs; struct radeon_audio_funcs *dp_funcs; struct radeon_audio_basic_funcs *funcs; + struct drm_audio_component *component; + bool component_registered; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 7c5e80d03fc90..8c6e7979b9880 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "dce6_afmt.h" @@ -180,6 +181,14 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .dpms = evergreen_dp_enable, }; +static void radeon_audio_component_notify(struct drm_audio_component *acomp, + int port) +{ + if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) + acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr, + port, -1); +} + static void radeon_audio_enable(struct radeon_device *rdev, struct r600_audio_pin *pin, u8 enable_mask) { @@ -207,6 +216,8 @@ static void radeon_audio_enable(struct radeon_device *rdev, if (rdev->audio.funcs->enable) rdev->audio.funcs->enable(rdev, pin, enable_mask); + + radeon_audio_component_notify(rdev->audio.component, pin->id); } static void radeon_audio_interface_init(struct radeon_device *rdev) @@ -230,6 +241,82 @@ static void radeon_audio_interface_init(struct radeon_device *rdev) } } +static int radeon_audio_component_get_eld(struct device *kdev, int port, + int pipe, bool *enabled, + unsigned char *buf, int max_bytes) +{ + struct drm_device *dev = dev_get_drvdata(kdev); + struct drm_encoder *encoder; + struct radeon_encoder *radeon_encoder; + struct radeon_encoder_atom_dig *dig; + struct drm_connector *connector; + int ret = 0; + + *enabled = false; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + const struct drm_connector_helper_funcs *connector_funcs = + connector->helper_private; + encoder = connector_funcs->best_encoder(connector); + + if (!encoder) + continue; + if (!radeon_encoder_is_digital(encoder)) + continue; + radeon_encoder = to_radeon_encoder(encoder); + dig = radeon_encoder->enc_priv; + if (!dig->pin || dig->pin->id != port) + continue; + *enabled = true; + ret = drm_eld_size(connector->eld); + memcpy(buf, connector->eld, min(max_bytes, ret)); + break; + } + + return ret; +} + +static const struct drm_audio_component_ops radeon_audio_component_ops = { + .get_eld = radeon_audio_component_get_eld, +}; + +static int radeon_audio_component_bind(struct device *kdev, + struct device 
*hda_kdev, void *data)
+{
+	struct drm_device *dev = dev_get_drvdata(kdev);
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_audio_component *acomp = data;
+
+	if (WARN_ON(!device_link_add(hda_kdev, kdev, DL_FLAG_STATELESS)))
+		return -ENOMEM;
+
+	drm_modeset_lock_all(dev);
+	acomp->ops = &radeon_audio_component_ops;
+	acomp->dev = kdev;
+	rdev->audio.component = acomp;
+	drm_modeset_unlock_all(dev);
+
+	return 0;
+}
+
+static void radeon_audio_component_unbind(struct device *kdev,
+					  struct device *hda_kdev, void *data)
+{
+	struct drm_device *dev = dev_get_drvdata(kdev);
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_audio_component *acomp = data;
+
+	drm_modeset_lock_all(dev);
+	rdev->audio.component = NULL;
+	acomp->ops = NULL;
+	acomp->dev = NULL;
+	drm_modeset_unlock_all(dev);
+}
+
+static const struct component_ops radeon_audio_component_bind_ops = {
+	.bind = radeon_audio_component_bind,
+	.unbind = radeon_audio_component_unbind,
+};
+
 static int radeon_audio_chipset_supported(struct radeon_device *rdev)
 {
 	return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev);
@@ -276,6 +363,9 @@ int radeon_audio_init(struct radeon_device *rdev)
 	for (i = 0; i < rdev->audio.num_pins; i++)
 		radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
 
+	if (!component_add(rdev->dev, &radeon_audio_component_bind_ops))
+		rdev->audio.component_registered = true;
+
 	return 0;
 }
 
@@ -428,6 +518,11 @@ void radeon_audio_fini(struct radeon_device *rdev)
 		radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
 
 	rdev->audio.enabled = false;
+
+	if (rdev->audio.component_registered) {
+		component_del(rdev->dev, &radeon_audio_component_bind_ops);
+		rdev->audio.component_registered = false;
+	}
 }
 
 static void radeon_audio_set_dto(struct drm_encoder *encoder, unsigned int clock)

From 519fc0a14c8187ab5ca1959f8264e1d9dc12b398 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 20 Oct 2021 16:45:00 -0400
Subject: [PATCH 0642/1202] drm/amdgpu/display: add quirk handling for stutter
 mode

Stutter mode is a power saving feature on GPUs; however, at least one
early raven system exhibits stability issues with it. Add a quirk to
disable it for that system.
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=214417 Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5b8d1635da5c8..b635b893837bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1295,6 +1295,37 @@ static struct hpd_rx_irq_offload_work_queue *hpd_rx_irq_create_workqueue(struct return hpd_rx_offload_wq; } +struct amdgpu_stutter_quirk { + u16 chip_vendor; + u16 chip_device; + u16 subsys_vendor; + u16 subsys_device; + u8 revision; +}; + +static const struct amdgpu_stutter_quirk amdgpu_stutter_quirk_list[] = { + /* https://bugzilla.kernel.org/show_bug.cgi?id=214417 */ + { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, + { 0, 0, 0, 0, 0 }, +}; + +static bool dm_should_disable_stutter(struct pci_dev *pdev) +{ + const struct amdgpu_stutter_quirk *p = amdgpu_stutter_quirk_list; + + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device && + pdev->revision == p->revision) { + return true; + } + ++p; + } + return false; +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1406,6 +1437,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (adev->asic_type != CHIP_CARRIZO && adev->asic_type != CHIP_STONEY) adev->dm.dc->debug.disable_stutter = amdgpu_pp_feature_mask & PP_STUTTER_MODE ? false : true; + if (dm_should_disable_stutter(adev->pdev)) + adev->dm.dc->debug.disable_stutter = true; if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER) adev->dm.dc->debug.disable_stutter = true; From b22dd1a547a2d0b2503cd8f008da32752bc5298b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 0643/1202] fuse: don't increment nlink in link() The fuse_iget() call in create_new_entry() already updated the inode with all the new attributes and incremented the attribute version. Incrementing the nlink will result in the wrong count. This wasn't noticed because the attributes were invalidated right after this. Updating ctime is still needed for the writeback case when the ctime is not refreshed. 
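To illustrate the over-count with made-up numbers (a standalone sketch,
not kernel code; names and values are purely illustrative):

#include <assert.h>

struct cached_inode { unsigned int nlink; };

int main(void)
{
	struct cached_inode ci = { .nlink = 1 };
	unsigned int reply_nlink = 2;	/* server already performed the link */

	ci.nlink = reply_nlink;		/* what fuse_iget() effectively does */
	ci.nlink++;			/* the inc_nlink() removed here */
	assert(ci.nlink == 3);		/* cached 3, but the real count is 2 */
	return 0;
}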
Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index dd83b5d43fe5d..2b0a011be6333 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -745,9 +745,8 @@ void fuse_flush_time_update(struct inode *inode) mapping_set_error(inode->i_mapping, err); } -void fuse_update_ctime(struct inode *inode) +void fuse_update_ctime_in_cache(struct inode *inode) { - fuse_invalidate_attr(inode); if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); @@ -755,6 +754,12 @@ void fuse_update_ctime(struct inode *inode) } } +void fuse_update_ctime(struct inode *inode) +{ + fuse_invalidate_attr(inode); + fuse_update_ctime_in_cache(inode); +} + static void fuse_entry_unlinked(struct dentry *entry) { struct inode *inode = d_inode(entry); @@ -925,24 +930,11 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].value = newent->d_name.name; err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); - /* Contrary to "normal" filesystems it can happen that link - makes two "logical" inodes point to the same "physical" - inode. We invalidate the attributes of the old one, so it - will reflect changes in the backing inode (link count, - etc.) - */ - if (!err) { - struct fuse_inode *fi = get_fuse_inode(inode); - - spin_lock(&fi->lock); - fi->attr_version = atomic64_inc_return(&fm->fc->attr_version); - if (likely(inode->i_nlink < UINT_MAX)) - inc_nlink(inode); - spin_unlock(&fi->lock); - fuse_update_ctime(inode); - } else if (err == -EINTR) { + if (!err) + fuse_update_ctime_in_cache(inode); + else if (err == -EINTR) fuse_invalidate_attr(inode); - } + return err; } From 62a4aa8d0abd76506971d51d02da2585a9561292 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 0644/1202] fuse: selective attribute invalidation Only invalidate attributes that the operation might have changed. 
Introduce two constants for common combinations of changed attributes: FUSE_STATX_MODIFY: file contents are modified but not size FUSE_STATX_MODSIZE: size and/or file contents modified Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 2 +- fs/fuse/dir.c | 4 ++-- fs/fuse/file.c | 16 ++++++++-------- fs/fuse/fuse_i.h | 8 ++++++++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 281d79f8b3d37..89bf96f4fb101 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -735,7 +735,7 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) if (ret < 0) return ret; - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); fuse_write_update_size(inode, iocb->ki_pos); return ret; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b0a011be6333..f31398b339b3a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -116,7 +116,7 @@ u64 entry_attr_timeout(struct fuse_entry_out *o) return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); } -static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask) { set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask); } @@ -756,7 +756,7 @@ void fuse_update_ctime_in_cache(struct inode *inode) void fuse_update_ctime(struct inode *inode) { - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, STATX_CTIME); fuse_update_ctime_in_cache(inode); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 31266ca9c1f29..06967d8e27428 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -211,7 +211,7 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fi->lock); truncate_pagecache(inode, 0); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (fc->writeback_cache) file_update_time(file); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { @@ -515,7 +515,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) * enabled, i_blocks from cached attr may not be accurate. */ if (!err && fm->fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, STATX_BLOCKS); return err; } @@ -1266,7 +1266,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, fuse_write_update_size(inode, pos); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); return res > 0 ? res : err; } @@ -1556,7 +1556,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) FUSE_DIO_WRITE); } } - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); inode_unlock(inode); @@ -1769,7 +1769,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, * is enabled, we trust local ctime/mtime. 
*/ if (!fc->writeback_cache) - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY); spin_lock(&fi->lock); rb_erase(&wpa->writepages_entry, &fi->writepages); while (wpa->next) { @@ -2875,7 +2875,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } else { ret = __fuse_direct_read(io, iter, &pos); } @@ -2996,7 +2996,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) truncate_pagecache_range(inode, offset, offset + length - 1); - fuse_invalidate_attr(inode); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); out: if (!(mode & FALLOC_FL_KEEP_SIZE)) @@ -3109,7 +3109,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, file_update_time(file_out); } - fuse_invalidate_attr(inode_out); + fuse_invalidate_attr_mask(inode_out, FUSE_STATX_MODSIZE); err = outarg.size; out: diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a59e36c7deaea..4f2f658fe5f16 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1065,7 +1065,15 @@ void fuse_wait_aborted(struct fuse_conn *fc); /** * Invalidate inode attributes */ + +/* Attributes possibly changed on data modification */ +#define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS) + +/* Attributes possibly changed on data and/or size modification */ +#define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) + void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); void fuse_invalidate_entry_cache(struct dentry *entry); From 4ab35b003435d0d5280eb764d89450273874dc66 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 0645/1202] fuse: don't bump attr_version in cached write The attribute version in fuse_inode should be updated whenever the attributes might have changed on the server. In case of cached writes this is not the case, so updating the attr_version is unnecessary and could possibly affect performance. Open code the remaining part of fuse_write_update_size(). Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06967d8e27428..c09ab835821a2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2309,15 +2309,18 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, if (!copied) goto unlock; + pos += copied; if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ - size_t endoff = (pos + copied) & ~PAGE_MASK; + size_t endoff = pos & ~PAGE_MASK; if (endoff) zero_user_segment(page, endoff, PAGE_SIZE); SetPageUptodate(page); } - fuse_write_update_size(inode, pos + copied); + if (pos > inode->i_size) + i_size_write(inode, pos); + set_page_dirty(page); unlock: From c8ae84e8d31d00090832280638bfc07010e7c9bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 0646/1202] fuse: rename fuse_write_update_size() This function already updates the attr_version in fuse_inode, regardless of whether the size was changed or not. Rename the helper to fuse_write_update_attr() to reflect the more generic nature. 
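The invariant behind the rename, as a sketch (the helper name is
illustrative; only the counter scheme comes from the patch): every local
event that may race with attributes arriving from the server bumps a
per-connection version, and the inode records the version of its cached
attributes so stale replies can be detected.

static u64 note_local_attr_change(atomic64_t *conn_version, u64 *inode_version)
{
	/* record "the cache changed locally at this point in time" */
	*inode_version = atomic64_inc_return(conn_version);
	return *inode_version;
}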
Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 2 +- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 12 ++++++------ fs/fuse/fuse_i.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 89bf96f4fb101..a73bae6497b05 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -736,7 +736,7 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) return ret; fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - fuse_write_update_size(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 491c092d427bf..75ae30d568eaf 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1591,7 +1591,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, end = outarg.offset + outarg.size; if (end > file_size) { file_size = end; - fuse_write_update_size(inode, file_size); + fuse_write_update_attr(inode, file_size); } num = outarg.size; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c09ab835821a2..b5f37b8df0e05 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1066,7 +1066,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return err ?: ia->write.out.size; } -bool fuse_write_update_size(struct inode *inode, loff_t pos) +bool fuse_write_update_attr(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1263,7 +1263,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, } while (!err && iov_iter_count(ii)); if (res > 0) - fuse_write_update_size(inode, pos); + fuse_write_update_attr(inode, pos); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); @@ -1558,7 +1558,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) } fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); if (res > 0) - fuse_write_update_size(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos); inode_unlock(inode); return res; @@ -2901,7 +2901,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == WRITE) { if (ret > 0) - fuse_write_update_size(inode, pos); + fuse_write_update_attr(inode, pos); else if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } @@ -2990,7 +2990,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_size(inode, offset + length); + bool changed = fuse_write_update_attr(inode, offset + length); if (changed && fm->fc->writeback_cache) file_update_time(file); @@ -3108,7 +3108,7 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); if (fc->writeback_cache) { - fuse_write_update_size(inode_out, pos_out + outarg.size); + fuse_write_update_attr(inode_out, pos_out + outarg.size); file_update_time(file_out); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4f2f658fe5f16..7f5847a48b4ba 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1217,7 +1217,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -bool fuse_write_update_size(struct inode *inode, loff_t pos); +bool fuse_write_update_attr(struct inode *inode, loff_t pos); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int 
fuse_write_inode(struct inode *inode, struct writeback_control *wbc); From b91247e786896329ce1cb9ff6eb792c6df3f0130 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:02 +0200 Subject: [PATCH 0647/1202] fuse: always invalidate attributes after writes Extend the fuse_write_update_attr() helper to invalidate cached attributes after a write. This has already been done in all cases except in fuse_notify_store(), so this is mostly a cleanup. fuse_direct_write_iter() calls fuse_direct_IO() which already calls fuse_write_update_attr(), so don't repeat that again in the former. Signed-off-by: Miklos Szeredi --- fs/fuse/dax.c | 5 +---- fs/fuse/dev.c | 2 +- fs/fuse/file.c | 26 ++++++++++++-------------- fs/fuse/fuse_i.h | 2 +- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index a73bae6497b05..713818d74de6c 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -732,11 +732,8 @@ static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - if (ret < 0) - return ret; - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - fuse_write_update_attr(inode, iocb->ki_pos); + fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 75ae30d568eaf..337fa6f5a7c6c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1591,7 +1591,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, end = outarg.offset + outarg.size; if (end > file_size) { file_size = end; - fuse_write_update_attr(inode, file_size); + fuse_write_update_attr(inode, file_size, outarg.size); } num = outarg.size; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b5f37b8df0e05..c3fd88da2a0b1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1066,7 +1066,7 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, return err ?: ia->write.out.size; } -bool fuse_write_update_attr(struct inode *inode, loff_t pos) +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -1074,12 +1074,14 @@ bool fuse_write_update_attr(struct inode *inode, loff_t pos) spin_lock(&fi->lock); fi->attr_version = atomic64_inc_return(&fc->attr_version); - if (pos > inode->i_size) { + if (written > 0 && pos > inode->i_size) { i_size_write(inode, pos); ret = true; } spin_unlock(&fi->lock); + fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); + return ret; } @@ -1262,11 +1264,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, kfree(ap->pages); } while (!err && iov_iter_count(ii)); - if (res > 0) - fuse_write_update_attr(inode, pos); - + fuse_write_update_attr(inode, pos, res); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); return res > 0 ? 
res : err; } @@ -1554,11 +1553,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) } else { res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); + fuse_write_update_attr(inode, iocb->ki_pos, res); } } - fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - if (res > 0) - fuse_write_update_attr(inode, iocb->ki_pos); inode_unlock(inode); return res; @@ -2900,9 +2897,8 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) kref_put(&io->refcnt, fuse_io_release); if (iov_iter_rw(iter) == WRITE) { - if (ret > 0) - fuse_write_update_attr(inode, pos); - else if (ret < 0 && offset + count > i_size) + fuse_write_update_attr(inode, pos, ret); + if (ret < 0 && offset + count > i_size) fuse_do_truncate(file); } @@ -2990,7 +2986,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_attr(inode, offset + length); + bool changed = fuse_write_update_attr(inode, offset + length, + length); if (changed && fm->fc->writeback_cache) file_update_time(file); @@ -3108,7 +3105,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); if (fc->writeback_cache) { - fuse_write_update_attr(inode_out, pos_out + outarg.size); + fuse_write_update_attr(inode_out, pos_out + outarg.size, + outarg.size); file_update_time(file_out); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7f5847a48b4ba..66c410a733c1b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1217,7 +1217,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, __poll_t fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -bool fuse_write_update_attr(struct inode *inode, loff_t pos); +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); From d0efb34ff341e816f3dcfc9a8e5971f8b38b2171 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 0648/1202] fuse: fix attr version comparison in fuse_read_update_size() A READ request returning a short count is taken as indication of EOF, and the cached file size is modified accordingly. Fix the attribute version checking to allow for changes to fc->attr_version on other inodes. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c3fd88da2a0b1..ddd563fda648b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -787,7 +787,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); - if (attr_ver == fi->attr_version && size < inode->i_size && + if (attr_ver >= fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); From a99fd037f209a35f6c1aa8f48fc00bc1b55b8615 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 0649/1202] fuse: cleanup code conditional on fc->writeback_cache It's safe to call file_update_time() if writeback cache is not enabled, since S_NOCMTIME is set in this case. This part is purely a cleanup. 
__fuse_copy_file_range() also calls fuse_write_update_attr() only in the writeback cache case. This is inconsistent with other callers, where it's called unconditionally. Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ddd563fda648b..bc450daf27a2b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -211,9 +211,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fi->lock); truncate_pagecache(inode, 0); + file_update_time(file); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - if (fc->writeback_cache) - file_update_time(file); } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { invalidate_inode_pages2(inode->i_mapping); } @@ -2986,10 +2985,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, /* we could have extended the file */ if (!(mode & FALLOC_FL_KEEP_SIZE)) { - bool changed = fuse_write_update_attr(inode, offset + length, - length); - - if (changed && fm->fc->writeback_cache) + if (fuse_write_update_attr(inode, offset + length, length)) file_update_time(file); } @@ -3104,13 +3100,8 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, ALIGN_DOWN(pos_out, PAGE_SIZE), ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); - if (fc->writeback_cache) { - fuse_write_update_attr(inode_out, pos_out + outarg.size, - outarg.size); - file_update_time(file_out); - } - - fuse_invalidate_attr_mask(inode_out, FUSE_STATX_MODSIZE); + file_update_time(file_out); + fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size); err = outarg.size; out: From fdc239f5bc8e433ba097c38b046474f6a68d5da0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 0650/1202] fuse: simplify local variables holding writeback cache state There are two instances of "bool is_wb = fc->writeback_cache" where the actual use mostly involves checking "is_wb && S_ISREG(inode->i_mode)". Clean up these cases by storing the second condition in the local variable. 
Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 8 ++++---- fs/fuse/inode.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f31398b339b3a..2bc3359a8ef76 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1561,10 +1561,10 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; - bool is_wb = fc->writeback_cache; + bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; int err; - bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); + bool trust_local_cmtime = is_wb; bool fault_blocked = false; if (!fc->default_permissions) @@ -1608,7 +1608,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } /* Flush dirty data/metadata before non-truncate SETATTR */ - if (is_wb && S_ISREG(inode->i_mode) && + if (is_wb && attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | ATTR_TIMES_SET)) { @@ -1679,7 +1679,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, attr_timeout(&outarg)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ - if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) + if (!is_wb || is_truncate) i_size_write(inode, outarg.attr.size); if (is_truncate) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2f999d38c9b4a..db33f2050f747 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -223,7 +223,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - bool is_wb = fc->writeback_cache; + bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); loff_t oldsize; struct timespec64 old_mtime; @@ -243,7 +243,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * extend local i_size without keeping userspace server in sync. So, * attr->size coming from server can be stale. We cannot trust it. */ - if (!is_wb || !S_ISREG(inode->i_mode)) + if (!is_wb) i_size_write(inode, attr->size); spin_unlock(&fi->lock); From 057a09df10c2ead0f9b5a7b484de341a73b7702e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 0651/1202] fuse: move reverting attributes to fuse_change_attributes() In case of writeback_cache fuse_fillattr() would revert the queried attributes to the cached version. Move this to fuse_change_attributes() in order to manage the writeback logic in a central helper. This will be necessary for patches that follow. Only fuse_do_getattr() -> fuse_fillattr() uses the attributes after calling fuse_change_attributes(), so this should not change behavior. 
Signed-off-by: Miklos Szeredi
---
 fs/fuse/dir.c   |  9 ---------
 fs/fuse/inode.c | 13 +++++++++++++
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2bc3359a8ef76..fd67e64a757ea 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -944,15 +944,6 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
 	unsigned int blkbits;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
-	/* see the comment in fuse_change_attributes() */
-	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
-		attr->size = i_size_read(inode);
-		attr->mtime = inode->i_mtime.tv_sec;
-		attr->mtimensec = inode->i_mtime.tv_nsec;
-		attr->ctime = inode->i_ctime.tv_sec;
-		attr->ctimensec = inode->i_ctime.tv_nsec;
-	}
-
 	stat->dev = inode->i_sb->s_dev;
 	stat->ino = attr->ino;
 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index db33f2050f747..1ecc46dff93e0 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -228,6 +228,19 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 	struct timespec64 old_mtime;
 
 	spin_lock(&fi->lock);
+	/*
+	 * In case of writeback_cache enabled, writes update mtime, ctime and
+	 * may update i_size. In these cases trust the cached value in the
+	 * inode.
+	 */
+	if (is_wb) {
+		attr->size = i_size_read(inode);
+		attr->mtime = inode->i_mtime.tv_sec;
+		attr->mtimensec = inode->i_mtime.tv_nsec;
+		attr->ctime = inode->i_ctime.tv_sec;
+		attr->ctimensec = inode->i_ctime.tv_nsec;
+	}
+
 	if ((attr_version != 0 && fi->attr_version > attr_version) ||
 	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
 		spin_unlock(&fi->lock);

From 334ec7cd29c98c8951d192da4c726ae515d22f35 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi
Date: Fri, 22 Oct 2021 17:03:03 +0200
Subject: [PATCH 0652/1202] fuse: add cache_mask

If writeback_cache is enabled, then the size, mtime and ctime attributes
of regular files are always valid in the kernel's cache. They are
retrieved from userspace only when the inode is freshly looked up.

Add a more generic "cache_mask" that indicates which attributes are
currently valid in the cache.

This patch doesn't change behavior.
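The core of the new helper can be summarized as follows (a sketch
mirroring fuse_get_cache_mask() in the diff below; the function name
here is illustrative):

static u32 cache_mask_sketch(bool writeback_cache, umode_t mode)
{
	/* only regular files with writeback cache have locally
	 * authoritative attributes */
	if (!writeback_cache || !S_ISREG(mode))
		return 0;
	return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}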
Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 3 ++- fs/fuse/fuse_i.h | 4 +++- fs/fuse/inode.c | 31 ++++++++++++++++++++++++------- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fd67e64a757ea..bd43d15438c36 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1667,7 +1667,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } fuse_change_attributes_common(inode, &outarg.attr, - attr_timeout(&outarg)); + attr_timeout(&outarg), + fuse_get_cache_mask(inode)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 66c410a733c1b..531f6d98efc6a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1031,7 +1031,9 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid); + u64 attr_valid, u32 cache_mask); + +u32 fuse_get_cache_mask(struct inode *inode); /** * Initialize the client device diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ecc46dff93e0..8b89e3ba7df3f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -164,7 +164,7 @@ static ino_t fuse_squash_ino(u64 ino64) } void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, - u64 attr_valid) + u64 attr_valid, u32 cache_mask) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -184,9 +184,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; /* mtime from server may be stale due to local buffered write */ - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + if (!(cache_mask & STATX_MTIME)) { inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; + } + if (!(cache_mask & STATX_CTIME)) { inode->i_ctime.tv_sec = attr->ctime; inode->i_ctime.tv_nsec = attr->ctimensec; } @@ -218,12 +220,22 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_flags &= ~S_NOSEC; } +u32 fuse_get_cache_mask(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) + return 0; + + return STATX_MTIME | STATX_CTIME | STATX_SIZE; +} + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode); + u32 cache_mask; loff_t oldsize; struct timespec64 old_mtime; @@ -233,10 +245,15 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, * may update i_size. In these cases trust the cached value in the * inode. 
*/
-	if (is_wb) {
+	cache_mask = fuse_get_cache_mask(inode);
+	if (cache_mask & STATX_SIZE)
 		attr->size = i_size_read(inode);
+
+	if (cache_mask & STATX_MTIME) {
 		attr->mtime = inode->i_mtime.tv_sec;
 		attr->mtimensec = inode->i_mtime.tv_nsec;
+	}
+	if (cache_mask & STATX_CTIME) {
 		attr->ctime = inode->i_ctime.tv_sec;
 		attr->ctimensec = inode->i_ctime.tv_nsec;
 	}
@@ -248,7 +265,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 	}
 
 	old_mtime = inode->i_mtime;
-	fuse_change_attributes_common(inode, attr, attr_valid);
+	fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);
 
 	oldsize = inode->i_size;
 	/*
@@ -256,11 +273,11 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 	 * extend local i_size without keeping userspace server in sync. So,
 	 * attr->size coming from server can be stale. We cannot trust it.
 	 */
-	if (!is_wb)
+	if (!(cache_mask & STATX_SIZE))
 		i_size_write(inode, attr->size);
 	spin_unlock(&fi->lock);
 
-	if (!is_wb && S_ISREG(inode->i_mode)) {
+	if (!cache_mask && S_ISREG(inode->i_mode)) {
 		bool inval = false;
 
 		if (oldsize != attr->size) {

From 09fe6e0b335ab7585d5b8abb9d6884aab911b7b2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi
Date: Fri, 22 Oct 2021 17:03:03 +0200
Subject: [PATCH 0653/1202] fuse: take cache_mask into account in getattr

When deciding to send a GETATTR request, take into account the cache
mask (which attributes are always valid). The cache mask takes
precedence over the invalid mask. This results in the GETATTR request
not being sent unnecessarily.

Signed-off-by: Miklos Szeredi
---
 fs/fuse/dir.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bd43d15438c36..7dd7efacc98b9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1021,12 +1021,14 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	int err = 0;
 	bool sync;
+	u32 inval_mask = READ_ONCE(fi->inval_mask);
+	u32 cache_mask = fuse_get_cache_mask(inode);
 
 	if (flags & AT_STATX_FORCE_SYNC)
 		sync = true;
 	else if (flags & AT_STATX_DONT_SYNC)
 		sync = false;
-	else if (request_mask & READ_ONCE(fi->inval_mask))
+	else if (request_mask & inval_mask & ~cache_mask)
 		sync = true;
 	else
 		sync = time_before64(fi->i_time, get_jiffies_64());

From 85bf4c6df4e91d3693748d769d2ea6a30b573339 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi
Date: Fri, 22 Oct 2021 17:03:03 +0200
Subject: [PATCH 0654/1202] fuse: only update necessary attributes

fuse_update_attributes() refreshes metadata for internal use. Each use
needs a particular set of attributes to be refreshed, but currently that
cannot be expressed and all but atime are refreshed.

Add a mask argument, which lets fuse_update_get_attr() decide, based on
the cache_mask and the inval_mask, whether a GETATTR call is needed or
not.
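The resulting decision condenses into one expression (a sketch; the real
check lives in fuse_update_get_attr() above, and the helper name here is
illustrative):

static bool getattr_needed(u32 request_mask, u32 inval_mask, u32 cache_mask)
{
	/* a round trip is needed only for attributes that are requested,
	 * marked invalid, and not owned by the local cache */
	return request_mask & inval_mask & ~cache_mask;
}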
Reported-by: Yongji Xie Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 6 ++---- fs/fuse/file.c | 9 +++++---- fs/fuse/fuse_i.h | 2 +- fs/fuse/readdir.c | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 7dd7efacc98b9..633d5091abff3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1045,11 +1045,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, return err; } -int fuse_update_attributes(struct inode *inode, struct file *file) +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) { - /* Do *not* need to get atime for internal purposes */ - return fuse_update_get_attr(inode, file, NULL, - STATX_BASIC_STATS & ~STATX_ATIME, 0); + return fuse_update_get_attr(inode, file, NULL, mask, 0); } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index bc450daf27a2b..26730e699d682 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -996,7 +996,7 @@ static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) if (fc->auto_inval_data || (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; - err = fuse_update_attributes(inode, iocb->ki_filp); + err = fuse_update_attributes(inode, iocb->ki_filp, STATX_SIZE); if (err) return err; } @@ -1282,7 +1282,8 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) if (fc->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ - err = fuse_update_attributes(mapping->host, file); + err = fuse_update_attributes(mapping->host, file, + STATX_SIZE | STATX_MODE); if (err) return err; @@ -2633,7 +2634,7 @@ static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); fallback: - err = fuse_update_attributes(inode, file); + err = fuse_update_attributes(inode, file, STATX_SIZE); if (!err) return generic_file_llseek(file, offset, whence); else @@ -2653,7 +2654,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) break; case SEEK_END: inode_lock(inode); - retval = fuse_update_attributes(inode, file); + retval = fuse_update_attributes(inode, file, STATX_SIZE); if (!retval) retval = generic_file_llseek(file, offset, whence); inode_unlock(inode); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 531f6d98efc6a..198637b41e194 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1161,7 +1161,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); -int fuse_update_attributes(struct inode *inode, struct file *file); +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask); void fuse_flush_writepages(struct inode *inode); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index e38ac983435a6..b4e5657110457 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -454,7 +454,7 @@ static int fuse_readdir_cached(struct file *file, struct dir_context *ctx) * cache; both cases require an up-to-date mtime value. 
*/
 	if (!ctx->pos && fc->auto_inval_data) {
-		int err = fuse_update_attributes(inode, file);
+		int err = fuse_update_attributes(inode, file, STATX_MTIME);
 
 		if (err)
 			return err;

From 1d35b798bffe8b38bb0d5ce84fb7b85cd87622e5 Mon Sep 17 00:00:00 2001
From: Gao Xiang
Date: Fri, 22 Oct 2021 17:01:20 +0800
Subject: [PATCH 0655/1202] erofs: get rid of ->lru usage

Currently, ->lru is a way to arrange non-LRU pages and has some
in-kernel users. In order to minimize noticeable issues of page reclaim
and cache thrashing under high memory pressure, limited temporary pages
were all chained with ->lru so they can be reused during the request.
However, it seems that ->lru can be removed once the folio conversion
lands. Let's use page->private to chain temporary pages for now instead,
and convert EROFS properly once the folio / file page design is
finalized.

Link: https://lore.kernel.org/r/20211022090120.14675-1-hsiangkao@linux.alibaba.com
Cc: Matthew Wilcox
Cc: Kent Overstreet
Reviewed-by: Chao Yu
Signed-off-by: Gao Xiang
---
 fs/erofs/compress.h          | 11 +++++-----
 fs/erofs/decompressor.c      |  8 +++----
 fs/erofs/decompressor_lzma.c |  2 +-
 fs/erofs/internal.h          |  9 +++++++-
 fs/erofs/pcpubuf.c           |  6 +++---
 fs/erofs/utils.c             | 19 +++++++++++-----
 fs/erofs/zdata.c             | 42 ++++++++++++++++--------------------
 7 files changed, 53 insertions(+), 44 deletions(-)

diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 8ea6a9b149626..5794065049190 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -22,7 +22,7 @@ struct z_erofs_decompress_req {
 
 struct z_erofs_decompressor {
 	int (*decompress)(struct z_erofs_decompress_req *rq,
-			  struct list_head *pagepool);
+			  struct page **pagepool);
 	char *name;
 };
 
@@ -64,7 +64,7 @@ static inline bool z_erofs_is_shortlived_page(struct page *page)
 	return true;
 }
 
-static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
+static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
 					      struct page *page)
 {
 	if (!z_erofs_is_shortlived_page(page))
@@ -75,8 +75,7 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
 		put_page(page);
 	} else {
 		/* follow the pcluster rule above. */
-		set_page_private(page, 0);
-		list_add(&page->lru, pagepool);
+		erofs_pagepool_add(pagepool, page);
 	}
 	return true;
 }
@@ -89,9 +88,9 @@ static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
 }
 
 int z_erofs_decompress(struct z_erofs_decompress_req *rq,
-		       struct list_head *pagepool);
+		       struct page **pagepool);
 
 /* prototypes for specific algorithms */
 int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
-			    struct list_head *pagepool);
+			    struct page **pagepool);
 #endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 8a624d73c1856..a0786b95cdf99 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -57,7 +57,7 @@ int z_erofs_load_lz4_config(struct super_block *sb,
 * all physical pages are consecutive, which can be seen for moderate CR.
*/ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { const unsigned int nr = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -254,7 +254,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, } static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -296,7 +296,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, } static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -352,7 +352,7 @@ static struct z_erofs_decompressor decompressors[] = { }; int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { return decompressors[rq->alg].decompress(rq, pagepool); } diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index bd7d9809ecf7c..50045510a1f41 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -150,7 +150,7 @@ int z_erofs_load_lzma_config(struct super_block *sb, } int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a6a53d22dfd6d..3265688af7f9f 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -499,7 +499,14 @@ void erofs_pcpubuf_init(void); void erofs_pcpubuf_exit(void); /* utils.c / zdata.c */ -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); +struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp); +static inline void erofs_pagepool_add(struct page **pagepool, + struct page *page) +{ + set_page_private(page, (unsigned long)*pagepool); + *pagepool = page; +} +void erofs_release_pages(struct page **pagepool); #ifdef CONFIG_EROFS_FS_ZIP int erofs_workgroup_put(struct erofs_workgroup *grp); diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c index 6c885575128ac..a2efd833d1b6c 100644 --- a/fs/erofs/pcpubuf.c +++ b/fs/erofs/pcpubuf.c @@ -49,7 +49,7 @@ int erofs_pcpubuf_growsize(unsigned int nrpages) { static DEFINE_MUTEX(pcb_resize_mutex); static unsigned int pcb_nrpages; - LIST_HEAD(pagepool); + struct page *pagepool = NULL; int delta, cpu, ret, i; mutex_lock(&pcb_resize_mutex); @@ -102,13 +102,13 @@ int erofs_pcpubuf_growsize(unsigned int nrpages) vunmap(old_ptr); free_pagearray: while (i) - list_add(&oldpages[--i]->lru, &pagepool); + erofs_pagepool_add(&pagepool, oldpages[--i]); kfree(oldpages); if (ret) break; } pcb_nrpages = nrpages; - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); out: mutex_unlock(&pcb_resize_mutex); return ret; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index bd86067a63f7f..84da2c2800129 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -6,20 +6,29 @@ #include "internal.h" #include -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) +struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) { - struct page *page; + struct page *page = *pagepool; - if (!list_empty(pool)) { - page = lru_to_page(pool); + if (page) { DBG_BUGON(page_ref_count(page) != 1); - list_del(&page->lru); + *pagepool = (struct 
page *)page_private(page); } else { page = alloc_page(gfp); } return page; } +void erofs_release_pages(struct page **pagepool) +{ + while (*pagepool) { + struct page *page = *pagepool; + + *pagepool = (struct page *)page_private(page); + put_page(page); + } +} + #ifdef CONFIG_EROFS_FS_ZIP /* global shrink count (for all mounted EROFS instances) */ static atomic_long_t erofs_global_shrink_cnt; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index d55e6215cd444..bcb1b91b234fb 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -236,7 +236,7 @@ static DEFINE_MUTEX(z_pagemap_global_lock); static void preload_compressed_pages(struct z_erofs_collector *clt, struct address_space *mc, enum z_erofs_cache_alloctype type, - struct list_head *pagepool) + struct page **pagepool) { struct z_erofs_pcluster *pcl = clt->pcl; bool standalone = true; @@ -287,12 +287,10 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t))) continue; - if (page) { + if (page) put_page(page); - } else if (newpage) { - set_page_private(newpage, 0); - list_add(&newpage->lru, pagepool); - } + else if (newpage) + erofs_pagepool_add(pagepool, newpage); } /* @@ -643,7 +641,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, struct list_head *pagepool) + struct page *page, struct page **pagepool) { struct inode *const inode = fe->inode; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); @@ -836,7 +834,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio) static int z_erofs_decompress_pcluster(struct super_block *sb, struct z_erofs_pcluster *pcl, - struct list_head *pagepool) + struct page **pagepool) { struct erofs_sb_info *const sbi = EROFS_SB(sb); struct z_erofs_pagevec_ctor ctor; @@ -1036,7 +1034,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, } static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, - struct list_head *pagepool) + struct page **pagepool) { z_erofs_next_pcluster_t owned = io->head; @@ -1060,18 +1058,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work) { struct z_erofs_decompressqueue *bgq = container_of(work, struct z_erofs_decompressqueue, u.work); - LIST_HEAD(pagepool); + struct page *pagepool = NULL; DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED); z_erofs_decompress_queue(bgq, &pagepool); - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); kvfree(bgq); } static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int nr, - struct list_head *pagepool, + struct page **pagepool, struct address_space *mc, gfp_t gfp) { @@ -1173,7 +1171,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, out_allocpage: page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) { - list_add(&page->lru, pagepool); + erofs_pagepool_add(pagepool, page); cond_resched(); goto repeat; } @@ -1257,7 +1255,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, static void z_erofs_submit_queue(struct super_block *sb, struct z_erofs_decompress_frontend *f, - struct list_head *pagepool, + struct page **pagepool, struct z_erofs_decompressqueue *fgq, bool *force_fg) { @@ -1365,7 +1363,7 @@ static void z_erofs_submit_queue(struct super_block *sb, static void z_erofs_runqueue(struct super_block *sb, struct 
z_erofs_decompress_frontend *f, - struct list_head *pagepool, bool force_fg) + struct page **pagepool, bool force_fg) { struct z_erofs_decompressqueue io[NR_JOBQUEUES];
@@ -1394,7 +1392,7 @@ static void z_erofs_runqueue(struct super_block *sb, static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, struct readahead_control *rac, erofs_off_t end, - struct list_head *pagepool, + struct page **pagepool, bool backmost) { struct inode *inode = f->inode;
@@ -1457,8 +1455,8 @@ static int z_erofs_readpage(struct file *file, struct page *page) { struct inode *const inode = page->mapping->host; struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + struct page *pagepool = NULL; int err; - LIST_HEAD(pagepool); trace_erofs_readpage(page, false); f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
@@ -1479,8 +1477,7 @@ static int z_erofs_readpage(struct file *file, struct page *page) if (f.map.mpage) put_page(f.map.mpage); - /* clean up the remaining free pages */ - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); return err; }
@@ -1489,9 +1486,8 @@ static void z_erofs_readahead(struct readahead_control *rac) { struct inode *const inode = rac->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - struct page *page, *head = NULL; + struct page *pagepool = NULL, *head = NULL, *page; unsigned int nr_pages; - LIST_HEAD(pagepool); f.readahead = true; f.headoffset = readahead_pos(rac);
@@ -1528,9 +1524,7 @@ static void z_erofs_readahead(struct readahead_control *rac) nr_pages <= sbi->opt.max_sync_decompress_pages); if (f.map.mpage) put_page(f.map.mpage); - - /* clean up the remaining free pages */ - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); } const struct address_space_operations z_erofs_aops = {
From 8e11deae5c48eb7b3ac21f995f425014a01cb372 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:36:55 -0400
Subject: [PATCH 0656/1202] KVM: nSVM: move nested_vmcb_check_cr3_cr4 logic in nested_vmcb_valid_sregs
Inline nested_vmcb_check_cr3_cr4 as it is not called by anyone else. Doing so simplifies the next patches.
Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Maxim Levitsky Message-Id: <20211011143702.1786568-2-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f8b7bc04b3e7a..946c06a25d37b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -275,27 +275,6 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, return true; } -static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu, - struct vmcb_save_area *save) -{ - /* - * These checks are also performed by KVM_SET_SREGS, - * except that EFER.LMA is not checked by SVM against - * CR0.PG && EFER.LME. - */ - if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) { - if (CC(!(save->cr4 & X86_CR4_PAE)) || - CC(!(save->cr0 & X86_CR0_PE)) || - CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3))) - return false; - } - - if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) - return false; - - return true; -} - /* Common checks that apply to both L1 and L2 state.
*/ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, struct vmcb_save_area *save)
@@ -317,7 +296,19 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7))) return false; - if (!nested_vmcb_check_cr3_cr4(vcpu, save)) + /* + * These checks are also performed by KVM_SET_SREGS, + * except that EFER.LMA is not checked by SVM against + * CR0.PG && EFER.LME. + */ + if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) { + if (CC(!(save->cr4 & X86_CR4_PAE)) || + CC(!(save->cr0 & X86_CR0_PE)) || + CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3))) + return false; + } + + if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) return false; if (CC(!kvm_valid_efer(vcpu, save->efer)))
From d033d1b3d79e6999f894c7c0bc3526b15564aef9 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:36:56 -0400
Subject: [PATCH 0657/1202] KVM: nSVM: introduce svm->nested.save to cache save area fields
This is useful in the next patch, to avoid having temporary copies of vmcb12 registers and passing them around manually. Right now, instead of blindly copying everything, we just copy EFER, CR0, CR3, CR4, DR6 and DR7. If more fields need to be added, it will be obvious that they must be added in struct vmcb_save_area_cached and in nested_copy_vmcb_save_to_cache().
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-3-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 18 ++++++++++++++++++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/svm/svm.h | 16 ++++++++++++++++ 3 files changed, 36 insertions(+)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 946c06a25d37b..b19fd564f72db 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -328,6 +328,22 @@ void nested_load_control_from_vmcb12(struct vcpu_svm *svm, svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; } +void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, + struct vmcb_save_area *save) +{ + /* + * Copy only fields that are validated, as we need them + * to avoid TOC/TOU races. + */ + svm->nested.save.efer = save->efer; + svm->nested.save.cr0 = save->cr0; + svm->nested.save.cr3 = save->cr3; + svm->nested.save.cr4 = save->cr4; + + svm->nested.save.dr6 = save->dr6; + svm->nested.save.dr7 = save->dr7; +} + /* * Synchronize fields that are written by the processor, so that * they can be copied back into the vmcb12.
@@ -670,6 +686,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) return -EINVAL; nested_load_control_from_vmcb12(svm, &vmcb12->control); + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) || !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) { @@ -1414,6 +1431,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_load_control_from_vmcb12(svm, ctl); + nested_copy_vmcb_save_to_cache(svm, save); svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a2a4e9b42750c..82217073e9d23 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4436,6 +4436,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) vmcb12 = map.hva; nested_load_control_from_vmcb12(svm, &vmcb12->control); + nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); + ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); unmap_save: diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 0d7bbe548ac3e..24623a5e2bcfc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -103,6 +103,19 @@ struct kvm_vmcb_info { uint64_t asid_generation; }; +/* + * This struct is not kept up-to-date, and it is only valid within + * svm_set_nested_state and nested_svm_vmrun. + */ +struct vmcb_save_area_cached { + u64 efer; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; +}; + struct svm_nested_state { struct kvm_vmcb_info vmcb02; u64 hsave_msr; @@ -119,6 +132,7 @@ struct svm_nested_state { /* cache for control fields of the guest */ struct vmcb_control_area ctl; + struct vmcb_save_area_cached save; bool initialized; }; @@ -490,6 +504,8 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier); void nested_load_control_from_vmcb12(struct vcpu_svm *svm, struct vmcb_control_area *control); +void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, + struct vmcb_save_area *save); void nested_sync_control_from_vmcb02(struct vcpu_svm *svm); void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm); void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb); From b40c22927d624f3776d0797779afbd883f4d156d Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 11 Oct 2021 10:36:57 -0400 Subject: [PATCH 0658/1202] KVM: nSVM: replace nested_load_control_from_vmcb12 with nested_copy_vmcb_control_to_cache Following the same naming convention of the previous patch, rename nested_load_control_from_vmcb12. In addition, inline copy_vmcb_control_area as it is only called by this function. 
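The point of the cache, here as in the previous patch, is that guest-writable vmcb12 fields are read exactly once and every later check or use sees the same snapshot, closing time-of-check/time-of-use (TOC/TOU) windows. Reduced to a stand-alone sketch (the two-field struct and helper names below are illustrative, not the kernel's):

  /* Hypothetical two-field cache; the real one holds every used control. */
  struct ctrl_cache {
          u32 asid;
          u64 iopm_base_pa;
  };

  static void cache_guest_controls(struct ctrl_cache *cache,
                                   const struct vmcb_control_area *guest)
  {
          /* Read each guest-writable field exactly once. */
          cache->asid = READ_ONCE(guest->asid);
          cache->iopm_base_pa = READ_ONCE(guest->iopm_base_pa) & ~0x0fffULL;
  }

  static bool check_controls(const struct ctrl_cache *cache)
  {
          /* Checks and consumers only ever look at the snapshot. */
          return cache->asid != 0;
  }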
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-4-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 67 ++++++++++++++++++--------------------- arch/x86/kvm/svm/svm.c | 2 +- arch/x86/kvm/svm/svm.h | 2 +- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b19fd564f72db..254311c36f620 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -163,37 +163,6 @@ void recalc_intercepts(struct vcpu_svm *svm) vmcb_set_intercept(c, INTERCEPT_VMSAVE); } -static void copy_vmcb_control_area(struct vmcb_control_area *dst, - struct vmcb_control_area *from) -{ - unsigned int i; - - for (i = 0; i < MAX_INTERCEPT; i++) - dst->intercepts[i] = from->intercepts[i]; - - dst->iopm_base_pa = from->iopm_base_pa; - dst->msrpm_base_pa = from->msrpm_base_pa; - dst->tsc_offset = from->tsc_offset; - /* asid not copied, it is handled manually for svm->vmcb. */ - dst->tlb_ctl = from->tlb_ctl; - dst->int_ctl = from->int_ctl; - dst->int_vector = from->int_vector; - dst->int_state = from->int_state; - dst->exit_code = from->exit_code; - dst->exit_code_hi = from->exit_code_hi; - dst->exit_info_1 = from->exit_info_1; - dst->exit_info_2 = from->exit_info_2; - dst->exit_int_info = from->exit_int_info; - dst->exit_int_info_err = from->exit_int_info_err; - dst->nested_ctl = from->nested_ctl; - dst->event_inj = from->event_inj; - dst->event_inj_err = from->event_inj_err; - dst->nested_cr3 = from->nested_cr3; - dst->virt_ext = from->virt_ext; - dst->pause_filter_count = from->pause_filter_count; - dst->pause_filter_thresh = from->pause_filter_thresh; -} - static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { /* @@ -317,12 +286,36 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, return true; } -void nested_load_control_from_vmcb12(struct vcpu_svm *svm, - struct vmcb_control_area *control) +void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, + struct vmcb_control_area *control) { - copy_vmcb_control_area(&svm->nested.ctl, control); + unsigned int i; - /* Copy it here because nested_svm_check_controls will check it. */ + for (i = 0; i < MAX_INTERCEPT; i++) + svm->nested.ctl.intercepts[i] = control->intercepts[i]; + + svm->nested.ctl.iopm_base_pa = control->iopm_base_pa; + svm->nested.ctl.msrpm_base_pa = control->msrpm_base_pa; + svm->nested.ctl.tsc_offset = control->tsc_offset; + svm->nested.ctl.tlb_ctl = control->tlb_ctl; + svm->nested.ctl.int_ctl = control->int_ctl; + svm->nested.ctl.int_vector = control->int_vector; + svm->nested.ctl.int_state = control->int_state; + svm->nested.ctl.exit_code = control->exit_code; + svm->nested.ctl.exit_code_hi = control->exit_code_hi; + svm->nested.ctl.exit_info_1 = control->exit_info_1; + svm->nested.ctl.exit_info_2 = control->exit_info_2; + svm->nested.ctl.exit_int_info = control->exit_int_info; + svm->nested.ctl.exit_int_info_err = control->exit_int_info_err; + svm->nested.ctl.nested_ctl = control->nested_ctl; + svm->nested.ctl.event_inj = control->event_inj; + svm->nested.ctl.event_inj_err = control->event_inj_err; + svm->nested.ctl.nested_cr3 = control->nested_cr3; + svm->nested.ctl.virt_ext = control->virt_ext; + svm->nested.ctl.pause_filter_count = control->pause_filter_count; + svm->nested.ctl.pause_filter_thresh = control->pause_filter_thresh; + + /* Copy asid here because nested_vmcb_check_controls will check it. 
*/ svm->nested.ctl.asid = control->asid; svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL; svm->nested.ctl.iopm_base_pa &= ~0x0fffULL;
@@ -685,7 +678,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(!svm->nested.initialized)) return -EINVAL; - nested_load_control_from_vmcb12(svm, &vmcb12->control); + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
@@ -1430,7 +1423,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); - nested_load_control_from_vmcb12(svm, ctl); + nested_copy_vmcb_control_to_cache(svm, ctl); nested_copy_vmcb_save_to_cache(svm, save); svm_switch_vmcb(svm, &svm->nested.vmcb02);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 82217073e9d23..c195b85e791dc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4435,7 +4435,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) */ vmcb12 = map.hva; - nested_load_control_from_vmcb12(svm, &vmcb12->control); + nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 24623a5e2bcfc..30ba2a7b53291 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -502,7 +502,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, int nested_svm_exit_special(struct vcpu_svm *svm); void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier); -void nested_load_control_from_vmcb12(struct vcpu_svm *svm, +void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, struct vmcb_control_area *control); void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, struct vmcb_save_area *save);
From 4a57b6601ecd12e180879df882e0d4ee92f4e694 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:36:58 -0400
Subject: [PATCH 0659/1202] KVM: nSVM: use vmcb_save_area_cached in nested_vmcb_valid_sregs()
Now that struct vmcb_save_area_cached contains the required vmcb field values (done in nested_copy_vmcb_save_to_cache()), check them to see if they are correct in nested_vmcb_valid_sregs(). Since we are always checking the data in the cache, it is enough to keep only the vcpu as a parameter.
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-5-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 254311c36f620..69b85d05f7c22 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -245,9 +245,10 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, } /* Common checks that apply to both L1 and L2 state. */ -static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, - struct vmcb_save_area *save) +static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_save_area_cached *save = &svm->nested.save; /* * FIXME: these should be done after copying the fields, * to avoid TOC/TOU races.
For these save area checks
@@ -681,7 +682,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) || + if (!nested_vmcb_valid_sregs(vcpu) || !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_code_hi = 0;
@@ -1384,10 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * Validate host state saved from before VMRUN (see * nested_svm_check_permissions). */ + nested_copy_vmcb_save_to_cache(svm, save); if (!(save->cr0 & X86_CR0_PG) || !(save->cr0 & X86_CR0_PE) || (save->rflags & X86_EFLAGS_VM) || - !nested_vmcb_valid_sregs(vcpu, save) + !nested_vmcb_valid_sregs(vcpu)) goto out_free; /*
@@ -1424,7 +1426,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_copy_vmcb_control_to_cache(svm, ctl); - nested_copy_vmcb_save_to_cache(svm, save); svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm);
From 1a0b210f812dbbb115c388f90d140a78c03c0d78 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:36:59 -0400
Subject: [PATCH 0660/1202] KVM: nSVM: use svm->nested.save to load vmcb12 registers and avoid TOC/TOU races
Use the already checked svm->nested.save cached fields (EFER, CR0, CR4, ...) instead of vmcb12's in nested_vmcb02_prepare_save(). This prevents TOC/TOU races, since the guest could otherwise modify the vmcb12 fields between the check and their use. This also avoids the need to force-set EFER_SVME in nested_vmcb02_prepare_save.
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-6-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 69b85d05f7c22..4f30135b090c5 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -249,13 +249,7 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_save_area_cached *save = &svm->nested.save; - /* - * FIXME: these should be done after copying the fields, - * to avoid TOC/TOU races. For these save area checks - * the possible damage is limited since kvm_set_cr0 and - * kvm_set_cr4 handle failure; EFER_SVME is an exception - * so it is force-set later in nested_prepare_vmcb_save. - */ + if (CC(!(save->efer & EFER_SVME))) return false;
@@ -491,15 +485,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); - /* - * Force-set EFER_SVME even though it is checked earlier on the - * VMCB12, because the guest can flip the bit between the check - * and now. Clearing EFER_SVME would call svm_free_nested.
- */ - svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME); + svm_set_efer(&svm->vcpu, svm->nested.save.efer); - svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); - svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); + svm_set_cr0(&svm->vcpu, svm->nested.save.cr0); + svm_set_cr4(&svm->vcpu, svm->nested.save.cr4); svm->vcpu.arch.cr2 = vmcb12->save.cr2;
@@ -514,8 +503,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 /* These bits will be set properly on the first execution when new_vmc12 is true */ if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) { - svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; - svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW; + svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1; + svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW; vmcb_mark_dirty(svm->vmcb, VMCB_DR); } }
@@ -629,7 +618,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, nested_vmcb02_prepare_control(svm); nested_vmcb02_prepare_save(svm, vmcb12); - ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, + ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3, nested_npt_enabled(svm), from_vmrun); if (ret) return ret;
From 3d226bdf040f22e4b17e69c8339ebdf2de26717a Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:37:00 -0400
Subject: [PATCH 0661/1202] KVM: nSVM: introduce struct vmcb_ctrl_area_cached
This structure will replace vmcb_control_area in svm_nested_state, providing only the fields that are actually used by the nested state. This avoids having and copying around uninitialized fields. The cost, however, is that the functions that expect the old structure (in this case vmcb_is_intercept) need to be duplicated. Also introduce nested_copy_vmcb_cache_to_control(), which copies the vmcb_ctrl_area_cached fields back into a vmcb_control_area. This will be used in the next patch.
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-7-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 32 ++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4f30135b090c5..0c2b2a4f69497 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1242,6 +1242,38 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); } +/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too.
*/ +static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, + struct vmcb_ctrl_area_cached *from) +{ + unsigned int i; + + for (i = 0; i < MAX_INTERCEPT; i++) + dst->intercepts[i] = from->intercepts[i]; + + dst->iopm_base_pa = from->iopm_base_pa; + dst->msrpm_base_pa = from->msrpm_base_pa; + dst->tsc_offset = from->tsc_offset; + dst->asid = from->asid; + dst->tlb_ctl = from->tlb_ctl; + dst->int_ctl = from->int_ctl; + dst->int_vector = from->int_vector; + dst->int_state = from->int_state; + dst->exit_code = from->exit_code; + dst->exit_code_hi = from->exit_code_hi; + dst->exit_info_1 = from->exit_info_1; + dst->exit_info_2 = from->exit_info_2; + dst->exit_int_info = from->exit_int_info; + dst->exit_int_info_err = from->exit_int_info_err; + dst->nested_ctl = from->nested_ctl; + dst->event_inj = from->event_inj; + dst->event_inj_err = from->event_inj_err; + dst->nested_cr3 = from->nested_cr3; + dst->virt_ext = from->virt_ext; + dst->pause_filter_count = from->pause_filter_count; + dst->pause_filter_thresh = from->pause_filter_thresh; +} + static int svm_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, u32 user_data_size)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 30ba2a7b53291..11c635e3691dd 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -116,6 +116,31 @@ struct vmcb_save_area_cached { u64 dr6; }; +struct vmcb_ctrl_area_cached { + u32 intercepts[MAX_INTERCEPT]; + u16 pause_filter_thresh; + u16 pause_filter_count; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 virt_ext; +}; + struct svm_nested_state { struct kvm_vmcb_info vmcb02; u64 hsave_msr;
@@ -311,6 +336,12 @@ static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit) return test_bit(bit, (unsigned long *)&control->intercepts); } +static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit) +{ + WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT); + return test_bit(bit, (unsigned long *)&control->intercepts); +} + static inline void set_dr_intercepts(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb01.ptr;
From 5b286258d5e2ad3b5c78a36d1af48941b9dda362 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:37:01 -0400
Subject: [PATCH 0662/1202] KVM: nSVM: use struct vmcb_ctrl_area_cached for svm->nested.ctl
This requires replacing all vmcb_is_intercept(&svm->nested.ctl, ...) calls with vmcb12_is_intercept(). In addition, in svm_get_nested_state() user space expects a vmcb_control_area struct, so we need to copy all the fields back into a temporary structure to provide to user space.
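Condensed, the svm_get_nested_state() side then looks roughly like the following sketch (a model of the change below, not the verbatim code): expand the cache into a zeroed, UAPI-shaped temporary and copy that out.

  static int export_ctl(struct vmcb_control_area __user *dst,
                        struct vmcb_ctrl_area_cached *src)
  {
          struct vmcb_control_area *ctl;
          int ret = 0;

          /* Zeroed allocation, so fields absent from the cache read back as 0. */
          ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
          if (!ctl)
                  return -ENOMEM;
          nested_copy_vmcb_cache_to_control(ctl, src);
          if (copy_to_user(dst, ctl, sizeof(*ctl)))
                  ret = -EFAULT;
          kfree(ctl);
          return ret;
  }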
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-8-eesposit@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 44 +++++++++++++++++++++++++-------------- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/svm/svm.h | 8 +++---- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 0c2b2a4f69497..8201ec9a287fa 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep struct vcpu_svm *svm = to_svm(vcpu); WARN_ON(!is_guest_mode(vcpu)); - if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && - !svm->nested.nested_run_pending) { + if (vmcb12_is_intercept(&svm->nested.ctl, + INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && + !svm->nested.nested_run_pending) { svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = fault->error_code; @@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) void recalc_intercepts(struct vcpu_svm *svm) { - struct vmcb_control_area *c, *h, *g; + struct vmcb_control_area *c, *h; + struct vmcb_ctrl_area_cached *g; unsigned int i; vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -172,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) */ int i; - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) return true; for (i = 0; i < MSRPM_OFFSETS; i++) { @@ -220,9 +222,9 @@ static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) } static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, - struct vmcb_control_area *control) + struct vmcb_ctrl_area_cached *control) { - if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN))) + if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN))) return false; if (CC(control->asid == 0)) @@ -979,7 +981,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) u32 offset, msr, value; int write, mask; - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) return NESTED_EXIT_HOST; msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; @@ -1006,7 +1008,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) u8 start_bit; u64 gpa; - if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT))) + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; @@ -1037,12 +1039,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm) vmexit = nested_svm_intercept_ioio(svm); break; case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { - if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_READ_DR0 ... 
SVM_EXIT_WRITE_DR7: { - if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) vmexit = NESTED_EXIT_DONE; break; } @@ -1060,7 +1062,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) break; } default: { - if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) + if (vmcb12_is_intercept(&svm->nested.ctl, exit_code)) vmexit = NESTED_EXIT_DONE; } } @@ -1138,7 +1140,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) static inline bool nested_exit_on_init(struct vcpu_svm *svm) { - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT); + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT); } static int svm_check_nested_events(struct kvm_vcpu *vcpu) @@ -1279,6 +1281,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu, u32 user_data_size) { struct vcpu_svm *svm; + struct vmcb_control_area *ctl; struct kvm_nested_state kvm_state = { .flags = 0, .format = KVM_STATE_NESTED_FORMAT_SVM, @@ -1286,6 +1289,7 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu, }; struct vmcb __user *user_vmcb = (struct vmcb __user *) &user_kvm_nested_state->data.svm[0]; + int r; if (!vcpu) return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; @@ -1320,9 +1324,17 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu, */ if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) return -EFAULT; - if (copy_to_user(&user_vmcb->control, &svm->nested.ctl, - sizeof(user_vmcb->control))) + + ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); + if (!ctl) + return -ENOMEM; + nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl); + r = copy_to_user(&user_vmcb->control, ctl, + sizeof(user_vmcb->control)); + kfree(ctl); + if (r) return -EFAULT; + if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save, sizeof(user_vmcb->save))) return -EFAULT; @@ -1391,7 +1403,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, goto out_free; ret = -EINVAL; - if (!nested_vmcb_check_controls(vcpu, ctl)) + nested_copy_vmcb_control_to_cache(svm, ctl); + if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) goto out_free; /* @@ -1446,7 +1459,6 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); - nested_copy_vmcb_control_to_cache(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); nested_vmcb02_prepare_control(svm); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c195b85e791dc..32b2ee080174b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2506,7 +2506,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu, bool ret = false; if (!is_guest_mode(vcpu) || - (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0)))) + (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0)))) return false; cr0 &= ~SVM_CR0_SELECTIVE_MASK; @@ -4216,7 +4216,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, info->intercept == x86_intercept_clts) break; - if (!(vmcb_is_intercept(&svm->nested.ctl, + if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))) break; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 11c635e3691dd..a77d94a8f4276 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -156,7 +156,7 @@ struct svm_nested_state { bool nested_run_pending; /* cache for control fields of the guest */ - struct vmcb_control_area ctl; + struct vmcb_ctrl_area_cached ctl; struct vmcb_save_area_cached save; bool initialized; @@ -494,17 
+494,17 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu) static inline bool nested_exit_on_smi(struct vcpu_svm *svm) { - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI); + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI); } static inline bool nested_exit_on_intr(struct vcpu_svm *svm) { - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR); + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR); } static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) { - return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI); + return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI); } int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
From e713253ba68c561714c3f757cdb2745d072533d0 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito
Date: Mon, 11 Oct 2021 10:37:02 -0400
Subject: [PATCH 0663/1202] KVM: nSVM: remove unnecessary parameter in nested_vmcb_check_controls
Just as in nested_vmcb_valid_sregs, we only need the vcpu parameter to perform the checks on vmcb nested state, since we always look at the cached fields.
Signed-off-by: Emanuele Giuseppe Esposito Message-Id: <20211011143702.1786568-9-eesposit@redhat.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/svm/nested.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 8201ec9a287fa..11ef7b6868413 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -221,9 +221,11 @@ static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) } } -static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, - struct vmcb_ctrl_area_cached *control) +static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_ctrl_area_cached *control = &svm->nested.ctl; + if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN))) return false;
@@ -674,7 +676,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); if (!nested_vmcb_valid_sregs(vcpu) || - !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) { + !nested_vmcb_check_controls(vcpu)) { vmcb12->control.exit_code = SVM_EXIT_ERR; vmcb12->control.exit_code_hi = 0; vmcb12->control.exit_info_1 = 0;
@@ -1404,7 +1406,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, ret = -EINVAL; nested_copy_vmcb_control_to_cache(svm, ctl); - if (!nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) + if (!nested_vmcb_check_controls(vcpu)) goto out_free; /*
From 64526244f3d049df4cb63ff4f36482554ce85cab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 21 Oct 2021 17:19:27 -0400
Subject: [PATCH 0664/1202] KVM: x86: advertise absence of X86_BUG_NULL_SEG via CPUID
Guests have X86_BUG_NULL_SEG if and only if the host has it. Use the info from static_cpu_has_bug to form the 0x80000021 CPUID leaf that was defined for Zen3. Userspace can then set the bit even on older CPUs that do not have the bug, such as Zen2.
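For leaf 0x80000021 itself the logic boils down to exposing only the bits KVM knows about (currently EAX bit 6, NullSegClearsBase) and synthesizing that bit whenever the host is known not to have the bug. A stand-alone model in plain C (the names here are invented for illustration):

  #include <stdbool.h>
  #include <stdint.h>

  #define NULL_SEG_CLEARS_BASE (1u << 6)  /* EAX bit 6 of leaf 0x80000021 */

  static uint32_t leaf_80000021_eax(uint32_t host_eax, bool host_has_null_seg_bug)
  {
          uint32_t eax = host_eax & NULL_SEG_CLEARS_BASE; /* drop unknown bits */

          /* e.g. a Zen2 host, which lacks the bug but does not report the bit */
          if (!host_has_null_seg_bug)
                  eax |= NULL_SEG_CLEARS_BASE;
          return eax;
  }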
Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/cpuid.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2d70edb0f3233..47cee76a2ed24 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -583,9 +583,27 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, entry = &array->entries[array->nent++]; + memset(entry, 0, sizeof(*entry)); entry->function = function; entry->index = index; - entry->flags = 0; + switch (function & 0xC0000000) { + case 0x40000000: + /* Hypervisor leaves are always synthesized by __do_cpuid_func. */ + return entry; + + case 0x80000000: + /* + * 0x80000021 is sometimes synthesized by __do_cpuid_func, which + * would result in out-of-bounds calls to do_host_cpuid. + */ + { + static int max_cpuid_80000000; + if (!READ_ONCE(max_cpuid_80000000)) + WRITE_ONCE(max_cpuid_80000000, cpuid_eax(0x80000000)); + if (function > READ_ONCE(max_cpuid_80000000)) + return entry; + } + } cpuid_count(entry->function, entry->index, &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
@@ -902,7 +920,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = 0; break; case 0x80000000: - entry->eax = min(entry->eax, 0x8000001f); + entry->eax = min(entry->eax, 0x80000021); + /* + * NullSegClearsBase is not reported in CPUID on Zen2; in + * that case, provide the CPUID leaf ourselves. + */ + if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) + entry->eax = max(entry->eax, 0x80000021); break; case 0x80000001: cpuid_entry_override(entry, CPUID_8000_0001_EDX);
@@ -973,6 +997,15 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx &= ~GENMASK(11, 6); } break; + case 0x80000020: + entry->eax = entry->ebx = entry->ecx = entry->edx = 0; + break; + case 0x80000021: + entry->ebx = entry->ecx = entry->edx = 0; + entry->eax &= BIT(6); + if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) + entry->eax |= BIT(6); + break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/
From ab2e0cc86d01cde20f55a645da04af875b2509fd Mon Sep 17 00:00:00 2001
From: David Edmondson
Date: Mon, 20 Sep 2021 11:37:34 +0100
Subject: [PATCH 0665/1202] KVM: x86: Clarify the kvm_run.emulation_failure structure layout
Until more kvm_run.emulation_failure flags are defined, it is undetermined whether new payload elements corresponding to those flags will be additive or alternative. As a hint to userspace that an alternative is possible, wrap the current payload elements in a union.
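The resulting layout, reduced to a stand-alone example (types and field names abridged from the kvm_run fragment below):

  #include <stdint.h>

  struct emulation_failure {
          uint32_t suberror;
          uint32_t ndata;
          uint64_t flags;                 /* enumerates the payload in use */
          union {
                  struct {                /* valid if the insn-bytes flag is set */
                          uint8_t insn_size;
                          uint8_t insn_bytes[15];
                  };
                  /* alternative payloads for future flags would go here */
          };
  };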
Suggested-by: Sean Christopherson Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-2-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/kvm.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 5ca5ffe16cb43..2c8aa8d4dac1f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -403,8 +403,12 @@ struct kvm_run { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; } emulation_failure; /* KVM_EXIT_OSI */ struct { From fd5f3b0e741e781d01df1be80cd53de56e221445 Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Mon, 20 Sep 2021 11:37:35 +0100 Subject: [PATCH 0666/1202] KVM: x86: Get exit_reason as part of kvm_x86_ops.get_exit_info Extend the get_exit_info static call to provide the reason for the VM exit. Modify relevant trace points to use this rather than extracting the reason in the caller. Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-3-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 ++++--- arch/x86/kvm/svm/svm.c | 8 +++++--- arch/x86/kvm/trace.h | 9 +++++---- arch/x86/kvm/vmx/nested.c | 2 +- arch/x86/kvm/vmx/vmx.c | 6 ++++-- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c8530ea136aa6..0b8a9ea7a47b0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1412,10 +1412,11 @@ struct kvm_x86_ops { void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); /* - * Retrieve somewhat arbitrary exit information. Intended to be used - * only from within tracepoints to avoid VMREADs when tracing is off. + * Retrieve somewhat arbitrary exit information. Intended to + * be used only from within tracepoints or error paths. */ - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, + void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *exit_int_info, u32 *exit_int_info_err_code); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 32b2ee080174b..7cbf0afecd34b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3362,11 +3362,13 @@ int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) return svm_exit_handlers[exit_code](vcpu); } -static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, +static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + *reason = control->exit_code; *info1 = control->exit_info_1; *info2 = control->exit_info_2; *intr_info = control->exit_int_info; @@ -3383,7 +3385,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); + trace_kvm_exit(vcpu, KVM_ISA_SVM); /* SEV-ES guests must use the CR write traps to track CR registers. 
*/ if (!sev_es_guest(vcpu->kvm)) { @@ -3396,7 +3398,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) if (is_guest_mode(vcpu)) { int vmexit; - trace_kvm_nested_vmexit(exit_code, vcpu, KVM_ISA_SVM); + trace_kvm_nested_vmexit(vcpu, KVM_ISA_SVM); vmexit = nested_svm_exit_special(svm); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 03ebe368333ef..953b0fcb21ee8 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -288,8 +288,8 @@ TRACE_EVENT(kvm_apic, #define TRACE_EVENT_KVM_EXIT(name) \ TRACE_EVENT(name, \ - TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu, u32 isa), \ - TP_ARGS(exit_reason, vcpu, isa), \ + TP_PROTO(struct kvm_vcpu *vcpu, u32 isa), \ + TP_ARGS(vcpu, isa), \ \ TP_STRUCT__entry( \ __field( unsigned int, exit_reason ) \ @@ -303,11 +303,12 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->exit_reason = exit_reason; \ __entry->guest_rip = kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ - static_call(kvm_x86_get_exit_info)(vcpu, &__entry->info1, \ + static_call(kvm_x86_get_exit_info)(vcpu, \ + &__entry->exit_reason, \ + &__entry->info1, \ &__entry->info2, \ &__entry->intr_info, \ &__entry->error_code); \ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index af1bbb73430a9..b4ee5e9f9e201 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6066,7 +6066,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) goto reflect_vmexit; } - trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX); + trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX); /* If L0 (KVM) wants the exit, it trumps L1's desires. */ if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c11688f64e80c..85ce11dac8fdb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5658,11 +5658,13 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2, +static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, + u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); + *reason = vmx->exit_reason.full; *info1 = vmx_get_exit_qual(vcpu); if (!(vmx->exit_reason.failed_vmentry)) { *info2 = vmx->idt_vectoring_info; @@ -6814,7 +6816,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) if (likely(!vmx->exit_reason.failed_vmentry)) vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX); + trace_kvm_exit(vcpu, KVM_ISA_VMX); if (unlikely(vmx->exit_reason.failed_vmentry)) return EXIT_FASTPATH_NONE; From 2b9763e07317fdc0b6d6e8919d734e9b7ab46f4e Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Mon, 20 Sep 2021 11:37:36 +0100 Subject: [PATCH 0667/1202] KVM: x86: On emulation failure, convey the exit reason, etc. to userspace Should instruction emulation fail, include the VM exit reason, etc. in the emulation_failure data passed to userspace, in order that the VMM can report it as a debugging aid when describing the failure. 
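The packing convention implemented below is: data[0] holds the flags, optionally followed by the instruction size/bytes pair, then five exit-info words, then up to four caller-supplied words, with ndata covering the whole run. A simplified model (the helper name and signature here are hypothetical):

  #include <stdint.h>
  #include <string.h>

  static uint8_t pack_ndata(uint64_t *data, int have_insn_bytes,
                            const uint64_t exit_info[5],
                            const uint64_t *extra, uint8_t n_extra)
  {
          /* flags always occupy data[0]; insn size/bytes add two entries */
          uint8_t start = 1 + (have_insn_bytes ? 2 : 0);

          if (n_extra > 4)        /* cap to 4, leaving headroom in data[13] */
                  n_extra = 4;
          memcpy(&data[start], exit_info, 5 * sizeof(*data));
          if (n_extra)
                  memcpy(&data[start + 5], extra, n_extra * sizeof(*data));
          return start + 5 + n_extra;     /* reported back as ndata */
  }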
Suggested-by: Joao Martins Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-4-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/vmx/vmx.c | 5 +-- arch/x86/kvm/x86.c | 73 ++++++++++++++++++++++++++------- include/uapi/linux/kvm.h | 6 +++ 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b8a9ea7a47b0..88fce6ab4bbd7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1667,6 +1667,9 @@ extern u64 kvm_mce_cap_supported; int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, + u64 *data, u8 ndata); +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 85ce11dac8fdb..71f54d85f104c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5408,10 +5408,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vmx->emulation_required && !vmx->rmode.vm86_active && vcpu->arch.exception.pending) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0377e61b8fc0b..ac83d873d65b0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7664,29 +7664,78 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, + u8 ndata, u8 *insn_bytes, u8 insn_size) { - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; struct kvm_run *run = vcpu->run; + u64 info[5]; + u8 info_start; + + /* + * Zero the whole array used to retrieve the exit info, as casting to + * u32 for select entries will leave some chunks uninitialized. + */ + memset(&info, 0, sizeof(info)); + + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1], + &info[2], (u32 *)&info[3], + (u32 *)&info[4]); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; - run->emulation_failure.ndata = 0; + + /* + * There's currently space for 13 entries, but 5 are used for the exit + * reason and info. Restrict to 4 to reduce the maintenance burden + * when expanding kvm_run.emulation_failure in the future. + */ + if (WARN_ON_ONCE(ndata > 4)) + ndata = 4; + + /* Always include the flags as a 'data' entry. 
*/ + info_start = 1; run->emulation_failure.flags = 0; if (insn_size) { - run->emulation_failure.ndata = 3; + BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) + + sizeof(run->emulation_failure.insn_bytes) != 16)); + info_start += 2; run->emulation_failure.flags |= KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; run->emulation_failure.insn_size = insn_size; memset(run->emulation_failure.insn_bytes, 0x90, sizeof(run->emulation_failure.insn_bytes)); - memcpy(run->emulation_failure.insn_bytes, - ctxt->fetch.data, insn_size); + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size); } + + memcpy(&run->internal.data[info_start], info, sizeof(info)); + memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data, + ndata * sizeof(data[0])); + + run->emulation_failure.ndata = info_start + ARRAY_SIZE(info) + ndata; } +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu) +{ + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data, + ctxt->fetch.end - ctxt->fetch.data); +} + +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, + u8 ndata) +{ + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); +} +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); + +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) +{ + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); +} +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); + static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { struct kvm *kvm = vcpu->kvm; @@ -7701,16 +7750,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) if (kvm->arch.exit_on_emulation_error || (emulation_type & EMULTYPE_SKIP)) { - prepare_emulation_failure_exit(vcpu); + prepare_emulation_ctxt_failure_exit(vcpu); return 0; } kvm_queue_exception(vcpu, UD_VECTOR); if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + prepare_emulation_ctxt_failure_exit(vcpu); return 0; } @@ -12336,9 +12383,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, * doesn't seem to be a real use-case behind such requests, just return * KVM_EXIT_INTERNAL_ERROR for now. */ - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2c8aa8d4dac1f..78f0719cc2a3a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -398,6 +398,11 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; @@ -409,6 +414,7 @@ struct kvm_run { __u8 insn_bytes[15]; }; }; + /* Arbitrary debug data may follow. 
*/ } emulation_failure; /* KVM_EXIT_OSI */ struct {
From 5edcbfdf96fd9b80777f778431b44daeb8ee1fa9 Mon Sep 17 00:00:00 2001
From: David Edmondson
Date: Mon, 20 Sep 2021 11:37:37 +0100
Subject: [PATCH 0668/1202] KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol
When passing the failing address and size out to user space, SGX must ensure not to trample on the earlier fields of the emulation_failure sub-union of struct kvm_run.
Signed-off-by: David Edmondson Reviewed-by: Sean Christopherson Message-Id: <20210920103737.2696756-5-david.edmondson@oracle.com> Signed-off-by: Paolo Bonzini
--- arch/x86/kvm/vmx/sgx.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 6693ebdc07701..35e7ec91ae864 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -53,11 +53,9 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset, static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr, unsigned int size) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 2; - vcpu->run->internal.data[0] = addr; - vcpu->run->internal.data[1] = size; + uint64_t data[2] = { addr, size }; + + __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data)); } static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
@@ -112,9 +110,7 @@ static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr) * but the error code isn't (yet) plumbed through the ENCLS helpers. */ if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; }
@@ -155,9 +151,7 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); if (!sgx_12_0 || !sgx_12_1) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + kvm_prepare_emulation_failure_exit(vcpu); return 0; }
From e7242613e6d74e420b16d409b08cc877a0efa53b Mon Sep 17 00:00:00 2001
From: Chun-Hung Tseng
Date: Wed, 15 Sep 2021 17:02:18 +0800
Subject: [PATCH 0669/1202] rcu: Replace ________p1 and _________p1 with __UNIQUE_ID(rcu)
This commit replaces both ________p1 and _________p1 with __UNIQUE_ID(rcu), and also adjusts the callers of the affected macros. __UNIQUE_ID(rcu) will generate unique variable names during compilation, which eliminates the need for ________p1 and _________p1 (both having 4 occurrences prior to the code change). This also avoids the variable name shadowing issue, or at least makes those wishing to cause shadowing problems work much harder to do so. The same idea is used for the min/max macros (commit 589a978 and commit e9092d0).
Signed-off-by: Jim Huang Signed-off-by: Chun-Hung Tseng Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 50 ++++++++++++++++++++++------------------ include/linux/srcu.h | 3 ++- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e0beb5c5659a..88b42eb464068 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -364,6 +364,12 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_check_sparse(p, space) #endif /* #else #ifdef __CHECKER__ */ +#define __unrcu_pointer(p, local) \ +({ \ + typeof(*p) *local = (typeof(*p) *__force)(p); \ + rcu_check_sparse(p, __rcu); \ + ((typeof(*p) __force __kernel *)(local)); \ +}) /** * unrcu_pointer - mark a pointer as not being RCU protected * @p: pointer needing to lose its __rcu property @@ -371,39 +377,35 @@ static inline void rcu_preempt_sleep_check(void) { } * Converts @p from an __rcu pointer to a __kernel pointer. * This allows an __rcu pointer to be used with xchg() and friends. */ -#define unrcu_pointer(p) \ -({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)(p); \ - rcu_check_sparse(p, __rcu); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ -}) +#define unrcu_pointer(p) __unrcu_pointer(p, __UNIQUE_ID(rcu)) -#define __rcu_access_pointer(p, space) \ +#define __rcu_access_pointer(p, local, space) \ ({ \ - typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_check(p, c, space) \ +#define __rcu_dereference_check(p, local, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ + typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_check_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + ((typeof(*p) __force __kernel *)(local)); \ }) -#define __rcu_dereference_protected(p, c, space) \ +#define __rcu_dereference_protected(p, local, c, space) \ ({ \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_check_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) -#define rcu_dereference_raw(p) \ +#define __rcu_dereference_raw(p, local) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = READ_ONCE(p); \ - ((typeof(*p) __force __kernel *)(________p1)); \ + typeof(p) local = READ_ONCE(p); \ + ((typeof(*p) __force __kernel *)(local)); \ }) +#define rcu_dereference_raw(p) __rcu_dereference_raw(p, __UNIQUE_ID(rcu)) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable @@ -490,7 +492,7 @@ do { \ * when tearing down multi-linked structures after a grace period * has elapsed. */ -#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) +#define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) /** * rcu_dereference_check() - rcu_dereference with debug checking @@ -526,7 +528,8 @@ do { \ * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_held(), __rcu) /** * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking @@ -541,7 +544,8 @@ do { \ * rcu_read_lock() but also rcu_read_lock_bh() into account. 
*/ #define rcu_dereference_bh_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_bh_held(), __rcu) /** * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking @@ -556,7 +560,8 @@ do { \ * only rcu_read_lock() but also rcu_read_lock_sched() into account. */ #define rcu_dereference_sched_check(p, c) \ - __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || rcu_read_lock_sched_held(), \ __rcu) /* @@ -566,7 +571,8 @@ do { \ * The no-tracing version of rcu_dereference_raw() must not call * rcu_read_lock_held(). */ -#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu) +#define rcu_dereference_raw_check(p) \ + __rcu_dereference_check((p), __UNIQUE_ID(rcu), 1, __rcu) /** * rcu_dereference_protected() - fetch RCU pointer when updates prevented @@ -585,7 +591,7 @@ do { \ * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) + __rcu_dereference_protected((p), __UNIQUE_ID(rcu), (c), __rcu) /** diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e6011a9975af2..01226e4d960a0 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -117,7 +117,8 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * lockdep_is_held() calls. */ #define srcu_dereference_check(p, ssp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) + __rcu_dereference_check((p), __UNIQUE_ID(rcu), \ + (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing From 583042b89a39bcf34c97bb62e2962a60204975bc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Sep 2021 15:04:48 -0700 Subject: [PATCH 0670/1202] rcu: Tighten rcu_advance_cbs_nowake() checks Currently, rcu_advance_cbs_nowake() checks that a grace period is in progress; however, that grace period could end just after the check. This commit therefore also checks that the current rcu_node structure is waiting for a quiescent state from at least one CPU, and rechecks under the lock. This guarantees that the grace period cannot end while the current CPU's rcu_node structure's ->lock is held, thus avoiding false positives from the WARN_ON_ONCE(). Tested-By: Guillaume Morin Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ef8d36f580fc3..522e9817ab03f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1590,10 +1590,14 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp, struct rcu_data *rdp) { rcu_lockdep_assert_cblist_protected(rdp); - if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || + // Don't do anything unless the current grace period is guaranteed + // not to end. This means a grace period in progress and at least + // one holdout CPU. + if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !READ_ONCE(rnp->qsmask) || !raw_spin_trylock_rcu_node(rnp)) return; - WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); + if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) && READ_ONCE(rnp->qsmask)) + WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); raw_spin_unlock_rcu_node(rnp); } From 681945b872eff4ab520608ad35ae0886e0f90716 Mon Sep 17 00:00:00 2001 From: "Paul E.
McKenney" Date: Sat, 18 Sep 2021 20:40:48 -0700 Subject: [PATCH 0671/1202] rcu-tasks: Don't remove tasks with pending IPIs from holdout list Currently, the check_all_holdout_tasks_trace() function removes all tasks marked with ->trc_reader_checked from the holdout list, including those with IPIs pending. This means that the IPI handler might arrive at a task that has already been removed from the list, which is at best an accident waiting to happen. This commit therefore avoids removing tasks with IPIs pending from the holdout list. This in turn means that the "if" condition in the for_each_online_cpu() loop in rcu_tasks_trace_postgp() should always evaluate to false, so a WARN_ON_ONCE() is added to check that. Signed-off-by: Paul E. McKenney --- kernel/rcu/tasks.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 66e7586a33e91..e4a32db9f7122 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1121,7 +1121,8 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, trc_wait_for_one_reader(t, hop); // If check succeeded, remove this task from the list. - if (READ_ONCE(t->trc_reader_checked)) + if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 && + READ_ONCE(t->trc_reader_checked)) trc_del_holdout(t); else if (needreport) show_stalled_task_trace(t, firstreport); @@ -1156,7 +1157,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) // Yes, this assumes that CPUs process IPIs in order. If that ever // changes, there will need to be a recheck and/or timed wait. for_each_online_cpu(cpu) - if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))) + if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))) smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1); // Remove the safety count. From 3d5e806fbb29b78da62846e182a0521ed087c0e5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Sep 2021 16:43:47 -0700 Subject: [PATCH 0672/1202] testing/bpf: Update test names for xchg and cmpxchg The test_cmpxchg() and test_xchg() functions say "test_run add". Therefore, make them say "test_run cmpxchg" and "test_run xchg", respectively. Cc: Shuah Khan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: John Fastabend Cc: KP Singh Cc: Cc: Cc: Cc: Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/bpf/prog_tests/atomics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index ba0e1efe5a45f..ca431c2917e9d 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -167,7 +167,7 @@ static void test_cmpxchg(struct atomics *skel) prog_fd = skel->progs.cmpxchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run add", + if (CHECK(err || retval, "test_run cmpxchg", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; @@ -196,7 +196,7 @@ static void test_xchg(struct atomics *skel) prog_fd = skel->progs.xchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run add", + if (CHECK(err || retval, "test_run xchg", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; From ef972498078786cd86631a4a4eee57a7f96dd17b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Sep 2021 12:50:05 -0700 Subject: [PATCH 0673/1202] torture: Catch kvm.sh help text up with actual options This commit brings the kvm.sh script's help text up to date with recently (and some not-so-recently) added parameters. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6cf9ec6a3d1c0..6de0c183db5b9 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -74,7 +74,9 @@ usage () { echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" + echo " --kasan" echo " --kconfig Kconfig-options" + echo " --kcsan" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" @@ -83,6 +85,7 @@ usage () { echo " --qemu-cmd qemu-system-..." echo " --remote" echo " --results absolute-pathname" + echo " --shutdown-grace seconds" echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 @@ -175,14 +178,14 @@ do jitter="$2" shift ;; + --kasan) + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + ;; --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; - --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG - ;; --kcsan) TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; From 48653e3b31afe5dc0a676180a08df3b5b748e760 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Sep 2021 20:31:44 -0700 Subject: [PATCH 0674/1202] rcutorture: Sanitize RCUTORTURE_RDR_MASK RCUTORTURE_RDR_MASK is currently not the bit indicated by RCUTORTURE_RDR_SHIFT, but is instead all the bits less significant than that one. 
This is an accident waiting to happen, so this commit makes RCUTORTURE_RDR_MASK be that one bit and adjusts uses accordingly. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 8b410d982990c..1e03fe681f025 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -54,7 +54,7 @@ MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); /* Bits for ->extendables field, extendables param, and related definitions. */ #define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */ -#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1) +#define RCUTORTURE_RDR_MASK (1 << RCUTORTURE_RDR_SHIFT) #define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */ #define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */ #define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */ @@ -1466,6 +1466,7 @@ static void rcutorture_one_extend(int *readstate, int newstate, if (lockit) raw_spin_lock_irqsave(&current->pi_lock, flags); cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT); + idxold = 0; if (lockit) raw_spin_unlock_irqrestore(&current->pi_lock, flags); } @@ -1476,7 +1477,7 @@ static void rcutorture_one_extend(int *readstate, int newstate, /* Update the reader state. */ if (idxnew == -1) - idxnew = idxold & ~RCUTORTURE_RDR_MASK; + idxnew = idxold & RCUTORTURE_RDR_MASK; WARN_ON_ONCE(idxnew < 0); WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1); *readstate = idxnew | newstate; @@ -1626,7 +1627,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) rcu_torture_writer_state, cookie, cur_ops->get_gp_state()); rcutorture_one_extend(&readstate, 0, trsp, rtrsp); - WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK); + WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially // for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels. WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1); From 87afcff458dfe3c1a81602eff77cda8071488fff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Sep 2021 20:49:12 -0700 Subject: [PATCH 0675/1202] rcutorture: More thoroughly test nested readers Currently, nested readers occur only when a timer handler interrupts a reader. This is rare, and is thus insufficient testing of the transition between nesting levels. This commit therefore causes rcutorture nested readers to be the rule rather than the exception. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 73 ++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 1e03fe681f025..d295da380fb4f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -53,15 +53,18 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); /* Bits for ->extendables field, extendables param, and related definitions. */ -#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */ -#define RCUTORTURE_RDR_MASK (1 << RCUTORTURE_RDR_SHIFT) +#define RCUTORTURE_RDR_SHIFT_1 8 /* Put SRCU index in upper bits. */ +#define RCUTORTURE_RDR_MASK_1 (1 << RCUTORTURE_RDR_SHIFT_1) +#define RCUTORTURE_RDR_SHIFT_2 9 /* Put SRCU index in upper bits. */ +#define RCUTORTURE_RDR_MASK_2 (1 << RCUTORTURE_RDR_SHIFT_2) #define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */ #define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */ #define RCUTORTURE_RDR_PREEMPT 0x04 /* ...
disabling preemption. */ #define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */ #define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */ -#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */ -#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */ +#define RCUTORTURE_RDR_RCU_1 0x20 /* ... entering another RCU reader. */ +#define RCUTORTURE_RDR_RCU_2 0x40 /* ... entering another RCU reader. */ +#define RCUTORTURE_RDR_NBITS 7 /* Number of bits defined above. */ #define RCUTORTURE_MAX_EXTEND \ (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \ RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED) @@ -1420,13 +1423,15 @@ static void rcutorture_one_extend(int *readstate, int newstate, struct rt_read_seg *rtrsp) { unsigned long flags; - int idxnew = -1; - int idxold = *readstate; + int idxnew1 = -1; + int idxnew2 = -1; + int idxold1 = *readstate; + int idxold2 = idxold1; int statesnew = ~*readstate & newstate; int statesold = *readstate & ~newstate; - WARN_ON_ONCE(idxold < 0); - WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); + WARN_ON_ONCE(idxold2 < 0); + WARN_ON_ONCE((idxold2 >> RCUTORTURE_RDR_SHIFT_2) > 1); rtrsp->rt_readstate = newstate; /* First, put new protection in place to avoid critical-section gap. */ @@ -1440,8 +1445,10 @@ static void rcutorture_one_extend(int *readstate, int newstate, preempt_disable(); if (statesnew & RCUTORTURE_RDR_SCHED) rcu_read_lock_sched(); - if (statesnew & RCUTORTURE_RDR_RCU) - idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; + if (statesnew & RCUTORTURE_RDR_RCU_1) + idxnew1 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_1; + if (statesnew & RCUTORTURE_RDR_RCU_2) + idxnew2 = (cur_ops->readlock() & 0x1) << RCUTORTURE_RDR_SHIFT_2; /* * Next, remove old protection, in decreasing order of strength @@ -1460,13 +1467,19 @@ static void rcutorture_one_extend(int *readstate, int newstate, local_bh_enable(); if (statesold & RCUTORTURE_RDR_RBH) rcu_read_unlock_bh(); - if (statesold & RCUTORTURE_RDR_RCU) { + if (statesold & RCUTORTURE_RDR_RCU_2) { + cur_ops->readunlock((idxold2 >> RCUTORTURE_RDR_SHIFT_2) & 0x1); + WARN_ON_ONCE(idxnew2 != -1); + idxold2 = 0; + } + if (statesold & RCUTORTURE_RDR_RCU_1) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); if (lockit) raw_spin_lock_irqsave(&current->pi_lock, flags); - cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT); - idxold = 0; + cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1); + WARN_ON_ONCE(idxnew1 != -1); + idxold1 = 0; if (lockit) raw_spin_unlock_irqrestore(&current->pi_lock, flags); } @@ -1476,13 +1489,19 @@ static void rcutorture_one_extend(int *readstate, int newstate, cur_ops->read_delay(trsp, rtrsp); /* Update the reader state.
*/ - if (idxnew == -1) - idxnew = idxold & RCUTORTURE_RDR_MASK; - WARN_ON_ONCE(idxnew < 0); - WARN_ON_ONCE((idxnew >> RCUTORTURE_RDR_SHIFT) > 1); - *readstate = idxnew | newstate; - WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) < 0); - WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT) > 1); + if (idxnew1 == -1) + idxnew1 = idxold1 & RCUTORTURE_RDR_MASK_1; + WARN_ON_ONCE(idxnew1 < 0); + if (WARN_ON_ONCE((idxnew1 >> RCUTORTURE_RDR_SHIFT_1) > 1)) + pr_info("Unexpected idxnew1 value of %#x\n", idxnew1); + if (idxnew2 == -1) + idxnew2 = idxold2 & RCUTORTURE_RDR_MASK_2; + WARN_ON_ONCE(idxnew2 < 0); + WARN_ON_ONCE((idxnew2 >> RCUTORTURE_RDR_SHIFT_2) > 1); + *readstate = idxnew1 | idxnew2 | newstate; + WARN_ON_ONCE(*readstate < 0); + if (WARN_ON_ONCE((*readstate >> RCUTORTURE_RDR_SHIFT_2) > 1)) + pr_info("Unexpected idxnew2 value of %#x\n", idxnew2); } /* Return the biggest extendables mask given current RCU and boot parameters. */ @@ -1492,7 +1511,7 @@ static int rcutorture_extend_mask_max(void) WARN_ON_ONCE(extendables & ~RCUTORTURE_MAX_EXTEND); mask = extendables & RCUTORTURE_MAX_EXTEND & cur_ops->extendables; - mask = mask | RCUTORTURE_RDR_RCU; + mask = mask | RCUTORTURE_RDR_RCU_1 | RCUTORTURE_RDR_RCU_2; return mask; } @@ -1507,13 +1526,21 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ; unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; - WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); + WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT_1); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ if (!(randmask1 & 0x7)) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); + // Can't have nested RCU reader without outer RCU reader. + if (!(mask & RCUTORTURE_RDR_RCU_1) && (mask & RCUTORTURE_RDR_RCU_2)) { + if (oldmask & RCUTORTURE_RDR_RCU_1) + mask &= ~RCUTORTURE_RDR_RCU_2; + else + mask |= RCUTORTURE_RDR_RCU_1; + } + /* * Can't enable bh w/irq disabled. */ @@ -1533,7 +1560,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask |= oldmask & bhs; } - return mask ?: RCUTORTURE_RDR_RCU; + return mask ?: RCUTORTURE_RDR_RCU_1; } /* From 10ba5ab68f9bdbbf3dc8fd455259c9b8d072aabe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 23 Sep 2021 10:07:14 -0700 Subject: [PATCH 0676/1202] srcu: Prevent redundant __srcu_read_unlock() wakeup Tiny SRCU readers can appear at task level, but also in interrupt and softirq handlers. Because Tiny SRCU is selected only in kernels built with CONFIG_SMP=n and CONFIG_PREEMPTION=n, it is not possible for a grace period to start while there is a non-task-level SRCU reader executing. This means that it does not make sense for __srcu_read_unlock() to awaken the Tiny SRCU grace period, because that can only happen when the grace period is waiting for one value of ->srcu_idx and __srcu_read_unlock() is ending the last reader for some other value of ->srcu_idx. After all, any such wakeup will be redundant. 
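To make the reasoning concrete, here is a simplified sketch of the two sides of this handshake, loosely paraphrased from kernel/rcu/srcutiny.c (the field names are real, but the control flow is abbreviated, so treat this as an illustration rather than the exact source):

	/* Grace-period side, always task context: flip index, then wait. */
	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
	WRITE_ONCE(ssp->srcu_gp_waiting, true);	/* Readers, please wake us. */
	WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
	swait_event_exclusive(ssp->srcu_wq,
			      !READ_ONCE(ssp->srcu_lock_nesting[idx]));
	WRITE_ONCE(ssp->srcu_gp_waiting, false);

	/* Reader side, __srcu_read_unlock(): wake the waiter if last. */
	newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
	WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
	if (!newval && READ_ONCE(ssp->srcu_gp_waiting))
		swake_up_one(&ssp->srcu_wq);

Because CONFIG_SMP=n and CONFIG_PREEMPTION=n guarantee that the swait above can block only while no non-task-level reader is executing, an interrupt-time or softirq-time srcu_read_unlock() can never be ending the reader that the grace period is sleeping on, which is why the patch below can safely gate the wakeup on in_task().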
Worse yet, in some cases, such wakeups generate lockdep splats: ====================================================== WARNING: possible circular locking dependency detected 5.15.0-rc1+ #3758 Not tainted ------------------------------------------------------ rcu_torture_rea/53 is trying to acquire lock: ffffffff9514e6a8 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}, at: xa/0x30 but task is already holding lock: ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at: _extend+0x370/0x400 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&p->pi_lock){-.-.}-{2:2}: _raw_spin_lock_irqsave+0x2f/0x50 try_to_wake_up+0x50/0x580 swake_up_locked.part.7+0xe/0x30 swake_up_one+0x22/0x30 rcutorture_one_extend+0x1b6/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_timer+0x1a/0x70 call_timer_fn+0xa6/0x230 run_timer_softirq+0x493/0x4c0 __do_softirq+0xc0/0x371 irq_exit+0x73/0x90 sysvec_apic_timer_interrupt+0x63/0x80 asm_sysvec_apic_timer_interrupt+0x12/0x20 default_idle+0xb/0x10 default_idle_call+0x5e/0x170 do_idle+0x18a/0x1f0 cpu_startup_entry+0xa/0x10 start_kernel+0x678/0x69f secondary_startup_64_no_verify+0xc2/0xcb -> #0 (srcu_ctl.srcu_wq.lock){..-.}-{2:2}: __lock_acquire+0x130c/0x2440 lock_acquire+0xc2/0x270 _raw_spin_lock_irqsave+0x2f/0x50 swake_up_one+0xa/0x30 rcutorture_one_extend+0x387/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_reader+0xac/0x200 kthread+0x12d/0x150 ret_from_fork+0x22/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&p->pi_lock); lock(srcu_ctl.srcu_wq.lock); lock(&p->pi_lock); lock(srcu_ctl.srcu_wq.lock); *** DEADLOCK *** 1 lock held by rcu_torture_rea/53: #0: ffff95c642479d80 (&p->pi_lock){-.-.}-{2:2}, at: _extend+0x370/0x400 stack backtrace: CPU: 0 PID: 53 Comm: rcu_torture_rea Not tainted 5.15.0-rc1+ Hardware name: Red Hat KVM/RHEL-AV, BIOS e_el8.5.0+746+bbd5d70c 04/01/2014 Call Trace: check_noncircular+0xfe/0x110 ? find_held_lock+0x2d/0x90 __lock_acquire+0x130c/0x2440 lock_acquire+0xc2/0x270 ? swake_up_one+0xa/0x30 ? find_held_lock+0x72/0x90 _raw_spin_lock_irqsave+0x2f/0x50 ? swake_up_one+0xa/0x30 swake_up_one+0xa/0x30 rcutorture_one_extend+0x387/0x400 rcu_torture_one_read+0x290/0x5d0 rcu_torture_reader+0xac/0x200 ? rcutorture_oom_notify+0xf0/0xf0 ? __kthread_parkme+0x61/0x90 ? rcu_torture_one_read+0x5d0/0x5d0 kthread+0x12d/0x150 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 This is a false positive because there is only one CPU, and both locks are raw (non-preemptible) spinlocks. However, it is worthwhile getting rid of the redundant wakeup, which has the side effect of breaking the theoretical deadlock cycle. This commit therefore eliminates the redundant wakeups. Signed-off-by: Paul E. McKenney --- kernel/rcu/srcutiny.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index a0ba2ed49bc61..92c002d654828 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -99,7 +99,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) int newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) + if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task()) swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); From a8dd0f653719da9e13e9a63d347e8a34e8240b6c Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 24 Sep 2021 21:30:26 -0700 Subject: [PATCH 0677/1202] rcutorture: Suppress pi-lock-across read-unlock testing for Tiny SRCU Because Tiny srcu_read_unlock() directly calls swake_up_one(), lockdep complains when a pi lock is held across that srcu_read_unlock(). Although this is a lockdep false positive (there is no other CPU to complete the deadlock cycle), lockdep is what it is at the moment. This commit therefore prevents rcutorture from holding pi lock across a Tiny srcu_read_unlock(). Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d295da380fb4f..503e14e62e8f2 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -349,6 +349,7 @@ struct rcu_torture_ops { int can_boost; int extendables; int slow_gps; + int no_pi_lock; const char *name; }; @@ -670,6 +671,7 @@ static struct rcu_torture_ops srcu_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, + .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcu" }; @@ -703,6 +705,7 @@ static struct rcu_torture_ops srcud_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, + .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .name = "srcud" }; @@ -723,6 +726,7 @@ static struct rcu_torture_ops busted_srcud_ops = { .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, .irq_capable = 1, + .no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU), .extendables = RCUTORTURE_MAX_EXTEND, .name = "busted_srcud" }; @@ -1473,8 +1477,9 @@ static void rcutorture_one_extend(int *readstate, int newstate, idxold2 = 0; } if (statesold & RCUTORTURE_RDR_RCU_1) { - bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); + bool lockit; + lockit = !cur_ops->no_pi_lock && !statesnew && !(torture_random(trsp) & 0xffff); if (lockit) raw_spin_lock_irqsave(¤t->pi_lock, flags); cur_ops->readunlock((idxold1 >> RCUTORTURE_RDR_SHIFT_1) & 0x1); From 073cd6bf79493e78f3381c83425a85f296b3f11c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 10:54:22 -0700 Subject: [PATCH 0678/1202] doc: Remove obsolete kernel-per-CPU-kthreads RCU_FAST_NO_HZ advice This document advises building with both CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y. However, CONFIG_NO_HZ=y offloads callbacks from all nohz_full CPUs, and CPUs with offloaded callbacks do not benefit from CONFIG_RCU_FAST_NO_HZ=y. Quite the opposite: CONFIG_RCU_FAST_NO_HZ=y simply adds a bit of idle entry/exit overhead. This commit therefore changes that advice to only CONFIG_NO_HZ=y. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-per-CPU-kthreads.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index 5e51ee5b03589..e4a5fc26f1a9d 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -208,7 +208,7 @@ Do at least one of the following: 2. Enable RCU to do its processing remotely via dyntick-idle by doing all of the following: - a. Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y. + a. Build with CONFIG_NO_HZ=y. b. Ensure that the CPU goes idle frequently, allowing other CPUs to detect that it has passed through an RCU quiescent state. 
If the kernel is built with CONFIG_NO_HZ_FULL=y, From d752004b90a54eb0e58937b2d983e1cb95863f14 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:13:24 -0700 Subject: [PATCH 0679/1202] torture: Remove RCU_FAST_NO_HZ from rcuscale and refscale scenarios All of the rcuscale and refscale scenarios that mention the Kconfig option CONFIG_RCU_FAST_NO_HZ disable it. But this Kconfig option is disabled by default, so this commit removes the pointless "CONFIG_RCU_FAST_NO_HZ=n" lines from these scenarios. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcuscale/TINY | 1 - tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 | 1 - tools/testing/selftests/rcutorture/configs/rcuscale/TREE | 1 - tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 | 1 - tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT | 1 - tools/testing/selftests/rcutorture/configs/refscale/PREEMPT | 1 - 6 files changed, 6 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index fb05ef5279b43..7a7bf776a448b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY @@ -6,7 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index e6baa2fbaeb33..227aba7783aff 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -5,7 +5,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index 4cc1cc5813214..f110d9ffbe4cb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 index f5952061fde7b..9f83e53727969 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index ad505a887bec4..7f06838a91e61 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT 
b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT index 4f08e641bb6b8..52e3ef6740567 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n From 3b767c4436ce5c5cd94bf600992ee832ee72bdfa Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:17:35 -0700 Subject: [PATCH 0680/1202] torture: Remove RCU_FAST_NO_HZ from rcu scenarios All of the rcu scenarios that mention CONFIG_RCU_FAST_NO_HZ disable it. But this Kconfig option is disabled by default, so this commit removes the pointless "CONFIG_RCU_FAST_NO_HZ=n" lines from these scenarios. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 1 - tools/testing/selftests/rcutorture/configs/rcu/TREE10 | 1 - 6 files changed, 6 deletions(-) diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 65daee4fbf5a3..2871ee599891d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 4f95f8544f3f0..9f48c73709ec3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index bf4980d606b50..db27651de04b8 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index d7afb271a586d..2789b47e4ecd7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index c810c5276a89f..8b561355b9ef2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 7311f84a58765..4a00539bfdd71 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n From b6093b4f9c44cb55b5f011816d67e5c999d4b574 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:18:51 -0700 Subject: [PATCH 0681/1202] rcu: Remove the RCU_FAST_NO_HZ Kconfig option All of the uses of CONFIG_RCU_FAST_NO_HZ=y that I have seen involve systems with RCU callbacks offloaded. In this situation, all that this Kconfig option does is slow down idle entry/exit with an additional always-taken early exit. If this is the only use case, then this Kconfig option is nothing but an attractive nuisance that needs to go away. This commit therefore removes the RCU_FAST_NO_HZ Kconfig option. Signed-off-by: Paul E. McKenney --- Documentation/RCU/stallwarn.rst | 11 -- .../admin-guide/kernel-parameters.txt | 4 - Documentation/timers/no_hz.rst | 10 +- kernel/rcu/Kconfig | 18 -- kernel/rcu/tree.c | 11 -- kernel/rcu/tree.h | 7 - kernel/rcu/tree_plugin.h | 185 +----------------- kernel/rcu/tree_stall.h | 27 +-- .../selftests/rcutorture/configs/rcu/TREE01 | 1 - .../selftests/rcutorture/configs/rcu/TREE04 | 1 - .../rcutorture/doc/TREE_RCU-kconfig.txt | 1 - 11 files changed, 7 insertions(+), 269 deletions(-) diff --git a/Documentation/RCU/stallwarn.rst b/Documentation/RCU/stallwarn.rst index 28f8ad16db250..78404625bad26 100644 --- a/Documentation/RCU/stallwarn.rst +++ b/Documentation/RCU/stallwarn.rst @@ -254,17 +254,6 @@ period (in this case 2603), the grace-period sequence number (7075), and an estimate of the total number of RCU callbacks queued across all CPUs (625 in this case). -In kernels with CONFIG_RCU_FAST_NO_HZ, more information is printed -for each CPU:: - - 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 dyntick_enabled: 1 - -The "last_accelerate:" prints the low-order 16 bits (in hex) of the -jiffies counter when this CPU last invoked rcu_try_advance_all_cbs() -from rcu_needs_cpu() or last invoked rcu_accelerate_cbs() from -rcu_prepare_for_idle(). "dyntick_enabled: 1" indicates that dyntick-idle -processing is enabled. - If the grace period ends just as the stall warning starts printing, there will be a spurious stall-warning message, which will include the following:: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 91ba391f9b328..316027c3aadce 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4465,10 +4465,6 @@ on rcutree.qhimark at boot time and to zero to disable more aggressive help enlistment. - rcutree.rcu_idle_gp_delay= [KNL] - Set wakeup interval for idle CPUs that have - RCU callbacks (RCU_FAST_NO_HZ=y).
- rcutree.rcu_kick_kthreads= [KNL] Cause the grace-period kthread to get an extra wake_up() if it sleeps three times longer than diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index 6cadad7c3aad4..bfbaa5bfe90d1 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -184,16 +184,12 @@ There are situations in which idle CPUs cannot be permitted to enter either dyntick-idle mode or adaptive-tick mode, the most common being when that CPU has RCU callbacks pending. -The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs -to enter dyntick-idle mode or adaptive-tick mode anyway. In this case, -a timer will awaken these CPUs every four jiffies in order to ensure -that the RCU callbacks are processed in a timely fashion. - -Another approach is to offload RCU callback processing to "rcuo" kthreads +Avoid this by offloading RCU callback processing to "rcuo" kthreads using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to offload may be selected using The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated list of CPUs and CPU ranges, for example, -"1,3-5" selects CPUs 1, 3, 4, and 5. +"1,3-5" selects CPUs 1, 3, 4, and 5. Note that CPUs specified by +the "nohz_full" kernel boot parameter are also offloaded. The offloaded CPUs will never queue RCU callbacks, and therefore RCU never prevents offloaded CPUs from entering either dyntick-idle mode diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 3128b7cf8e1fd..4005f2728f1a6 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -169,24 +169,6 @@ config RCU_FANOUT_LEAF Take the default if unsure. -config RCU_FAST_NO_HZ - bool "Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ_COMMON && SMP && RCU_EXPERT - default n - help - This option permits CPUs to enter dynticks-idle state even if - they have RCU callbacks queued, and prevents RCU from waking - these CPUs up more than roughly once every four jiffies (by - default, you can adjust this using the rcutree.rcu_idle_gp_delay - parameter), thus improving energy efficiency. On the other - hand, this option increases the duration of RCU grace periods, - for example, slowing down synchronize_rcu(). - - Say Y if energy efficiency is critically important, and you - don't care about increased grace-period durations. - - Say N if you are unsure. - config RCU_BOOST bool "Enable RCU priority boosting" depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 522e9817ab03f..e80cdc150e2a5 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -624,7 +624,6 @@ static noinstr void rcu_eqs_enter(bool user) instrumentation_begin(); trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); - rcu_prepare_for_idle(); rcu_preempt_deferred_qs(current); // instrumentation for the noinstr rcu_dynticks_eqs_enter() @@ -768,9 +767,6 @@ noinstr void rcu_nmi_exit(void) trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. 
*/ - if (!in_nmi()) - rcu_prepare_for_idle(); - // instrumentation for the noinstr rcu_dynticks_eqs_enter() instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); instrumentation_end(); @@ -872,7 +868,6 @@ static void noinstr rcu_eqs_exit(bool user) // instrumentation for the noinstr rcu_dynticks_eqs_exit() instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks)); - rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdp->dynticks_nesting, 1); @@ -1014,12 +1009,6 @@ noinstr void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); // ... but is watching here. - if (!in_nmi()) { - instrumentation_begin(); - rcu_cleanup_after_idle(); - instrumentation_end(); - } - instrumentation_begin(); // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs() instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks)); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 305cf6aeb4086..7ca1aa46083cc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -189,11 +189,6 @@ struct rcu_data { bool rcu_urgent_qs; /* GP old need light quiescent state. */ bool rcu_forced_tick; /* Forced tick to provide QS. */ bool rcu_forced_tick_exp; /* ... provide QS to expedited GP. */ -#ifdef CONFIG_RCU_FAST_NO_HZ - unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */ - unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */ - int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ -#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ /* 4) rcu_barrier(), OOM callbacks, and expediting. */ struct rcu_head barrier_head; @@ -419,8 +414,6 @@ static bool rcu_is_callbacks_kthread(void); static void rcu_cpu_kthread_setup(unsigned int cpu); static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp); static void __init rcu_spawn_boost_kthreads(void); -static void rcu_cleanup_after_idle(void); -static void rcu_prepare_for_idle(void); static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_need_deferred_qs(struct task_struct *t); static void rcu_preempt_deferred_qs(struct task_struct *t); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 5199559fbbf0c..19f7d578cedb3 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -51,8 +51,6 @@ static void __init rcu_bootup_announce_oddness(void) RCU_FANOUT); if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); - if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) - pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); if (IS_ENABLED(CONFIG_PROVE_RCU)) pr_info("\tRCU lockdep checking is enabled.\n"); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) @@ -1253,16 +1251,14 @@ static void __init rcu_spawn_boost_kthreads(void) #endif /* #else #ifdef CONFIG_RCU_BOOST */ -#if !defined(CONFIG_RCU_FAST_NO_HZ) - /* * Check to see if any future non-offloaded RCU-related work will need * to be done by the current CPU, even if none need be done immediately, * returning 1 if so. This function is part of the RCU implementation; * it is -not- an exported member of the RCU API. * - * Because we not have RCU_FAST_NO_HZ, just check whether or not this - * CPU has RCU callbacks queued. + * Just check whether or not this CPU has non-offloaded RCU callbacks + * queued. 
*/ int rcu_needs_cpu(u64 basemono, u64 *nextevt) { @@ -1271,183 +1267,6 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up - * after it. - */ -static void rcu_cleanup_after_idle(void) -{ -} - -/* - * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, - * is nothing. - */ -static void rcu_prepare_for_idle(void) -{ -} - -#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ - -/* - * This code is invoked when a CPU goes idle, at which point we want - * to have the CPU do everything required for RCU so that it can enter - * the energy-efficient dyntick-idle mode. - * - * The following preprocessor symbol controls this: - * - * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted - * to sleep in dyntick-idle mode with RCU callbacks pending. This - * is sized to be roughly one RCU grace period. Those energy-efficiency - * benchmarkers who might otherwise be tempted to set this to a large - * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your - * system. And if you are -that- concerned about energy efficiency, - * just power the system down and be done with it! - * - * The value below works well in practice. If future workloads require - * adjustment, they can be converted into kernel config parameters, though - * making the state machine smarter might be a better option. - */ -#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ - -static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; -module_param(rcu_idle_gp_delay, int, 0644); - -/* - * Try to advance callbacks on the current CPU, but only if it has been - * awhile since the last time we did so. Afterwards, if there are any - * callbacks ready for immediate invocation, return true. - */ -static bool __maybe_unused rcu_try_advance_all_cbs(void) -{ - bool cbs_ready = false; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp; - - /* Exit early if we advanced recently. */ - if (jiffies == rdp->last_advance_all) - return false; - rdp->last_advance_all = jiffies; - - rnp = rdp->mynode; - - /* - * Don't bother checking unless a grace period has - * completed since we last checked and there are - * callbacks not yet ready to invoke. - */ - if ((rcu_seq_completed_gp(rdp->gp_seq, - rcu_seq_current(&rnp->gp_seq)) || - unlikely(READ_ONCE(rdp->gpwrap))) && - rcu_segcblist_pend_cbs(&rdp->cblist)) - note_gp_changes(rdp); - - if (rcu_segcblist_ready_cbs(&rdp->cblist)) - cbs_ready = true; - return cbs_ready; -} - -/* - * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready - * to invoke. If the CPU has callbacks, try to advance them. Tell the - * caller about what to set the timeout. - * - * The caller must have disabled interrupts. - */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - unsigned long dj; - - lockdep_assert_irqs_disabled(); - - /* If no non-offloaded callbacks, RCU doesn't need the CPU. */ - if (rcu_segcblist_empty(&rdp->cblist) || - rcu_rdp_is_offloaded(rdp)) { - *nextevt = KTIME_MAX; - return 0; - } - - /* Attempt to advance callbacks. */ - if (rcu_try_advance_all_cbs()) { - /* Some ready to invoke, so initiate later invocation. */ - invoke_rcu_core(); - return 1; - } - rdp->last_accelerate = jiffies; - - /* Request timer and round. 
*/ - dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; - - *nextevt = basemono + dj * TICK_NSEC; - return 0; -} - -/* - * Prepare a CPU for idle from an RCU perspective. The first major task is to - * sense whether nohz mode has been enabled or disabled via sysfs. The second - * major task is to accelerate (that is, assign grace-period numbers to) any - * recently arrived callbacks. - * - * The caller must have disabled interrupts. - */ -static void rcu_prepare_for_idle(void) -{ - bool needwake; - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - struct rcu_node *rnp; - int tne; - - lockdep_assert_irqs_disabled(); - if (rcu_rdp_is_offloaded(rdp)) - return; - - /* Handle nohz enablement switches conservatively. */ - tne = READ_ONCE(tick_nohz_active); - if (tne != rdp->tick_nohz_enabled_snap) { - if (!rcu_segcblist_empty(&rdp->cblist)) - invoke_rcu_core(); /* force nohz to see update. */ - rdp->tick_nohz_enabled_snap = tne; - return; - } - if (!tne) - return; - - /* - * If we have not yet accelerated this jiffy, accelerate all - * callbacks on this CPU. - */ - if (rdp->last_accelerate == jiffies) - return; - rdp->last_accelerate = jiffies; - if (rcu_segcblist_pend_cbs(&rdp->cblist)) { - rnp = rdp->mynode; - raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ - needwake = rcu_accelerate_cbs(rnp, rdp); - raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ - if (needwake) - rcu_gp_kthread_wake(); - } -} - -/* - * Clean up for exit from idle. Attempt to advance callbacks based on - * any grace periods that elapsed while the CPU was idle, and if any - * callbacks are now ready to invoke, initiate invocation. - */ -static void rcu_cleanup_after_idle(void) -{ - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); - - lockdep_assert_irqs_disabled(); - if (rcu_rdp_is_offloaded(rdp)) - return; - if (rcu_try_advance_all_cbs()) - invoke_rcu_core(); -} - -#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ - /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 677ee3d8671bf..dc30e77eaf053 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -347,26 +347,6 @@ static void rcu_dump_cpu_stacks(void) } } -#ifdef CONFIG_RCU_FAST_NO_HZ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - - sprintf(cp, "last_accelerate: %04lx/%04lx dyntick_enabled: %d", - rdp->last_accelerate & 0xffff, jiffies & 0xffff, - !!rdp->tick_nohz_enabled_snap); -} - -#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ - -static void print_cpu_stall_fast_no_hz(char *cp, int cpu) -{ - *cp = '\0'; -} - -#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */ - static const char * const gp_state_names[] = { [RCU_GP_IDLE] = "RCU_GP_IDLE", [RCU_GP_WAIT_GPS] = "RCU_GP_WAIT_GPS", @@ -408,13 +388,12 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) * of RCU grace periods that this CPU is ignorant of, for example, "1" * if the CPU was aware of the previous grace period. * - * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. + * Also print out idle info. 
*/ static void print_cpu_stall_info(int cpu) { unsigned long delta; bool falsepositive; - char fast_no_hz[72]; struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); char *ticks_title; unsigned long ticks_value; @@ -432,11 +411,10 @@ static void print_cpu_stall_info(int cpu) ticks_title = "ticks this GP"; ticks_value = rdp->ticks_this_gp; } - print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq); falsepositive = rcu_is_gp_kthread_starving(NULL) && rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)); - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s%s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], @@ -449,7 +427,6 @@ static void print_cpu_stall_info(int cpu) rdp->dynticks_nesting, rdp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart, - fast_no_hz, falsepositive ? " (false positive?)" : ""); } diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index b5b53973c01e4..8ae41d5f81a3e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index f6d6a40c05768..22ad0261728d0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 1b96d68473b8f..42acb1a64ce10 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs. CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with rcu_nocbs=0, and one with all rcu_nocbs CPUs. CONFIG_RCU_TRACE -- Do half. From d93bfe038e929a71e2bf6c0dd506896c522185c8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Sep 2021 14:30:20 -0700 Subject: [PATCH 0682/1202] rcu: Move rcu_needs_cpu() to tree.c Now that RCU_FAST_NO_HZ is no more, there is but one implementation of the rcu_needs_cpu() function. This commit therefore moves this function from kernel/rcu/tree_plugin.h to kernel/rcu/tree.c. Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree.c | 18 ++++++++++++++++++ kernel/rcu/tree_plugin.h | 16 ---------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e80cdc150e2a5..572f644f3240b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1075,6 +1075,24 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } +/* + * Check to see if any future non-offloaded RCU-related work will need + * to be done by the current CPU, even if none need be done immediately, + * returning 1 if so. This function is part of the RCU implementation; + * it is -not- an exported member of the RCU API. This is used by + * the idle-entry code to figure out whether it is safe to disable the + * scheduler-clock interrupt. + * + * Just check whether or not this CPU has non-offloaded RCU callbacks + * queued. + */ +int rcu_needs_cpu(u64 basemono, u64 *nextevt) +{ + *nextevt = KTIME_MAX; + return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && + !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); +} + /* * If any sort of urgency was applied to the current CPU (for example, * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 19f7d578cedb3..0575757a0f8fc 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1251,22 +1251,6 @@ static void __init rcu_spawn_boost_kthreads(void) #endif /* #else #ifdef CONFIG_RCU_BOOST */ -/* - * Check to see if any future non-offloaded RCU-related work will need - * to be done by the current CPU, even if none need be done immediately, - * returning 1 if so. This function is part of the RCU implementation; - * it is -not- an exported member of the RCU API. - * - * Just check whether or not this CPU has non-offloaded RCU callbacks - * queued. - */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) -{ - *nextevt = KTIME_MAX; - return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && - !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); -} - /* * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the * grace-period kthread will do force_quiescent_state() processing? From 20121ba4864a01df2eaa9c38c8abae0a8ed7e037 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:45 +0200 Subject: [PATCH 0683/1202] rcu: Ignore rdp.cpu_no_qs.b.exp on preemptible RCU's rcu_qs() Preemptible RCU does not use the rcu_data structure's ->cpu_no_qs.b.exp, instead using a separate ->exp_deferred_qs field to record the need for an expedited quiescent state. In fact ->cpu_no_qs.b.exp should never be set in preemptible RCU because preemptible RCU's expedited grace periods use other mechanisms to record quiescent states. This commit therefore removes the implicit rcu_qs() reference to ->cpu_no_qs.b.exp in favor of a direct reference to ->cpu_no_qs.b.norm. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0575757a0f8fc..fb6161482ff75 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -275,12 +275,16 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * current task, there might be any number of other tasks blocked while * in an RCU read-side critical section.
* + * Unlike non-preemptible-RCU, quiescent state reports for expedited + * grace periods are handled separately via deferred quiescent states + * and context switch events. + * * Callers to this function must disable preemption. */ static void rcu_qs(void) { RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n"); - if (__this_cpu_read(rcu_data.cpu_no_qs.s)) { + if (__this_cpu_read(rcu_data.cpu_no_qs.b.norm)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); From 3b8b60109fa6308436943e9624903614b6e091af Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:47 +0200 Subject: [PATCH 0684/1202] rcu: Move rcu_data.cpu_no_qs.b.exp reset to rcu_report_exp_rdp() On non-preemptible RCU, move clearing of the rcu_data structure's ->cpu_no_qs.b.exp field to the actual expedited quiescent state report function, matching how preemptible RCU handles the ->exp_deferred_qs field. This prepares for removing ->exp_deferred_qs in favor of ->cpu_no_qs.b.exp for both preemptible and non-preemptible RCU. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 1 + kernel/rcu/tree_plugin.h | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index f3947c49eee71..6c6eb32203852 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -256,6 +256,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, static void rcu_report_exp_rdp(struct rcu_data *rdp) { WRITE_ONCE(rdp->exp_deferred_qs, false); + WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fb6161482ff75..788fd4e6206c1 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -847,10 +847,8 @@ static void rcu_qs(void) trace_rcu_grace_period(TPS("rcu_sched"), __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); - if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) - return; - __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false); - rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); + if (__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) + rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); } /* From dbe7ced5d9c0bf595bc3a746b284ec0e054d894f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Sep 2021 14:10:48 +0200 Subject: [PATCH 0685/1202] rcu: Remove rcu_data.exp_deferred_qs and convert to rcu_data.cpu_no_qs.b.exp Having two fields for the same purpose with subtle differences on different RCU flavours is confusing, especially when both fields always exist on both RCU flavours. Fortunately, it is now safe for preemptible RCU to rely on the rcu_data structure's ->cpu_no_qs.b.exp field, just like non-preemptible RCU. This commit therefore removes the ad-hoc ->exp_deferred_qs field. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.h | 1 - kernel/rcu/tree_exp.h | 5 ++--- kernel/rcu/tree_plugin.h | 12 ++++++------ 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 7ca1aa46083cc..70188cb424731 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -157,7 +157,6 @@ struct rcu_data { bool core_needs_qs; /* Core waits for quiescent state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible ->gp_seq wrap. */ - bool exp_deferred_qs; /* This CPU awaiting a deferred QS?
*/ bool cpu_started; /* RCU watching this onlining CPU. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6c6eb32203852..fc2ee326a6f73 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -255,7 +255,6 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, */ static void rcu_report_exp_rdp(struct rcu_data *rdp) { - WRITE_ONCE(rdp->exp_deferred_qs, false); WRITE_ONCE(rdp->cpu_no_qs.b.exp, false); rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true); } @@ -656,7 +655,7 @@ static void rcu_exp_handler(void *unused) rcu_dynticks_curr_cpu_in_eqs()) { rcu_report_exp_rdp(rdp); } else { - rdp->exp_deferred_qs = true; + WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); set_tsk_need_resched(t); set_preempt_need_resched(); } @@ -678,7 +677,7 @@ static void rcu_exp_handler(void *unused) if (depth > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->expmask & rdp->grpmask) { - rdp->exp_deferred_qs = true; + WRITE_ONCE(rdp->cpu_no_qs.b.exp, true); t->rcu_read_unlock_special.b.exp_hint = true; } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 788fd4e6206c1..e9f8202de0b18 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -258,10 +258,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * no need to check for a subsequent expedited GP. (Though we are * still in a quiescent state in any case.) */ - if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs) + if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); else - WARN_ON_ONCE(rdp->exp_deferred_qs); + WARN_ON_ONCE(rdp->cpu_no_qs.b.exp); } /* @@ -352,7 +352,7 @@ void rcu_note_context_switch(bool preempt) * means that we continue to block the current grace period. */ rcu_qs(); - if (rdp->exp_deferred_qs) + if (rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); rcu_tasks_qs(current, preempt); trace_rcu_utilization(TPS("End context switch")); @@ -479,7 +479,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ special = t->rcu_read_unlock_special; rdp = this_cpu_ptr(&rcu_data); - if (!special.s && !rdp->exp_deferred_qs) { + if (!special.s && !rdp->cpu_no_qs.b.exp) { local_irq_restore(flags); return; } @@ -499,7 +499,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) * tasks are handled when removing the task from the * blocked-tasks list below. */ - if (rdp->exp_deferred_qs) + if (rdp->cpu_no_qs.b.exp) rcu_report_exp_rdp(rdp); /* Clean up if blocked during RCU read-side critical section. */ @@ -582,7 +582,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (__this_cpu_read(rcu_data.exp_deferred_qs) || + return (__this_cpu_read(rcu_data.cpu_no_qs.b.exp) || READ_ONCE(t->rcu_read_unlock_special.s)) && rcu_preempt_depth() == 0; } From 6fa0b199c576346e8ae188deb533a3105d0ea4ce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Sep 2021 11:06:35 -0700 Subject: [PATCH 0686/1202] rcu: Mark sync_sched_exp_online_cleanup() ->cpu_no_qs.b.exp load The sync_sched_exp_online_cleanup() is called from rcutree_online_cpu(), which can be invoked with interrupts enabled. 
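For orientation, the generic pattern at stake can be sketched as follows; this is illustrative only, not the kernel code itself, and do_work() is a placeholder:

        static bool flag;                       /* also written from an IPI handler */

        static void ipi_handler(void *unused)
        {
                WRITE_ONCE(flag, true);         /* writer side, already marked */
        }

        static void checker(void)               /* may run with interrupts enabled */
        {
                if (READ_ONCE(flag))            /* a plain "if (flag)" here would be a data race */
                        do_work();
        }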
This means that the ->cpu_no_qs.b.exp field is subject to data races from the rcu_exp_handler() IPI handler, so this commit marks the load from that field. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index fc2ee326a6f73..12f1a18d7b6d6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -759,7 +759,7 @@ static void sync_sched_exp_online_cleanup(int cpu) my_cpu = get_cpu(); /* Quiescent state either not needed or already requested, leave. */ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || - rdp->cpu_no_qs.b.exp) { + READ_ONCE(rdp->cpu_no_qs.b.exp)) { put_cpu(); return; } From 9d112f6f50c7d1af626ab78db8b8b96f738148be Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 28 Sep 2021 08:21:28 +0800 Subject: [PATCH 0687/1202] rcu: in_irq() cleanup This commit replaces the obsolete and ambiguous macro in_irq() with its shiny new in_hardirq() equivalent. Signed-off-by: Changbin Du Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 2 +- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9be015305f9f9..858f4d429946d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -85,7 +85,7 @@ static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } static inline void rcu_irq_exit_check_preempt(void) { } #define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq()) + (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 572f644f3240b..aff67161ac60c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1474,7 +1474,7 @@ static void rcu_gp_kthread_wake(void) { struct task_struct *t = READ_ONCE(rcu_state.gp_kthread); - if ((current == t && !in_irq() && !in_serving_softirq()) || + if ((current == t && !in_hardirq() && !in_serving_softirq()) || !READ_ONCE(rcu_state.gp_flags) || !t) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e9f8202de0b18..e811898ae427f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -644,7 +644,7 @@ static void rcu_read_unlock_special(struct task_struct *t) (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node); // Need to defer quiescent state until everything is enabled. - if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) { + if (use_softirq && (in_hardirq() || (expboost && !irqs_were_disabled))) { // Using softirq, safe to awaken, and either the // wakeup is free or there is either an expedited // GP in flight or a potential need to deboost. From a1fb4b31c892bf3b9be007ea0660c3adacbddb4a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Sep 2021 09:21:34 -0700 Subject: [PATCH 0688/1202] rcu: Prevent expedited GP from enabling tick on offline CPU If an RCU expedited grace period starts just when a CPU is in the process of going offline, so that the outgoing CPU has completed its pass through stop-machine but has not yet completed its final dive into the idle loop, RCU will attempt to enable that CPU's scheduling-clock tick via a call to tick_dep_set_cpu(). 
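The shape of the eventual fix, shown in full in the diff below, amounts to guarding that call; as a sketch only:

        preempt_disable();                      /* keep the cpu_online() answer stable */
        if (cpu_online(cpu))                    /* never poke an offlined CPU's tick */
                tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
        preempt_enable();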
For this to happen, that CPU has to have been online when the expedited grace period completed its CPU-selection phase. This is pointless: The outgoing CPU has interrupts disabled, so it cannot take a scheduling-clock tick anyway. In addition, the tick_dep_set_cpu() function's eventual call to irq_work_queue_on() will splat as follows: smpboot: CPU 1 is now offline WARNING: CPU: 6 PID: 124 at kernel/irq_work.c:95 +irq_work_queue_on+0x57/0x60 Modules linked in: CPU: 6 PID: 124 Comm: kworker/6:2 Not tainted 5.15.0-rc1+ #3 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS +rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 Workqueue: rcu_gp wait_rcu_exp_gp RIP: 0010:irq_work_queue_on+0x57/0x60 Code: 8b 05 1d c7 ea 62 a9 00 00 f0 00 75 21 4c 89 ce 44 89 c7 e8 +9b 37 fa ff ba 01 00 00 00 89 d0 c3 4c 89 cf e8 3b ff ff ff eb ee <0f> 0b eb b7 +0f 0b eb db 90 48 c7 c0 98 2a 02 00 65 48 03 05 91 6f RSP: 0000:ffffb12cc038fe48 EFLAGS: 00010282 RAX: 0000000000000001 RBX: 0000000000005208 RCX: 0000000000000020 RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff9ad01f45a680 RBP: 000000000004c990 R08: 0000000000000001 R09: ffff9ad01f45a680 R10: ffffb12cc0317db0 R11: 0000000000000001 R12: 00000000fffecee8 R13: 0000000000000001 R14: 0000000000026980 R15: ffffffff9e53ae00 FS: 0000000000000000(0000) GS:ffff9ad01f580000(0000) +knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000000de0c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tick_nohz_dep_set_cpu+0x59/0x70 rcu_exp_wait_wake+0x54e/0x870 ? sync_rcu_exp_select_cpus+0x1fc/0x390 process_one_work+0x1ef/0x3c0 ? process_one_work+0x3c0/0x3c0 worker_thread+0x28/0x3c0 ? process_one_work+0x3c0/0x3c0 kthread+0x115/0x140 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x22/0x30 ---[ end trace c5bf75eb6aa80bc6 ]--- This commit therefore avoids invoking tick_dep_set_cpu() on offlined CPUs to limit both futility and false-positive splats. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_exp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 12f1a18d7b6d6..a96d17206d87a 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -506,7 +506,10 @@ static void synchronize_rcu_expedited_wait(void) if (rdp->rcu_forced_tick_exp) continue; rdp->rcu_forced_tick_exp = true; - tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); + preempt_disable(); + if (cpu_online(cpu)) + tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP); + preempt_enable(); } } j = READ_ONCE(jiffies_till_first_fqs); From 75af965d1142a224e3b0305c6aef1d65ceaadd14 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Sep 2021 11:09:34 -0700 Subject: [PATCH 0689/1202] rcu: Make idle entry report expedited quiescent states In non-preemptible kernels, an unfortunately timed expedited grace period can result in the rcu_exp_handler() IPI handler setting the rcu_data structure's cpu_no_qs.b.exp field just as the target CPU enters idle. There are situations in which this field will not be checked until after that CPU exits idle. The resulting grace-period latency does not qualify as "expedited". This commit therefore checks this field upon non-preemptible idle entry in the rcu_preempt_deferred_qs() function. 
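In the non-preemptible case the new hook is tiny; a sketch of its shape follows, with the authoritative hunk in the diff below:

        static void rcu_preempt_deferred_qs(struct task_struct *t)
        {
                struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

                if (rdp->cpu_no_qs.b.exp)        /* expedited QS requested by IPI? */
                        rcu_report_exp_rdp(rdp); /* report it before going idle */
        }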
It also qualifies the rcu_core() preempt_count() check with IS_ENABLED(CONFIG_PREEMPT_COUNT) to prevent false-positive quiescent states from count-free kernels. Reported-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_plugin.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index aff67161ac60c..e8247861ca938 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2725,7 +2725,7 @@ static __latent_entropy void rcu_core(void) WARN_ON_ONCE(!rdp->beenonline); /* Report any deferred quiescent states if preemption enabled. */ - if (!(preempt_count() & PREEMPT_MASK)) { + if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) { rcu_preempt_deferred_qs(current); } else if (rcu_preempt_need_deferred_qs(current)) { set_tsk_need_resched(current); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e811898ae427f..e25c8ab292598 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -925,7 +925,18 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { return false; } -static void rcu_preempt_deferred_qs(struct task_struct *t) { } + +// Except that we do need to respond to a request by an expedited grace +// period for a quiescent state from this CPU. Note that requests from +// tasks are handled when removing the task from the blocked-tasks list +// below. +static void rcu_preempt_deferred_qs(struct task_struct *t) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + + if (rdp->cpu_no_qs.b.exp) + rcu_report_exp_rdp(rdp); +} /* * Because there is no preemptible RCU, there can be no readers blocked, From 425a3c8800a0e0dad03f96f4c0510f219653772b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 11 Oct 2021 16:51:30 +0200 Subject: [PATCH 0690/1202] rcu/nocb: Make local rcu_nocb_lock_irqsave() safe against concurrent deoffloading rcu_nocb_lock_irqsave() can be preempted between the call to rcu_segcblist_is_offloaded() and the actual locking. This matters now that rcu_core() is preemptible on PREEMPT_RT and the (de-)offloading process can interrupt the softirq or the rcuc kthread. As a result we may locklessly call into code that requires nocb locking. In practice this is a problem while we accelerate callbacks on rcu_core(). Simply disabling interrupts before (instead of after) checking the NOCB offload state fixes the issue. Reported-and-tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 70188cb424731..deeaf2fee714c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -439,12 +439,16 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp); #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(void); -#define rcu_nocb_lock_irqsave(rdp, flags) \ -do { \ - if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) \ - local_irq_save(flags); \ - else \ - raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags)); \ + +/* + * Disable IRQs before checking offloaded state so that local + * locking is safe against concurrent de-offloading. + */ +#define rcu_nocb_lock_irqsave(rdp, flags) \ +do { \ + local_irq_save(flags); \ + if (rcu_segcblist_is_offloaded(&(rdp)->cblist)) \ + raw_spin_lock(&(rdp)->nocb_lock); \ } while (0) #else /* #ifdef CONFIG_RCU_NOCB_CPU */ #define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags) From e79d482ae6c27f1bbdebaecb9e0920f801b87e02 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:07 +0200 Subject: [PATCH 0691/1202] rcu/nocb: Prepare state machine for a new step Currently SEGCBLIST_SOFTIRQ_ONLY is a bit of an exception among the segcblist flags because it is an exclusive state that doesn't mix up with the other flags. Remove it in favour of: _ A flag specifying that rcu_core() needs to perform callbacks execution and acceleration and _ A flag specifying we want the nocb lock to be held in any needed circumstances This clarifies the code and is more flexible: It allows to have a state where rcu_core() runs with locking while offloading hasn't started yet. This is a necessary step to prepare for triggering rcu_core() at the very beginning of the de-offloading process so that rcu_core() won't dismiss work while being preempted by the de-offloading process, at least not without a pending subsequent rcu_core() that will quickly catch up. Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 37 ++++++++++++++++++++++------------- kernel/rcu/rcu_segcblist.c | 6 +++--- kernel/rcu/rcu_segcblist.h | 12 +++++++----- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_nocb.h | 24 +++++++++++++++-------- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 3db96c4f45fd4..812961b1d0642 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -69,7 +69,7 @@ struct rcu_cblist { * * * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | @@ -77,7 +77,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_OFFLOADED | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. 
Waking up CB and GP kthreads, | @@ -89,7 +89,9 @@ struct rcu_cblist { * | | * v v * --------------------------------------- ----------------------------------| - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | * | | | | * | | | | @@ -104,9 +106,10 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | - * | SEGCBLIST_KTHREAD_GP | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | | + * | SEGCBLIST_KTHREAD_CB | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -120,7 +123,8 @@ struct rcu_cblist { * * * |--------------------------------------------------------------------------| - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -130,6 +134,8 @@ struct rcu_cblist { * | * v * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | @@ -143,7 +149,9 @@ struct rcu_cblist { * | | * v v * ---------------------------------------------------------------------------| - * | | + * | | | + * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | * | | | * | GP kthread woke up and | CB kthread woke up and | @@ -159,7 +167,7 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | 0 | + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | @@ -168,17 +176,18 @@ struct rcu_cblist { * | * v * ---------------------------------------------------------------------------- - * | SEGCBLIST_SOFTIRQ_ONLY | + * | SEGCBLIST_RCU_CORE | * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, without holding nocb_lock. | * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_SOFTIRQ_ONLY BIT(1) -#define SEGCBLIST_KTHREAD_CB BIT(2) -#define SEGCBLIST_KTHREAD_GP BIT(3) -#define SEGCBLIST_OFFLOADED BIT(4) +#define SEGCBLIST_RCU_CORE BIT(1) +#define SEGCBLIST_LOCKING BIT(2) +#define SEGCBLIST_KTHREAD_CB BIT(3) +#define SEGCBLIST_KTHREAD_GP BIT(4) +#define SEGCBLIST_OFFLOADED BIT(5) struct rcu_segcblist { struct rcu_head *head; diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index aaa111237b602..c07aab6e39ef2 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -261,14 +261,14 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) } /* - * Mark the specified rcu_segcblist structure as offloaded. 
+ * Mark the specified rcu_segcblist structure as offloaded (or not) */ void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { if (offload) { - rcu_segcblist_clear_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY); - rcu_segcblist_set_flags(rsclp, SEGCBLIST_OFFLOADED); + rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED); } else { + rcu_segcblist_set_flags(rsclp, SEGCBLIST_RCU_CORE); rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED); } } diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h index 9a19328ff2514..e373fbe44da5e 100644 --- a/kernel/rcu/rcu_segcblist.h +++ b/kernel/rcu/rcu_segcblist.h @@ -80,11 +80,14 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) return rcu_segcblist_test_flags(rsclp, SEGCBLIST_ENABLED); } -/* Is the specified rcu_segcblist offloaded, or is SEGCBLIST_SOFTIRQ_ONLY set? */ +/* + * Is the specified rcu_segcblist NOCB offloaded (or in the middle of the + * [de]offloading process)? + */ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) { if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && - !rcu_segcblist_test_flags(rsclp, SEGCBLIST_SOFTIRQ_ONLY)) + rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING)) return true; return false; @@ -92,9 +95,8 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp) static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp) { - int flags = SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | SEGCBLIST_OFFLOADED; - - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && (rsclp->flags & flags) == flags) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) && + !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE)) return true; return false; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e8247861ca938..e38028d486481 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -79,7 +79,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU - .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, + .cblist.flags = SEGCBLIST_RCU_CORE, #endif }; static struct rcu_state rcu_state = { diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 368ef7b9af4fe..b3e07d0bfbbf8 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1000,12 +1000,12 @@ static long rcu_nocb_rdp_deoffload(void *arg) */ rcu_nocb_lock_irqsave(rdp, flags); /* - * Theoretically we could set SEGCBLIST_SOFTIRQ_ONLY after the nocb + * Theoretically we could clear SEGCBLIST_LOCKING after the nocb * lock is released but how about being paranoid for once? */ - rcu_segcblist_set_flags(cblist, SEGCBLIST_SOFTIRQ_ONLY); + rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING); /* - * With SEGCBLIST_SOFTIRQ_ONLY, we can't use + * Without SEGCBLIST_LOCKING, we can't use * rcu_nocb_unlock_irqrestore() anymore. */ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); @@ -1058,14 +1058,14 @@ static long rcu_nocb_rdp_offload(void *arg) pr_info("Offloading %d\n", rdp->cpu); /* - * Can't use rcu_nocb_lock_irqsave() while we are in - * SEGCBLIST_SOFTIRQ_ONLY mode. + * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING + * is set. */ raw_spin_lock_irqsave(&rdp->nocb_lock, flags); /* * We didn't take the nocb lock while working on the - * rdp->cblist in SEGCBLIST_SOFTIRQ_ONLY mode. + * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode). 
* Every modifications that have been done previously on * rdp->cblist must be visible remotely by the nocb kthreads * upon wake up after reading the cblist flags. @@ -1084,6 +1084,14 @@ static long rcu_nocb_rdp_offload(void *arg) rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) && rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)); + /* + * All kthreads are ready to work, we can finally relieve rcu_core() and + * enable nocb bypass. + */ + rcu_nocb_lock_irqsave(rdp, flags); + rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE); + rcu_nocb_unlock_irqrestore(rdp, flags); + return ret; } @@ -1154,8 +1162,8 @@ void __init rcu_init_nohz(void) if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); rcu_segcblist_offload(&rdp->cblist, true); - rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB); - rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP); + rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); + rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE); } rcu_organize_nocb_kthreads(); } From aef917e0c9128e284630fe5bfe9e41dc26f31278 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:08 +0200 Subject: [PATCH 0692/1202] rcu/nocb: Invoke rcu_core() at the start of deoffloading On PREEMPT_RT, if rcu_core() is preempted by the de-offloading process, some work, such as callbacks acceleration and invocation, may be left unattended due to the volatile checks on the offloaded state. In the worst case this work is postponed until the next rcu_pending() check that can take a jiffy to reach, which can be a problem in case of callbacks flooding. Solve that with invoking rcu_core() early in the de-offloading process. This way any work dismissed by an ongoing rcu_core() call fooled by a preempting deoffloading process will be caught up by a nearby future recall to rcu_core(), this time fully aware of the de-offloading state. Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 14 ++++++++++++++ kernel/rcu/rcu_segcblist.c | 6 ++---- kernel/rcu/tree.c | 17 +++++++++++++++++ kernel/rcu/tree_nocb.h | 9 +++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 812961b1d0642..659d13a7ddaaa 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -136,6 +136,20 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_CB | | + * | SEGCBLIST_KTHREAD_GP | + * | | + * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | + * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | Invoke RCU core so we make sure not to preempt it in the middle with | + * | leaving some urgent work unattended within a jiffy. 
| + * ---------------------------------------------------------------------------- + * | + * v + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | * | | diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index c07aab6e39ef2..81145c3ece25f 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -265,12 +265,10 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp) */ void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload) { - if (offload) { + if (offload) rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED); - } else { - rcu_segcblist_set_flags(rsclp, SEGCBLIST_RCU_CORE); + else rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED); - } } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e38028d486481..b236271b9022c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2717,6 +2717,23 @@ static __latent_entropy void rcu_core(void) unsigned long flags; struct rcu_data *rdp = raw_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; + /* + * On RT rcu_core() can be preempted when IRQs aren't disabled. + * Therefore this function can race with concurrent NOCB (de-)offloading + * on this CPU and the below condition must be considered volatile. + * However if we race with: + * + * _ Offloading: In the worst case we accelerate or process callbacks + * concurrently with NOCB kthreads. We are guaranteed to + * call rcu_nocb_lock() if that happens. + * + * _ Deoffloading: In the worst case we miss callbacks acceleration or + * processing. This is fine because the early stage + * of deoffloading invokes rcu_core() after setting + * SEGCBLIST_RCU_CORE. So we guarantee that we'll process + * what could have been dismissed without the need to wait + * for the next rcu_pending() check in the next jiffy. + */ const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist); if (cpu_is_offline(smp_processor_id())) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b3e07d0bfbbf8..2461fe8d0c23a 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -990,6 +990,15 @@ static long rcu_nocb_rdp_deoffload(void *arg) * will refuse to put anything into the bypass. */ WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + /* + * Start with invoking rcu_core() early. This way if the current thread + * happens to preempt an ongoing call to rcu_core() in the middle, + * leaving some work dismissed because rcu_core() still thinks the rdp is + * completely offloaded, we are guaranteed a nearby future instance of + * rcu_core() to catch up. + */ + rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE); + invoke_rcu_core(); ret = rdp_offload_toggle(rdp, false, flags); swait_event_exclusive(rdp->nocb_state_wq, !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB | From a217ed0a0d7bfa4a5a61de45f6150d8c586ba053 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Oct 2021 02:08:09 +0200 Subject: [PATCH 0693/1202] rcu/nocb: Make rcu_core() callbacks acceleration preempt-safe While reporting a quiescent state for a given CPU, rcu_core() takes advantage of the freshly loaded grace period sequence number and the locked rnp to accelerate the callbacks whose sequence number have been assigned a stale value. This action is only necessary when the rdp isn't offloaded, otherwise the NOCB kthreads already take care of the callbacks progression. 
However, the check for the offloaded state is volatile because it is performed outside the IRQs disabled section. It's possible for the offloading process to preempt rcu_core() at that point on PREEMPT_RT. This is dangerous because rcu_core() may end up accelerating callbacks concurrently with NOCB kthreads without appropriate locking. Fix this by moving the offloaded check inside the rnp locking section. Reported-and-tested-by: Valentin Schneider Reviewed-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b236271b9022c..4869a6856bf15 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2288,7 +2288,6 @@ rcu_report_qs_rdp(struct rcu_data *rdp) unsigned long flags; unsigned long mask; bool needwake = false; - const bool offloaded = rcu_rdp_is_offloaded(rdp); struct rcu_node *rnp; WARN_ON_ONCE(rdp->cpu != smp_processor_id()); @@ -2315,8 +2314,10 @@ rcu_report_qs_rdp(struct rcu_data *rdp) /* * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. + * + * NOCB kthreads have their own way to deal with that. */ - if (!offloaded) + if (!rcu_rdp_is_offloaded(rdp)) needwake = rcu_accelerate_cbs(rnp, rdp); rcu_disable_urgency_upon_qs(rdp); From 10e4a80b408b13bee6fc75c72759123928def002 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:10 +0200 Subject: [PATCH 0694/1202] rcu/nocb: Make rcu_core() callbacks acceleration (de-)offloading safe When callbacks are offloaded, the NOCB kthreads handle the callbacks progression on behalf of rcu_core(). However, during the (de-)offloading process, the kthread may not be entirely up to the task. As a result some callbacks' grace-period sequence numbers may remain stale for a while because rcu_core() won't take care of them either. Fix this by forcing callbacks acceleration from rcu_core() as long as the offloading process isn't complete. Reported-and-tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 4869a6856bf15..a43924244000e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2288,6 +2288,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp) unsigned long flags; unsigned long mask; bool needwake = false; + bool needacc = false; struct rcu_node *rnp; WARN_ON_ONCE(rdp->cpu != smp_processor_id()); @@ -2315,16 +2316,29 @@ rcu_report_qs_rdp(struct rcu_data *rdp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. * - * NOCB kthreads have their own way to deal with that...
*/ - if (!rcu_rdp_is_offloaded(rdp)) + if (!rcu_rdp_is_offloaded(rdp)) { needwake = rcu_accelerate_cbs(rnp, rdp); + } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) { + /* + * ...but NOCB kthreads may miss or delay callbacks acceleration + * if in the middle of a (de-)offloading process. + */ + needacc = true; + } rcu_disable_urgency_upon_qs(rdp); rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags); /* ^^^ Released rnp->lock */ if (needwake) rcu_gp_kthread_wake(); + + if (needacc) { + rcu_nocb_lock_irqsave(rdp, flags); + rcu_accelerate_cbs_unlocked(rnp, rdp); + rcu_nocb_unlock_irqrestore(rdp, flags); + } } } From 54469a0f0f1a0eb91952abae364ef6403f1a51d5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:11 +0200 Subject: [PATCH 0695/1202] rcu/nocb: Check a stable offloaded state to manipulate qlen_last_fqs_check It's not entirely obvious why rdp->qlen_last_fqs_check is updated before processing the queue only on offloaded rdp. There can be different effects to that, either in favour of triggering the force quiescent state path or not. For example: 1) If the number of callbacks has decreased since the last rdp->qlen_last_fqs_check update (because we recently called rcu_do_batch() and we executed below qhimark callbacks) and the number of processed callbacks on a subsequent do_batch() arranges for exceeding qhimark on non-offloaded but not on offloaded setup, then we may spare a later run to the force quiescent state slow path on __call_rcu_nocb_wake(), as compared to the non-offloaded counterpart scenario. Here is such an offloaded scenario instance: qhimark = 1000 rdp->qlen_last_fqs_check = 3000 rcu_segcblist_n_cbs(rdp) = 2000 rcu_do_batch() { if (offloaded) rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(rdp) // 2000 // run 1000 callbacks rcu_segcblist_n_cbs(rdp) = 1000 // Not updating rdp->qlen_last_fqs_check if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; } call_rcu() * 1001 { __call_rcu_nocb_wake() { // not taking the fqs slowpath: // rcu_segcblist_n_cbs(rdp) == 2001 // rdp->qlen_last_fqs_check == 2000 // qhimark == 1000 if (len > rdp->qlen_last_fqs_check + qhimark) ... } In the case of a non-offloaded scenario, rdp->qlen_last_fqs_check would be 1000 and the fqs slowpath would have executed. 2) If the number of callbacks has increased since the last rdp->qlen_last_fqs_check update (because we recently queued below qhimark callbacks) and the number of callbacks executed in rcu_do_batch() doesn't exceed qhimark for either offloaded or non-offloaded setup, then it's possible that the offloaded scenario later runs the force quiescent state slow path on __call_rcu_nocb_wake() while the non-offloaded doesn't. qhimark = 1000 rdp->qlen_last_fqs_check = 3000 rcu_segcblist_n_cbs(rdp) = 2000 rcu_do_batch() { if (offloaded) rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(rdp) // 2000 // run 100 callbacks // concurrent queued 100 rcu_segcblist_n_cbs(rdp) = 2000 // Not updating rdp->qlen_last_fqs_check if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; } call_rcu() * 1001 { __call_rcu_nocb_wake() { // Taking the fqs slowpath: // rcu_segcblist_n_cbs(rdp) == 3001 // rdp->qlen_last_fqs_check == 2000 // qhimark == 1000 if (len > rdp->qlen_last_fqs_check + qhimark) ... } In the case of a non-offloaded scenario, rdp->qlen_last_fqs_check would be 3000 and the fqs slowpath wouldn't have executed.
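To make the two scenarios concrete, the wake-side test they both hinge on is essentially len > rdp->qlen_last_fqs_check + qhimark in __call_rcu_nocb_wake(); plugging in the numbers above:

        /* Scenario 1, offloaded:     2001 > 2000 + 1000 is false -> fqs slowpath spared. */
        /* Scenario 1, non-offloaded: 2001 > 1000 + 1000 is true  -> fqs slowpath taken.  */
        /* Scenario 2, offloaded:     3001 > 2000 + 1000 is true  -> fqs slowpath taken.  */
        /* Scenario 2, non-offloaded: 3001 > 3000 + 1000 is false -> fqs slowpath spared. */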
The reason for updating rdp->qlen_last_fqs_check when invoking callbacks for offloaded CPUs is that there is usually no point in waking up either the rcuog or rcuoc kthreads while in this state. After all, both threads are prohibited from indefinite sleeps. The exception is when some huge number of callbacks are enqueued while rcu_do_batch() is in the midst of invoking, in which case interrupting the rcuog kthread's timed sleep might get more callbacks set up for the next grace period. Reported-and-tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Original-patch-by: Thomas Gleixner Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a43924244000e..27500981d4b1c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2508,7 +2508,7 @@ static void rcu_do_batch(struct rcu_data *rdp) trace_rcu_batch_start(rcu_state.name, rcu_segcblist_n_cbs(&rdp->cblist), bl); rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); - if (offloaded) + if (rcu_rdp_is_offloaded(rdp)) rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued")); From 81b49300b321691880e0e2737a43760e4129ab35 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:12 +0200 Subject: [PATCH 0696/1202] rcu/nocb: Use appropriate rcu_nocb_lock_irqsave() Instead of hardcoding IRQ save and nocb lock, use the consolidated API (and fix a comment as per Valentin Schneider's suggestion). Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 27500981d4b1c..eaa9c7ce91bba 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2488,12 +2488,11 @@ static void rcu_do_batch(struct rcu_data *rdp) } /* - * Extract the list of ready callbacks, disabling to prevent + * Extract the list of ready callbacks, disabling IRQs to prevent * races with call_rcu() from interrupt handlers. Leave the * callback counts, as rcu_barrier() needs to be conservative. */ - local_irq_save(flags); - rcu_nocb_lock(rdp); + rcu_nocb_lock_irqsave(rdp, flags); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); pending = rcu_segcblist_n_cbs(&rdp->cblist); div = READ_ONCE(rcu_divisor); @@ -2556,8 +2555,7 @@ static void rcu_do_batch(struct rcu_data *rdp) } } - local_irq_save(flags); - rcu_nocb_lock(rdp); + rcu_nocb_lock_irqsave(rdp, flags); rdp->n_cbs_invoked += count; trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(), is_idle_task(current), rcu_is_callbacks_kthread()); From 21bff5ed2eb01e7ba05cebe7eb865658f1e2005f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:13 +0200 Subject: [PATCH 0697/1202] rcu/nocb: Limit number of softirq callbacks only on softirq The current condition to limit the number of callbacks executed in a row checks the offloaded state of the rdp. 
Not only is it volatile but it is also misleading: rcu_core() may well be executing callbacks concurrently with NOCB kthreads, and the offloaded state would then be verified in both cases. As a result, the limit would spuriously not apply anymore on softirq while in the middle of the (de-)offloading process. Fix and clarify the condition with those constraints in mind: _ If callbacks are processed either by rcuc or NOCB kthread, the call to cond_resched_tasks_rcu_qs() is enough to take care of the overload. _ If instead callbacks are processed by softirqs: * If need_resched(), exit the callbacks processing * Otherwise if CPU is idle we can continue * Otherwise exit because a softirq shouldn't interrupt a task for too long nor deprive other pending softirq vectors of the CPU. Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index eaa9c7ce91bba..716dead1509d0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2535,9 +2535,8 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Stop only if limit reached and CPU has something to do. */ - if (count >= bl && !offloaded && - (need_resched() || - (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) + if (count >= bl && in_serving_softirq() && + (need_resched() || !is_idle_task(current))) break; if (unlikely(tlimit)) { /* only call local_clock() every 32 callbacks */ From f6819ab6626c8e9e4ac3b9dfcd6fcfa8244913ad Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:14 +0200 Subject: [PATCH 0698/1202] rcu: Fix callbacks processing time limit retaining cond_resched() The callbacks processing time limit makes sure we are not exceeding a given amount of time executing the queue. However its "continue" clause bypasses the cond_resched() call on rcuc and NOCB kthreads, delaying it until we reach the limit, which can be very long... Make sure the scheduler has a higher priority than the time limit. Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 716dead1509d0..7a655d93a28a0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2535,9 +2535,21 @@ static void rcu_do_batch(struct rcu_data *rdp) /* * Stop only if limit reached and CPU has something to do. */ - if (count >= bl && (need_resched() || !is_idle_task(current))) - break; + if (in_serving_softirq()) { + if (count >= bl && (need_resched() || !is_idle_task(current))) + break; + } else { + local_bh_enable(); + lockdep_assert_irqs_enabled(); + cond_resched_tasks_rcu_qs(); + lockdep_assert_irqs_enabled(); + local_bh_disable(); + } + + /* + * Make sure we don't spend too much time here and deprive other + * softirq vectors of CPU cycles.
+ */ if (unlikely(tlimit)) { /* only call local_clock() every 32 callbacks */ if (likely((count & 31) || local_clock() < tlimit)) @@ -2545,13 +2557,6 @@ static void rcu_do_batch(struct rcu_data *rdp) /* Exceeded the time limit, so leave. */ break; } - if (!in_serving_softirq()) { - local_bh_enable(); - lockdep_assert_irqs_enabled(); - cond_resched_tasks_rcu_qs(); - lockdep_assert_irqs_enabled(); - local_bh_disable(); - } } rcu_nocb_lock_irqsave(rdp, flags); From 622d3c35c5e828e7a56a3373b62fa13eba07c35d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:15 +0200 Subject: [PATCH 0699/1202] rcu: Apply callbacks processing time limit only on softirq The time limit only makes sense when callbacks are serviced in softirq mode because: _ In case we need to get back to the scheduler, cond_resched_tasks_rcu_qs() is called after each callback. _ In case some other softirq vector needs the CPU, the call to local_bh_enable() before cond_resched_tasks_rcu_qs() takes care of them via a call to do_softirq(). Therefore, make sure the time limit only applies to softirq mode. Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7a655d93a28a0..0ca4f4ec724d0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2498,7 +2498,7 @@ static void rcu_do_batch(struct rcu_data *rdp) div = READ_ONCE(rcu_divisor); div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; bl = max(rdp->blimit, pending >> div); - if (unlikely(bl > 100)) { + if (in_serving_softirq() && unlikely(bl > 100)) { long rrn = READ_ONCE(rcu_resched_ns); rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn; @@ -2538,6 +2538,17 @@ static void rcu_do_batch(struct rcu_data *rdp) if (in_serving_softirq()) { if (count >= bl && (need_resched() || !is_idle_task(current))) break; + /* + * Make sure we don't spend too much time here and deprive other + * softirq vectors of CPU cycles. + */ + if (unlikely(tlimit)) { + /* only call local_clock() every 32 callbacks */ + if (likely((count & 31) || local_clock() < tlimit)) + continue; + /* Exceeded the time limit, so leave. */ + break; + } } else { local_bh_enable(); lockdep_assert_irqs_enabled(); cond_resched_tasks_rcu_qs(); lockdep_assert_irqs_enabled(); local_bh_disable(); } - - /* - * Make sure we don't spend too much time here and deprive other - * softirq vectors of CPU cycles. - */ - if (unlikely(tlimit)) { - /* only call local_clock() every 32 callbacks */ - if (likely((count & 31) || local_clock() < tlimit)) - continue; - /* Exceeded the time limit, so leave. */ - break; - } } rcu_nocb_lock_irqsave(rdp, flags); From c651c19ea55552bd685cd786ea902f32bda3606a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 19 Oct 2021 02:08:16 +0200 Subject: [PATCH 0700/1202] rcu/nocb: Don't invoke local rcu core on callback overload from nocb kthread rcu_core() tries to ensure that its self-invocation in case of callbacks overload only happens in softirq/rcuc mode.
Indeed it doesn't make sense to trigger local RCU core from nocb_cb kthread since it can execute on a CPU different from the target rdp. Also, in case of overload, the nocb_cb kthread simply iterates a new loop of callbacks processing. However, the "offloaded" check that aims at preventing misplaced rcu_core() invocations is wrong. First of all, that state is volatile, and second, softirq/rcuc can execute while the target rdp is offloaded. As a result, rcu_core() can be invoked on the wrong CPU while in the process of (de-)offloading. Fix that by moving the rcu_core() self-invocation to rcu_core() itself, irrespective of the rdp offloaded state. Tested-by: Valentin Schneider Tested-by: Sebastian Andrzej Siewior Signed-off-by: Frederic Weisbecker Cc: Valentin Schneider Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Josh Triplett Cc: Joel Fernandes Cc: Boqun Feng Cc: Neeraj Upadhyay Cc: Uladzislau Rezki Cc: Thomas Gleixner Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0ca4f4ec724d0..8270e58cd0f38 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2470,7 +2470,6 @@ static void rcu_do_batch(struct rcu_data *rdp) int div; bool __maybe_unused empty; unsigned long flags; - const bool offloaded = rcu_rdp_is_offloaded(rdp); struct rcu_head *rhp; struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); long bl, count = 0; @@ -2592,9 +2591,6 @@ static void rcu_do_batch(struct rcu_data *rdp) rcu_nocb_unlock_irqrestore(rdp, flags); - /* Re-invoke RCU core processing if there are callbacks remaining. */ - if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist)) - invoke_rcu_core(); tick_dep_clear_task(current, TICK_DEP_BIT_RCU); } @@ -2781,8 +2777,12 @@ static __latent_entropy void rcu_core(void) /* If there are callbacks ready, invoke them. */ if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) && - likely(READ_ONCE(rcu_scheduler_fully_active))) + likely(READ_ONCE(rcu_scheduler_fully_active))) { rcu_do_batch(rdp); + /* Re-invoke RCU core processing if there are callbacks remaining. */ + if (rcu_segcblist_ready_cbs(&rdp->cblist)) + invoke_rcu_core(); + } /* Do any needed deferred wakeups of rcuo kthreads. */ do_nocb_deferred_wakeup(rdp); From 9ec2a03bbf4bee3d9fbc02a402dee36efafc5a2d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 May 2021 11:03:28 -0700 Subject: [PATCH 0701/1202] clocksource: Forgive repeated long-latency watchdog clocksource reads Currently, the clocksource watchdog reacts to repeated long-latency clocksource reads by marking that clocksource unstable on the theory that these long-latency reads are a sign of a serious problem. And this theory does in fact have real-world support in the form of firmware issues [1]. However, it is also possible to trigger this using stress-ng on what the stress-ng man page terms "poorly designed hardware" [2]. And it is not necessarily a bad thing for the kernel to diagnose cases where heavy memory-contention workloads are being run on hardware that is not designed for this sort of use. Nevertheless, it is quite possible that real-world use will result in some situation requiring that high-stress workloads run on hardware not designed to accommodate them, and also requiring that the kernel refrain from marking clocksources unstable. Therefore, react to persistent long-latency reads by leaving the clocksource alone, but using the old 62.5-millisecond skew-detection threshold.
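In the code this old threshold shows up as a nanosecond count; as a quick sanity check of the constant used in the clocksource.c hunk below (NSEC_PER_USEC is 1000):

        md = 62500 * NSEC_PER_USEC;     /* 62,500 us * 1000 ns/us = 62.5 ms coarse skew margin */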
In addition, the offending clocksource is marked for re-initialization, which both restarts that clocksource with a clean bill of health and avoids false-positive skew reports on later watchdog checks. Once marked for re-initialization, the clocksource is not subjected to further watchdog checking until a subsequent successful read from that clocksource that is free of excessive delays. However, if clocksource.max_cswd_coarse_reads consecutive clocksource read attempts result in long latencies, a warning (splat) will be emitted. This kernel boot parameter defaults to 100, and this warning can be disabled by setting it to zero or to a negative value. [ paulmck: Apply feedback from Chao Gao ] Link: https://lore.kernel.org/lkml/20210513155515.GB23902@xsang-OptiPlex-9020/ # [1] Link: https://lore.kernel.org/lkml/20210521083322.GG25531@xsang-OptiPlex-9020/ # [2] Link: https://lore.kernel.org/lkml/20210521084405.GH25531@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/20210511233403.GA2896757@paulmck-ThinkPad-P17-Gen-1/ Tested-by: Chao Gao Signed-off-by: Paul E. McKenney --- .../admin-guide/kernel-parameters.txt | 8 ++++ include/linux/clocksource.h | 2 + kernel/time/clocksource-wdtest.c | 5 +-- kernel/time/clocksource.c | 38 +++++++++++++------ 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 316027c3aadce..61d2436ae9dfc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -600,6 +600,14 @@ loops can be debugged more effectively on production systems. + clocksource.max_cswd_coarse_reads= [KNL] + Number of consecutive clocksource_watchdog() + coarse reads (that is, clocksource reads that + were unduly delayed) that are permitted before + the kernel complains (gently). Set to a value + less than or equal to zero to suppress these + complaints. 
+ clocksource.max_cswd_read_retries= [KNL] Number of clocksource_watchdog() retries due to external delays before the clock will be marked diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1d42d4b173271..3e925d9ffc31c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -110,6 +110,7 @@ struct clocksource { int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; + unsigned int n_coarse_reads; unsigned long flags; int (*enable)(struct clocksource *cs); @@ -291,6 +292,7 @@ static inline void timer_probe(void) {} #define TIMER_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn) +extern int max_cswd_coarse_reads; extern ulong max_cswd_read_retries; void clocksource_verify_percpu(struct clocksource *cs); diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c index df922f49d171b..7e82500c400b9 100644 --- a/kernel/time/clocksource-wdtest.c +++ b/kernel/time/clocksource-wdtest.c @@ -145,13 +145,12 @@ static int wdtest_func(void *arg) else if (i <= max_cswd_read_retries) s = ", expect message"; else - s = ", expect clock skew"; + s = ", expect coarse-grained clock skew check and re-initialization"; pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s); WRITE_ONCE(wdtest_ktime_read_ndelays, i); schedule_timeout_uninterruptible(2 * HZ); WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays)); - WARN_ON_ONCE((i <= max_cswd_read_retries) != - !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE)); + WARN_ON_ONCE(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE); wdtest_ktime_clocksource_reset(); } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b8a14d2fb5ba6..796a127aabb94 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -199,6 +199,9 @@ void clocksource_mark_unstable(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +int max_cswd_coarse_reads = 100; +module_param(max_cswd_coarse_reads, int, 0644); +EXPORT_SYMBOL_GPL(max_cswd_coarse_reads); ulong max_cswd_read_retries = 3; module_param(max_cswd_read_retries, ulong, 0644); EXPORT_SYMBOL_GPL(max_cswd_read_retries); @@ -226,13 +229,22 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } - return true; + cs->n_coarse_reads = 0; + return false; } + WARN_ONCE(max_cswd_coarse_reads > 0 && + !(++cs->n_coarse_reads % max_cswd_coarse_reads), + "timekeeping watchdog on CPU%d: %s %u consecutive coarse-grained reads\n", smp_processor_id(), watchdog->name, cs->n_coarse_reads); } - pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n", - smp_processor_id(), watchdog->name, wd_delay, nretries); - return false; + if ((cs->flags & CLOCK_SOURCE_WATCHDOG) && !atomic_read(&watchdog_reset_pending)) { + pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, coarse-grained skew check followed by re-initialization\n", + smp_processor_id(), watchdog->name, wd_delay, nretries); + } else { + pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, awaiting re-initialization\n", + smp_processor_id(), watchdog->name, wd_delay, nretries); + } + return true; } static u64 csnow_mid; @@ -356,6 +368,7 @@ static void clocksource_watchdog(struct timer_list *unused) int next_cpu, 
reset_pending; int64_t wd_nsec, cs_nsec; struct clocksource *cs; + bool coarse; u32 md; spin_lock(&watchdog_lock); @@ -373,16 +386,13 @@ static void clocksource_watchdog(struct timer_list *unused) continue; } - if (!cs_watchdog_read(cs, &csnow, &wdnow)) { - /* Clock readout unreliable, so give it up. */ - __clocksource_unstable(cs); - continue; - } + coarse = cs_watchdog_read(cs, &csnow, &wdnow); /* Clocksource initialized ? */ if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || atomic_read(&watchdog_reset_pending)) { - cs->flags |= CLOCK_SOURCE_WATCHDOG; + if (!coarse) + cs->flags |= CLOCK_SOURCE_WATCHDOG; cs->wd_last = wdnow; cs->cs_last = csnow; continue; @@ -403,7 +413,13 @@ static void clocksource_watchdog(struct timer_list *unused) continue; /* Check the deviation from the watchdog clocksource. */ - md = cs->uncertainty_margin + watchdog->uncertainty_margin; + if (coarse) { + md = 62500 * NSEC_PER_USEC; + cs->flags &= ~CLOCK_SOURCE_WATCHDOG; + pr_warn("timekeeping watchdog on CPU%d: %s coarse-grained %lu.%03lu ms clock-skew check followed by re-initialization\n", smp_processor_id(), watchdog->name, md / NSEC_PER_MSEC, md % NSEC_PER_MSEC / NSEC_PER_USEC); + } else { + md = cs->uncertainty_margin + watchdog->uncertainty_margin; + } if (abs(cs_nsec - wd_nsec) > md) { pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n", smp_processor_id(), cs->name); From 6161068bf209e4f6b5ab68f2bcd7e046f1e79298 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 22 Oct 2021 16:04:24 -0700 Subject: [PATCH 0702/1202] torture: Test splatting for delay-ridden clocksources Provide a means of testing the warnings/splats that are to be generated when a clocksource has too many consecutive delay-ridden reads. Signed-off-by: Paul E. McKenney --- kernel/time/clocksource-wdtest.c | 16 +++++++++++----- .../testing/selftests/rcutorture/bin/torture.sh | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c index 7e82500c400b9..ffa881e9c6062 100644 --- a/kernel/time/clocksource-wdtest.c +++ b/kernel/time/clocksource-wdtest.c @@ -104,7 +104,7 @@ static void wdtest_ktime_clocksource_reset(void) static int wdtest_func(void *arg) { unsigned long j1, j2; - char *s; + char *s1, *s2; int i; schedule_timeout_uninterruptible(holdoff * HZ); @@ -141,12 +141,18 @@ static int wdtest_func(void *arg) /* Verify tsc-like stability with various numbers of errors injected. 
*/ for (i = 0; i <= max_cswd_read_retries + 1; i++) { if (i <= 1 && i < max_cswd_read_retries) - s = ""; + s1 = ""; else if (i <= max_cswd_read_retries) - s = ", expect message"; + s1 = ", expect message"; else - s = ", expect coarse-grained clock skew check and re-initialization"; - pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s); + s1 = ", expect coarse-grained clock skew check and re-initialization"; + if (i != max_cswd_coarse_reads) + s2 = ""; + else if (!s1[0]) + s2 = ", expect splat"; + else + s2 = " along with a splat"; + pr_info("--- Watchdog with %dx error injection, %lu retries%s%s.\n", i, max_cswd_read_retries, s1, s2); WRITE_ONCE(wdtest_ktime_read_ndelays, i); schedule_timeout_uninterruptible(2 * HZ); WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays)); diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index eae88aacca2aa..738a633f2f6f5 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -386,7 +386,7 @@ fi if test "$do_clocksourcewd" = "yes" then - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000" + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_coarse_reads=-1" # max_cswd_coarse_reads=2 to test splatting. torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1" From 992ed0c72eb9c459c402205ce274904ea789a780 Mon Sep 17 00:00:00 2001 From: Thomas Perrot Date: Fri, 22 Oct 2021 16:21:04 +0200 Subject: [PATCH 0703/1202] spi: pl022: fix Microwire full duplex mode There are missing braces in the function that verifies the controller parameters, so an error is always returned when the parameter selecting Microwire frame operation is used on devices that allow it. Signed-off-by: Thomas Perrot Link: https://lore.kernel.org/r/20211022142104.1386379-1-thomas.perrot@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-pl022.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index feebda66f56eb..e4484ace584e4 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1716,12 +1716,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; From 8292a174d55e43f24c402dc82502cb7e67786756 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Oct 2021 07:52:23 -0600 Subject: [PATCH 0704/1202] usb: remove res2 argument from gadget code completions The USB gadget code is the only code that ever tried to utilize the 2nd argument of the aio completions, but there are strong suspicions that it was never actually used by anything on the userspace side.
Of the three cases that touch it, two just pass in the same value as res, and the last one passes the error/transfer count in res like any other normal use case. Remove the 2nd argument and pass 0, like the rest of the in-kernel users of kiocb-based IO. Link: https://lore.kernel.org/linux-block/20211021174021.273c82b1.john@metanate.com/ Signed-off-by: Jens Axboe --- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 8260f38025b72..7bd22398d52ff 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work) kthread_unuse_mm(io_data->mm); } - io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); + io_data->kiocb->ki_complete(io_data->kiocb, ret, 0); if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd, 1); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 539220d7f5b62..28d3d4e711829 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -469,7 +469,7 @@ static void ep_user_copy_worker(struct work_struct *work) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ - iocb->ki_complete(iocb, ret, ret); + iocb->ki_complete(iocb, ret, 0); kfree(priv->buf); kfree(priv->to_free); @@ -496,11 +496,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) kfree(priv->to_free); kfree(priv); iocb->private = NULL; - /* aio_complete() reports bytes-transferred _and_ faults */ - iocb->ki_complete(iocb, req->actual ? req->actual : (long)req->status, - req->status); + iocb->ki_complete(iocb, + req->actual ? req->actual : (long)req->status, + 0); } else { /* ep_copy_to_user() won't report both; we hide some faults */ if (unlikely(0 != req->status)) From 085df4af9a9c12a8cc0acef6b55e4617f5ca8c02 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Oct 2021 09:22:35 -0600 Subject: [PATCH 0705/1202] fs: get rid of the res2 iocb->ki_complete argument The second argument was only used by the USB gadget code, yet everyone pays the overhead of passing a zero into aio, where it ends up being part of the aio res2 value. Now that everybody is passing in zero, kill off the extra argument.
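For illustration, here is a minimal userspace sketch of the narrowed completion contract (not the kernel code; the struct and function names below are invented for the example). The point is that callers now report a single result and the aio core fills in res2 as zero on their behalf:

    #include <stdio.h>

    struct demo_iocb;
    /* after the change: one result value instead of two */
    typedef void (*ki_complete_t)(struct demo_iocb *iocb, long ret);

    struct demo_iocb {
            const char *name;
            ki_complete_t complete;
    };

    static void demo_complete(struct demo_iocb *iocb, long ret)
    {
            /* the aio core now reports res2 = 0 for every caller */
            printf("%s: res=%ld res2=0\n", iocb->name, ret);
    }

    int main(void)
    {
            struct demo_iocb iocb = { "demo", demo_complete };

            iocb.complete(&iocb, 4096); /* e.g. bytes transferred */
            return 0;
    }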
Signed-off-by: Jens Axboe --- block/fops.c | 2 +- crypto/af_alg.c | 2 +- drivers/block/loop.c | 4 ++-- drivers/nvme/target/io-cmd-file.c | 4 ++-- drivers/target/target_core_file.c | 4 ++-- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 5 ++--- fs/aio.c | 6 +++--- fs/cachefiles/io.c | 12 ++++++------ fs/ceph/file.c | 2 +- fs/cifs/file.c | 4 ++-- fs/direct-io.c | 2 +- fs/fuse/file.c | 2 +- fs/io_uring.c | 6 +++--- fs/iomap/direct-io.c | 2 +- fs/nfs/direct.c | 2 +- fs/overlayfs/file.c | 4 ++-- include/linux/fs.h | 2 +- 18 files changed, 33 insertions(+), 34 deletions(-) diff --git a/block/fops.c b/block/fops.c index 396537598e3e9..d86ebda73e8cf 100644 --- a/block/fops.c +++ b/block/fops.c @@ -164,7 +164,7 @@ static void blkdev_bio_end_io(struct bio *bio) ret = blk_status_to_errno(dio->bio.bi_status); } - dio->iocb->ki_complete(iocb, ret, 0); + dio->iocb->ki_complete(iocb, ret); if (dio->flags & DIO_MULTI_BIO) bio_put(&dio->bio); } else { diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 8bd288d2b089b..3dd5a773c320b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1076,7 +1076,7 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) af_alg_free_resources(areq); sock_put(sk); - iocb->ki_complete(iocb, err ? err : (int)resultlen, 0); + iocb->ki_complete(iocb, err ? err : (int)resultlen); } EXPORT_SYMBOL_GPL(af_alg_async_cb); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 7bf4686af774e..469d87e24fd45 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -554,7 +554,7 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd) blk_mq_complete_request(rq); } -static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) +static void lo_rw_aio_complete(struct kiocb *iocb, long ret) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); @@ -627,7 +627,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, lo_rw_aio_do_completion(cmd); if (ret != -EIOCBQUEUED) - cmd->iocb.ki_complete(&cmd->iocb, ret, 0); + lo_rw_aio_complete(&cmd->iocb, ret); return 0; } diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 1dd1a0fe2e819..6aa30f30b572e 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -125,7 +125,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, return call_iter(iocb, &iter); } -static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) +static void nvmet_file_io_done(struct kiocb *iocb, long ret) { struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb); u16 status = NVME_SC_SUCCESS; @@ -222,7 +222,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) } complete: - nvmet_file_io_done(&req->f.iocb, ret, 0); + nvmet_file_io_done(&req->f.iocb, ret); return true; } diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 02f64453b4c5f..8190b840065f3 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -245,7 +245,7 @@ struct target_core_file_cmd { struct bio_vec bvecs[]; }; -static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) +static void cmd_rw_aio_complete(struct kiocb *iocb, long ret) { struct target_core_file_cmd *cmd; @@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, ret = call_read_iter(file, &aio_cmd->iocb, &iter); if (ret != -EIOCBQUEUED) - cmd_rw_aio_complete(&aio_cmd->iocb, ret, 0); + 
cmd_rw_aio_complete(&aio_cmd->iocb, ret); return 0; } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7bd22398d52ff..e20c19a0f1062 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work) kthread_unuse_mm(io_data->mm); } - io_data->kiocb->ki_complete(io_data->kiocb, ret, 0); + io_data->kiocb->ki_complete(io_data->kiocb, ret); if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd, 1); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 28d3d4e711829..78be947502329 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -469,7 +469,7 @@ static void ep_user_copy_worker(struct work_struct *work) ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ - iocb->ki_complete(iocb, ret, 0); + iocb->ki_complete(iocb, ret); kfree(priv->buf); kfree(priv->to_free); @@ -497,8 +497,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) kfree(priv); iocb->private = NULL; iocb->ki_complete(iocb, - req->actual ? req->actual : (long)req->status, - 0); + req->actual ? req->actual : (long)req->status); } else { /* ep_copy_to_user() won't report both; we hide some faults */ if (unlikely(0 != req->status)) diff --git a/fs/aio.c b/fs/aio.c index 51b08ab01dffc..836dc7e48db75 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1417,7 +1417,7 @@ static void aio_remove_iocb(struct aio_kiocb *iocb) spin_unlock_irqrestore(&ctx->ctx_lock, flags); } -static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) +static void aio_complete_rw(struct kiocb *kiocb, long res) { struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw); @@ -1437,7 +1437,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) } iocb->ki_res.res = res; - iocb->ki_res.res2 = res2; + iocb->ki_res.res2 = 0; iocb_put(iocb); } @@ -1508,7 +1508,7 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret) ret = -EINTR; fallthrough; default: - req->ki_complete(req, ret, 0); + req->ki_complete(req, ret); } } diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index fac2e8e7b533e..effe37ef86291 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -37,11 +37,11 @@ static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) /* * Handle completion of a read from the cache. */ -static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2) +static void cachefiles_read_complete(struct kiocb *iocb, long ret) { struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); - _enter("%ld,%ld", ret, ret2); + _enter("%ld", ret); if (ki->term_func) { if (ret >= 0) @@ -139,7 +139,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, fallthrough; default: ki->was_async = false; - cachefiles_read_complete(&ki->iocb, ret, 0); + cachefiles_read_complete(&ki->iocb, ret); if (ret > 0) ret = 0; break; @@ -159,12 +159,12 @@ static int cachefiles_read(struct netfs_cache_resources *cres, /* * Handle completion of a write to the cache. 
*/ -static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2) +static void cachefiles_write_complete(struct kiocb *iocb, long ret) { struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); struct inode *inode = file_inode(ki->iocb.ki_filp); - _enter("%ld,%ld", ret, ret2); + _enter("%ld", ret); /* Tell lockdep we inherited freeze protection from submission thread */ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); @@ -244,7 +244,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, fallthrough; default: ki->was_async = false; - cachefiles_write_complete(&ki->iocb, ret, 0); + cachefiles_write_complete(&ki->iocb, ret); if (ret > 0) ret = 0; break; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d16fd2d5fd426..e10a80c1b5812 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1023,7 +1023,7 @@ static void ceph_aio_complete(struct inode *inode, ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR : CEPH_CAP_FILE_RD)); - aio_req->iocb->ki_complete(aio_req->iocb, ret, 0); + aio_req->iocb->ki_complete(aio_req->iocb, ret); ceph_free_cap_flush(aio_req->prealloc_cf); kfree(aio_req); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 13f3182cf7969..1b855fcb179eb 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3184,7 +3184,7 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) mutex_unlock(&ctx->aio_mutex); if (ctx->iocb && ctx->iocb->ki_complete) - ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0); + ctx->iocb->ki_complete(ctx->iocb, ctx->rc); else complete(&ctx->done); } @@ -3917,7 +3917,7 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx) mutex_unlock(&ctx->aio_mutex); if (ctx->iocb && ctx->iocb->ki_complete) - ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0); + ctx->iocb->ki_complete(ctx->iocb, ctx->rc); else complete(&ctx->done); } diff --git a/fs/direct-io.c b/fs/direct-io.c index 453dcff0e7f56..6544435580470 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -307,7 +307,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) if (ret > 0 && dio->op == REQ_OP_WRITE) ret = generic_write_sync(dio->iocb, ret); - dio->iocb->ki_complete(dio->iocb, ret, 0); + dio->iocb->ki_complete(dio->iocb, ret); } kmem_cache_free(dio_cache, dio); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 11404f8c21c75..e6039f22311b5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -687,7 +687,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) spin_unlock(&fi->lock); } - io->iocb->ki_complete(io->iocb, res, 0); + io->iocb->ki_complete(io->iocb, res); } kref_put(&io->refcnt, fuse_io_release); diff --git a/fs/io_uring.c b/fs/io_uring.c index d4631a55a6920..edf29406ba8b0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2689,7 +2689,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, __io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req)); } -static void io_complete_rw(struct kiocb *kiocb, long res, long res2) +static void io_complete_rw(struct kiocb *kiocb, long res) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); @@ -2700,7 +2700,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) io_req_task_work_add(req); } -static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); @@ -2913,7 +2913,7 @@ static inline void io_rw_done(struct kiocb 
*kiocb, ssize_t ret) ret = -EINTR; fallthrough; default: - kiocb->ki_complete(kiocb, ret, 0); + kiocb->ki_complete(kiocb, ret); } } diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 83ecfba53abe1..811c898125a54 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -125,7 +125,7 @@ static void iomap_dio_complete_work(struct work_struct *work) struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work); struct kiocb *iocb = dio->iocb; - iocb->ki_complete(iocb, iomap_dio_complete(dio), 0); + iocb->ki_complete(iocb, iomap_dio_complete(dio)); } /* diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2e894fec036b0..7a5f287c53915 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -275,7 +275,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) res = (long) dreq->count; WARN_ON_ONCE(dreq->count < 0); } - dreq->iocb->ki_complete(dreq->iocb, res, 0); + dreq->iocb->ki_complete(dreq->iocb, res); } complete(&dreq->completion); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index c88ac571593dc..ac461a4998827 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -272,14 +272,14 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) kmem_cache_free(ovl_aio_request_cachep, aio_req); } -static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) +static void ovl_aio_rw_complete(struct kiocb *iocb, long res) { struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb); struct kiocb *orig_iocb = aio_req->orig_iocb; ovl_aio_cleanup_handler(aio_req); - orig_iocb->ki_complete(orig_iocb, res, res2); + orig_iocb->ki_complete(orig_iocb, res); } static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) diff --git a/include/linux/fs.h b/include/linux/fs.h index 31029a91f440e..0dcb9020a7b3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -330,7 +330,7 @@ struct kiocb { randomized_struct_fields_start loff_t ki_pos; - void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); + void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_hint; From 2359af41cacc6ae432634723f490069dedf36ef4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:13:56 +0100 Subject: [PATCH 0706/1202] io_uring: clean io_wq_submit_work()'s main loop Do a bit of cleaning for the main loop of io_wq_submit_work(). Get rid of the switch and replace it with a single if, as we're retrying in both other cases. Kill the issue_sqe label, get rid of the needs_poll nesting, and disambiguate the comment a bit. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ed12ce0c64e051f9a6b8a37a24f8ea554d299c29.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 736d456e7913f..7f92523c1282d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6749,40 +6749,24 @@ static void io_wq_submit_work(struct io_wq_work *work) } do { -issue_sqe: ret = io_issue_sqe(req, issue_flags); + if (ret != -EAGAIN) + break; /* - * We can get EAGAIN for polled IO even though we're + * We can get EAGAIN for iopolled IO even though we're * forcing a sync submission from here, since we can't * wait for request slots on the block side.
*/ - if (ret != -EAGAIN) - break; - if (needs_poll) { - bool armed = false; - - ret = 0; - needs_poll = false; - issue_flags &= ~IO_URING_F_NONBLOCK; - - switch (io_arm_poll_handler(req)) { - case IO_APOLL_READY: - goto issue_sqe; - case IO_APOLL_ABORTED: - /* - * somehow we failed to arm the poll infra, - * fallback it to a normal async worker try. - */ - break; - case IO_APOLL_OK: - armed = true; - break; - } - - if (armed) - break; + if (!needs_poll) { + cond_resched(); + continue; } - cond_resched(); + + if (io_arm_poll_handler(req) == IO_APOLL_OK) + return; + /* aborted or ready, in either case retry blocking */ + needs_poll = false; + issue_flags &= ~IO_URING_F_NONBLOCK; } while (1); } From 011d4d882512216dd7f36db567b1246e15f7d6af Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:13:57 +0100 Subject: [PATCH 0707/1202] io_uring: clean iowq submit work cancellation If we've got IO_WQ_WORK_CANCEL in io_wq_submit_work(), handle the error on the same lines as the check instead of having a weird code flow. The main loop doesn't change but goes one indention left. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ff4a09cf41f7a22bbb294b6f1faea721e21fe615.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 59 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7f92523c1282d..58cb3a14d58e8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6721,6 +6721,8 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + unsigned int issue_flags = IO_URING_F_UNLOCKED; + bool needs_poll = false; struct io_kiocb *timeout; int ret = 0; @@ -6735,40 +6737,37 @@ static void io_wq_submit_work(struct io_wq_work *work) io_queue_linked_timeout(timeout); /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ - if (work->flags & IO_WQ_WORK_CANCEL) - ret = -ECANCELED; + if (work->flags & IO_WQ_WORK_CANCEL) { + io_req_task_queue_fail(req, -ECANCELED); + return; + } - if (!ret) { - bool needs_poll = false; - unsigned int issue_flags = IO_URING_F_UNLOCKED; + if (req->flags & REQ_F_FORCE_ASYNC) { + needs_poll = req->file && file_can_poll(req->file); + if (needs_poll) + issue_flags |= IO_URING_F_NONBLOCK; + } - if (req->flags & REQ_F_FORCE_ASYNC) { - needs_poll = req->file && file_can_poll(req->file); - if (needs_poll) - issue_flags |= IO_URING_F_NONBLOCK; + do { + ret = io_issue_sqe(req, issue_flags); + if (ret != -EAGAIN) + break; + /* + * We can get EAGAIN for iopolled IO even though we're + * forcing a sync submission from here, since we can't + * wait for request slots on the block side. + */ + if (!needs_poll) { + cond_resched(); + continue; } - do { - ret = io_issue_sqe(req, issue_flags); - if (ret != -EAGAIN) - break; - /* - * We can get EAGAIN for iopolled IO even though we're - * forcing a sync submission from here, since we can't - * wait for request slots on the block side. 
- */ - if (!needs_poll) { - cond_resched(); - continue; - } - - if (io_arm_poll_handler(req) == IO_APOLL_OK) - return; - /* aborted or ready, in either case retry blocking */ - needs_poll = false; - issue_flags &= ~IO_URING_F_NONBLOCK; - } while (1); - } + if (io_arm_poll_handler(req) == IO_APOLL_OK) + return; + /* aborted or ready, in either case retry blocking */ + needs_poll = false; + issue_flags &= ~IO_URING_F_NONBLOCK; + } while (1); /* avoid locking problems by failing it from a clean context */ if (ret) From 7ec553c26c96841e1ae09e6bd44c823d1e5ac988 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:13:58 +0100 Subject: [PATCH 0708/1202] io_uring: check if opcode needs poll first on arming ->pollout or ->pollin are set only for opcodes that need a file, so if io_arm_poll_handler() tests them first we can be sure that the request has file set and the ->file check can be removed. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9adfe4f543d984875e516fce6da35348aab48668.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 58cb3a14d58e8..bff911f951ed5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5584,12 +5584,10 @@ static int io_arm_poll_handler(struct io_kiocb *req) struct io_poll_table ipt; __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI; - if (!req->file || !file_can_poll(req->file)) - return IO_APOLL_ABORTED; - if (req->flags & REQ_F_POLLED) - return IO_APOLL_ABORTED; if (!def->pollin && !def->pollout) return IO_APOLL_ABORTED; + if (!file_can_poll(req->file) || (req->flags & REQ_F_POLLED)) + return IO_APOLL_ABORTED; if (def->pollin) { mask |= POLLIN | POLLRDNORM; From b8497934abddfbc4ef7f8a959ffe339efaeabbd0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:13:59 +0100 Subject: [PATCH 0709/1202] io_uring: don't try io-wq polling if not supported If an opcode doesn't support polling, just let it be executed synchronously in iowq, otherwise it will do a nonblock attempt just to fail in io_arm_poll_handler() and return back to blocking execution. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6401256db01b88f448f15fcd241439cb76f5b940.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bff911f951ed5..c6f32fcf387b4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6741,9 +6741,13 @@ static void io_wq_submit_work(struct io_wq_work *work) } if (req->flags & REQ_F_FORCE_ASYNC) { - needs_poll = req->file && file_can_poll(req->file); - if (needs_poll) + const struct io_op_def *def = &io_op_defs[req->opcode]; + bool opcode_poll = def->pollin || def->pollout; + + if (opcode_poll && file_can_poll(req->file)) { + needs_poll = true; issue_flags |= IO_URING_F_NONBLOCK; + } } do { From 9f8f49bac367a1c14db2094ac5a12f0a9b84a97a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:14:00 +0100 Subject: [PATCH 0710/1202] io_uring: clean up timeout async_data allocation opcode prep functions are one of the first things that are called, we can't have ->async_data allocated at this point and it's certainly a bug. Reflect this assumption in io_timeout_prep() and add a WARN_ONCE just in case. 
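The pattern applied here - warn loudly once about a state that should be impossible, then fail the request rather than limp on - can be sketched in userspace C roughly as follows (illustrative only; WARN_ONCE_DEMO is a stand-in for the kernel's WARN_ON_ONCE, and the struct is invented):

    #include <stdio.h>
    #include <errno.h>

    #define WARN_ONCE_DEMO(cond, msg) ({                        \
            static int warned;                                  \
            int __cond = !!(cond);                              \
            if (__cond && !warned) {                            \
                    warned = 1;                                 \
                    fprintf(stderr, "WARNING: %s\n", msg);      \
            }                                                   \
            __cond;                                             \
    })

    struct demo_req { void *async_data; };

    static int demo_prep(struct demo_req *req)
    {
            /* prep runs first, so async_data must not exist yet */
            if (WARN_ONCE_DEMO(req->async_data, "async_data set at prep time"))
                    return -EFAULT;
            return 0;
    }

    int main(void)
    {
            struct demo_req req = { .async_data = &req };

            printf("prep returned %d\n", demo_prep(&req));
            return 0;
    }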
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/75a28ca7dbcc5af8b6cd9092819e8384c24dedd4.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c6f32fcf387b4..e775529a36d89 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6113,7 +6113,9 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (unlikely(off && !req->ctx->off_timeout_used)) req->ctx->off_timeout_used = true; - if (!req_has_async_data(req) && io_alloc_async_data(req)) + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; + if (io_alloc_async_data(req)) return -ENOMEM; data = req->async_data; From 5c834f6743af7803f44bc5cdd604cedd46b42498 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:14:01 +0100 Subject: [PATCH 0711/1202] io_uring: kill unused param from io_file_supports_nowait io_file_supports_nowait() doesn't use rw argument anymore, remove it. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4bd6709fc573d70c866ea656cb7a7dbe94be8026.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e775529a36d89..7042ed870b524 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2809,8 +2809,7 @@ static inline bool io_file_supports_nowait(struct io_kiocb *req) return req->flags & REQ_F_SUPPORT_NOWAIT; } -static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, - int rw) +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw.kiocb; @@ -3352,7 +3351,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_READ))) return -EBADF; - return io_prep_rw(req, sqe, READ); + return io_prep_rw(req, sqe); } /* @@ -3568,7 +3567,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (unlikely(!(req->file->f_mode & FMODE_WRITE))) return -EBADF; - return io_prep_rw(req, sqe, WRITE); + return io_prep_rw(req, sqe); } static int io_write(struct io_kiocb *req, unsigned int issue_flags) From 7bd9ef36078461ee3b7da2330e2a7e46b044ebb8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 23 Oct 2021 12:14:02 +0100 Subject: [PATCH 0712/1202] io_uring: clusterise ki_flags access in rw_prep ioprio setup doesn't depend on other fields that are modified in io_prep_rw() and we can move it down in the function without worrying about performance. It's useful as it makes iocb->ki_flags accesses/modifications closer together, so it's more likely the compiler will cache it in a register and avoid extra reloads. 
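A rough illustration of the idea, with an invented struct (whether the compiler actually keeps the field in a register depends on the target and optimization level, so treat this as a sketch rather than a guarantee):

    struct kiocb_demo { unsigned int ki_flags; int ki_ioprio; };

    /* scattered: ki_flags is touched, then an unrelated field, then
     * ki_flags again, inviting a reload of ki_flags in between */
    void setup_scattered(struct kiocb_demo *k, int prio)
    {
            k->ki_flags |= 0x1;
            k->ki_ioprio = prio;
            k->ki_flags |= 0x4;
    }

    /* clustered: both ki_flags updates are adjacent, so the value can
     * stay live in a register across them */
    void setup_clustered(struct kiocb_demo *k, int prio)
    {
            k->ki_flags |= 0x1;
            k->ki_flags |= 0x4;
            k->ki_ioprio = prio;
    }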
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8ee98779c06f1b59f6039b1e292db4332efd664b.1634987320.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7042ed870b524..bba2f77ae7e79 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2840,16 +2840,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) req->flags |= REQ_F_NOWAIT; - ioprio = READ_ONCE(sqe->ioprio); - if (ioprio) { - ret = ioprio_check_cap(ioprio); - if (ret) - return ret; - - kiocb->ki_ioprio = ioprio; - } else - kiocb->ki_ioprio = get_current_ioprio(); - if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; @@ -2863,6 +2853,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) kiocb->ki_complete = io_complete_rw; } + ioprio = READ_ONCE(sqe->ioprio); + if (ioprio) { + ret = ioprio_check_cap(ioprio); + if (ret) + return ret; + + kiocb->ki_ioprio = ioprio; + } else { + kiocb->ki_ioprio = get_current_ioprio(); + } + req->imu = NULL; req->rw.addr = READ_ONCE(sqe->addr); req->rw.len = READ_ONCE(sqe->len); From c4d68f305394b244f87c7b363e4e70de10a8ebc8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Oct 2021 18:53:31 +0200 Subject: [PATCH 0713/1202] parisc: enhance warning regarding usage of O_NONBLOCK Instead of showing only the very first application which needs recompile, show all of them, but print them only once. Includes typo fix noticed by Colin Ian King. Signed-off-by: Helge Deller Signed-off-by: Colin Ian King --- arch/parisc/include/asm/thread_info.h | 1 + arch/parisc/kernel/sys_parisc.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 444588443c04c..75657c2c54e11 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { #define TIF_BLOCKSTEP 10 /* branch stepping? */ #define TIF_SECCOMP 11 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 12 /* syscall tracepoint instrumentation */ +#define TIF_NONBLOCK_WARNING 13 /* warned about wrong O_NONBLOCK usage */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 5f12537318ab2..2b34294517a15 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -409,10 +409,12 @@ long parisc_personality(unsigned long personality) static int FIX_O_NONBLOCK(int flags) { - if (flags & O_NONBLOCK_MASK_OUT) { - struct task_struct *tsk = current; - pr_warn_once("%s(%d) uses a deprecated O_NONBLOCK value.\n", - tsk->comm, tsk->pid); + if ((flags & O_NONBLOCK_MASK_OUT) && + !test_thread_flag(TIF_NONBLOCK_WARNING)) { + set_thread_flag(TIF_NONBLOCK_WARNING); + pr_warn("%s(%d) uses a deprecated O_NONBLOCK value." + " Please recompile with newer glibc.\n", + current->comm, current->pid); } return flags & ~O_NONBLOCK_MASK_OUT; } From 129769b609a280a992be2a5b29a271b1cb38e463 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 19 Oct 2021 20:31:52 +0200 Subject: [PATCH 0714/1202] parisc: mark xchg functions notrace tracing the xchg functions leads to recursion in various places. 
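A hedged userspace sketch of how such recursion arises (all names invented; a depth guard is added so the demo terminates instead of overflowing the stack): if the tracing hook itself needs an atomic exchange, instrumenting the exchange function makes the hook re-enter it:

    #include <stdio.h>

    static int depth;
    static int traced_xchg(volatile int *ptr, int val);

    /* stand-in for the ftrace entry hook: it takes a lock via an
     * exchange, so tracing the exchange re-enters the hook */
    static void trace_hook(void)
    {
            static volatile int trace_lock;

            traced_xchg(&trace_lock, 1);
    }

    static int traced_xchg(volatile int *ptr, int val)
    {
            int old;

            if (++depth > 3) {      /* guard: real code would recurse forever */
                    printf("recursed to depth %d, stopping demo\n", depth);
                    return 0;
            }
            trace_hook();           /* the instrumentation call */
            old = *ptr;
            *ptr = val;
            return old;
    }

    int main(void)
    {
            volatile int word = 0;

            traced_xchg(&word, 42);
            return 0;
    }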
Therefore mark the function as notrace. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/lib/bitops.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index 9ac683bf6ae76..36a3141990746 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -18,7 +18,7 @@ arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { #endif #ifdef CONFIG_64BIT -unsigned long __xchg64(unsigned long x, volatile unsigned long *ptr) +unsigned long notrace __xchg64(unsigned long x, volatile unsigned long *ptr) { unsigned long temp, flags; @@ -30,7 +30,7 @@ unsigned long __xchg64(unsigned long x, volatile unsigned long *ptr) } #endif -unsigned long __xchg32(int x, volatile int *ptr) +unsigned long notrace __xchg32(int x, volatile int *ptr) { unsigned long flags; long temp; @@ -43,7 +43,7 @@ unsigned long __xchg32(int x, volatile int *ptr) } -unsigned long __xchg8(char x, volatile char *ptr) +unsigned long notrace __xchg8(char x, volatile char *ptr) { unsigned long flags; long temp; @@ -56,7 +56,7 @@ unsigned long __xchg8(char x, volatile char *ptr) } -u64 __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new) +u64 notrace __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new) { unsigned long flags; u64 prev; @@ -68,7 +68,7 @@ u64 __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new) return prev; } -unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new) +unsigned long notrace __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new) { unsigned long flags; unsigned int prev; @@ -80,7 +80,7 @@ unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsign return (unsigned long)prev; } -u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new) +u8 notrace __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new) { unsigned long flags; u8 prev; From 85b968b0c19e902a62884db7d016357132838ada Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 15 Oct 2021 21:56:12 +0200 Subject: [PATCH 0715/1202] parisc: don't enable irqs unconditionally in handle_interruption() If the previous context had interrupts disabled, we should better keep them disabled. This was noticed in the unwinding code where a copy_from_kernel_nofault() triggered a page fault, and after the fixup by the page fault handler interrupts where suddenly enabled. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 690e6abcaf221..b11fb26ce2998 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -481,7 +481,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) if (code == 1) pdc_console_restart(); /* switch back to pdc if HPMC */ - else + else if (!irqs_disabled_flags(regs->gr[0])) local_irq_enable(); /* Security check: From da26f007b27b37ad41cfab6b2f8f9cc109079a65 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Thu, 21 Oct 2021 16:42:13 +0800 Subject: [PATCH 0716/1202] parisc: Make use of the helper macro kthread_run() Replace kthread_create/wake_up_process() with kthread_run() to simplify the code. 
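For reference, kthread_run() is just the create-and-wake pair folded into one macro; include/linux/kthread.h defines it approximately as follows (quoted from memory, so check the tree for the exact text), which is why the conversion is purely mechanical and does not change behavior:

    #define kthread_run(threadfn, data, namefmt, ...)                          \
    ({                                                                         \
            struct task_struct *__k                                            \
                    = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
            if (!IS_ERR(__k))                                                  \
                    wake_up_process(__k);                                      \
            __k;                                                               \
    })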
Signed-off-by: Cai Huoqing Signed-off-by: Helge Deller --- arch/parisc/kernel/pdt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index fcc761b0e11b9..e391b175f5ece 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -352,12 +352,10 @@ static int __init pdt_initcall(void) if (pdt_type == PDT_NONE) return -ENODEV; - kpdtd_task = kthread_create(pdt_mainloop, NULL, "kpdtd"); + kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd"); if (IS_ERR(kpdtd_task)) return PTR_ERR(kpdtd_task); - wake_up_process(kpdtd_task); - return 0; } From e1e134bf1eb6b71bf255e976dc3000078399a09e Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 23 Oct 2021 20:21:00 +0200 Subject: [PATCH 0717/1202] parisc/ftrace: set function trace function With DYNAMIC_FTRACE, we need to implement ftrace_update_ftrace_func() and not call ftrace_trace_function() directly, as ftrace doesn't expect calls to this function during code patching. Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/ftrace.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 0a1e75af5382d..3a577186f71b4 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -48,20 +48,16 @@ static void __hot prepare_ftrace_return(unsigned long *parent, } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +static ftrace_func_t ftrace_func; + void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3, struct ftrace_regs *fregs) { -#ifndef CONFIG_DYNAMIC_FTRACE - extern ftrace_func_t ftrace_trace_function; -#endif extern struct ftrace_ops *function_trace_op; - if (function_trace_op->flags & FTRACE_OPS_FL_ENABLED && - ftrace_trace_function != ftrace_stub) - ftrace_trace_function(self_addr, parent, - function_trace_op, fregs); + ftrace_func(self_addr, parent, function_trace_op, fregs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (dereference_function_descriptor(ftrace_graph_return) != @@ -99,8 +95,10 @@ int __init ftrace_dyn_arch_init(void) { return 0; } + int ftrace_update_ftrace_func(ftrace_func_t func) { + ftrace_func = func; return 0; } From 60017239b6b2892665070c812fee1b6a1c8137dc Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Sat, 23 Oct 2021 20:21:01 +0200 Subject: [PATCH 0718/1202] parisc/ftrace: use static key to enable/disable function graph tracer This avoids using dereference_function_descriptor in the ftrace code path, and it's also faster.
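The static key API used by this patch follows the usual kernel pattern: a branch that is patched to a nop until the key is flipped at the enable/disable points. A generic sketch (not the parisc code itself; do_feature_work() is a placeholder):

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(feature_enabled);

    extern void do_feature_work(void);

    void hot_path(void)
    {
            /* compiled as a nop until the key is enabled */
            if (static_branch_unlikely(&feature_enabled))
                    do_feature_work();
    }

    void feature_on(void)  { static_key_enable(&feature_enabled.key);  }
    void feature_off(void) { static_key_disable(&feature_enabled.key); }

Note that the enable and disable paths must call static_key_enable() and static_key_disable() respectively; flipping the key the same way on both sides defeats the switch.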
Signed-off-by: Sven Schnelle Signed-off-by: Helge Deller --- arch/parisc/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 3a577186f71b4..8b9b8ce95d8da 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -15,6 +15,7 @@ #include #include #include +#include <linux/jump_label.h> #include #include @@ -23,6 +24,7 @@ #define __hot __section(".text.hot") +static DEFINE_STATIC_KEY_FALSE(ftrace_graph_enable); #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addrs @@ -60,9 +62,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, ftrace_func(self_addr, parent, function_trace_op, fregs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (dereference_function_descriptor(ftrace_graph_return) != - dereference_function_descriptor(ftrace_stub) || - ftrace_graph_entry != ftrace_graph_entry_stub) { + if (static_branch_unlikely(&ftrace_graph_enable)) { unsigned long *parent_rp; /* calculate pointer to %rp in stack */ @@ -80,11 +80,13 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #ifdef CONFIG_FUNCTION_GRAPH_TRACER int ftrace_enable_ftrace_graph_caller(void) { + static_key_enable(&ftrace_graph_enable.key); return 0; } int ftrace_disable_ftrace_graph_caller(void) { + static_key_disable(&ftrace_graph_enable.key); return 0; } #endif From fefbed24c74dd16b206c59789c5bed254428c96b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 20 Oct 2021 09:48:18 +0300 Subject: [PATCH 0719/1202] Revert "memblock: exclude NOMAP regions from kmemleak" Commit 6e44bd6d34d6 ("memblock: exclude NOMAP regions from kmemleak") breaks boot on EFI systems with kmemleak and VM_DEBUG enabled: efi: Processing EFI memory map: efi: 0x000090000000-0x000091ffffff [Conventional| | | | | | | | | | |WB|WT|WC|UC] efi: 0x000092000000-0x0000928fffff [Runtime Data|RUN| | | | | | | | | |WB|WT|WC|UC] ------------[ cut here ]------------ kernel BUG at mm/kmemleak.c:1140!
Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.0-rc6-next-20211019+ #104 pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : kmemleak_free_part_phys+0x64/0x8c lr : kmemleak_free_part_phys+0x38/0x8c sp : ffff800011eafbc0 x29: ffff800011eafbc0 x28: 1fffff7fffb41c0d x27: fffffbfffda0e068 x26: 0000000092000000 x25: 1ffff000023d5f94 x24: ffff800011ed84d0 x23: ffff800011ed84c0 x22: ffff800011ed83d8 x21: 0000000000900000 x20: ffff800011782000 x19: 0000000092000000 x18: ffff800011ee0730 x17: 0000000000000000 x16: 0000000000000000 x15: 1ffff0000233252c x14: ffff800019a905a0 x13: 0000000000000001 x12: ffff7000023d5ed7 x11: 1ffff000023d5ed6 x10: ffff7000023d5ed6 x9 : dfff800000000000 x8 : ffff800011eaf6b7 x7 : 0000000000000001 x6 : ffff800011eaf6b0 x5 : 00008ffffdc2a12a x4 : ffff7000023d5ed7 x3 : 1ffff000023dbf99 x2 : 1ffff000022f0463 x1 : 0000000000000000 x0 : ffffffffffffffff Call trace: kmemleak_free_part_phys+0x64/0x8c memblock_mark_nomap+0x5c/0x78 reserve_regions+0x294/0x33c efi_init+0x2d0/0x490 setup_arch+0x80/0x138 start_kernel+0xa0/0x3ec __primary_switched+0xc0/0xc8 Code: 34000041 97d526e7 f9418e80 36000040 (d4210000) random: get_random_bytes called from print_oops_end_marker+0x34/0x80 with crng_init=0 ---[ end trace 0000000000000000 ]--- The crash happens because kmemleak_free_part_phys() tries to use __va() before memstart_addr is initialized and this triggers a VM_BUG_ON() in arch/arm64/include/asm/memory.h: Revert 6e44bd6d34d6 ("memblock: exclude NOMAP regions from kmemleak"), the issue it is fixing will be fixed differently. Reported-by: Qian Cai Signed-off-by: Mike Rapoport Acked-by: Catalin Marinas Reviewed-by: David Hildenbrand --- mm/memblock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 5c3503c98b2f1..184dcd2e5d998 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -936,12 +936,7 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) */ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) { - int ret = memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); - - if (!ret) - kmemleak_free_part_phys(base, size); - - return ret; + return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP); } /** From 5da2b76dd1f9f41ac46159b35043da6cb09be256 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 13 Oct 2021 08:36:59 +0300 Subject: [PATCH 0720/1202] memblock: exclude MEMBLOCK_NOMAP regions from kmemleak Vladimir Zapolskiy reports: commit a7259df76702 ("memblock: make memblock_find_in_range method private") invokes a kernel panic while running kmemleak on OF platforms with nomaped regions: Unable to handle kernel paging request at virtual address fff000021e00000 [...] scan_block+0x64/0x170 scan_gray_list+0xe8/0x17c kmemleak_scan+0x270/0x514 kmemleak_write+0x34c/0x4ac The memory allocated from memblock is registered with kmemleak, but if it is marked MEMBLOCK_NOMAP it won't have linear map entries so an attempt to scan such areas will fault. Ideally, memblock_mark_nomap() would inform kmemleak to ignore MEMBLOCK_NOMAP memory, but it can be called before kmemleak interfaces operating on physical addresses can use __va() conversion. Make sure that functions that mark allocated memory as MEMBLOCK_NOMAP take care of informing kmemleak to ignore such memory. 
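The resulting calling convention on the allocation side can be sketched as follows (illustrative fragment, not taken from a real driver; error handling trimmed):

    /* allocate, then take the range out of the linear map; kmemleak
     * cannot scan unmapped memory, so tell it to drop the object it
     * registered when the range was allocated */
    phys_addr_t base = memblock_phys_alloc(size, align);

    if (base) {
            err = memblock_mark_nomap(base, size);
            if (!err)
                    kmemleak_ignore_phys(base);
    }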
Link: https://lore.kernel.org/all/8ade5174-b143-d621-8c8e-dc6a1898c6fb@linaro.org Link: https://lore.kernel.org/all/c30ff0a2-d196-c50d-22f0-bd50696b1205@quicinc.com Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Reported-by: Vladimir Zapolskiy Signed-off-by: Mike Rapoport Tested-by: Vladimir Zapolskiy Reviewed-by: Catalin Marinas Reviewed-by: David Hildenbrand --- drivers/acpi/tables.c | 3 +++ drivers/of/of_reserved_mem.c | 2 ++ mm/memblock.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index f9383736fa0fe..71419eb16e09f 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -21,6 +21,7 @@ #include #include #include +#include <linux/kmemleak.h> #include "internal.h" #ifdef CONFIG_ACPI_CUSTOM_DSDT @@ -601,6 +602,8 @@ void __init acpi_table_upgrade(void) */ arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + kmemleak_ignore_phys(acpi_tables_addr); + /* * early_ioremap only can remap 256k one time. If we map all * tables one time, we will hit the limit. Need to map chunks diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 59c1390cdf423..9da8835ba5a58 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include <linux/kmemleak.h> #include "of_private.h" @@ -46,6 +47,7 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, err = memblock_mark_nomap(base, size); if (err) memblock_free(base, size); + kmemleak_ignore_phys(base); } return err; diff --git a/mm/memblock.c b/mm/memblock.c index 184dcd2e5d998..dab804b09d625 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -932,6 +932,9 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size) * covered by the memory map. The struct page representing NOMAP memory * frames in the memory map will be PageReserved() * + * Note: if the memory being marked %MEMBLOCK_NOMAP was allocated from + * memblock, the caller must inform kmemleak to ignore that memory + * * Return: 0 on success, -errno on failure. */ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size) From 7c5c49dc2b808ff9562a0ae207caf45b5e3f83c9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 2 Oct 2021 19:02:42 +0900 Subject: [PATCH 0721/1202] [for -next only] kconfig: generate include/generated/rustc_cfg This patch is not intended for the mainline. I created this patch for Stephen Rothwell to resolve the merge conflicts in linux-next more easily. The Rust tree is touching scripts/kconfig/confdata.c to generate include/generated/rustc_cfg, which would cause complicated conflicts with the changes from the kbuild tree. I re-implemented it to produce the equivalent rustc_cfg. I also fixed the memory leak; the original code from the Rust tree did not free the malloc'ed pointer. Stephen: When you resolve the conflicts in confdata.c, please take this one. Signed-off-by: Masahiro Yamada --- scripts/kconfig/confdata.c | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 42bc56ee238c8..c5476a9f379c7 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -603,6 +603,9 @@ static const struct comment_style comment_style_c = { static void conf_write_heading(FILE *fp, const struct comment_style *cs) { + if (!cs) /* no heading for rustc_cfg?
*/ + return; + fprintf(fp, "%s\n", cs->prefix); fprintf(fp, "%s Automatically generated file; DO NOT EDIT.\n", @@ -750,6 +753,38 @@ static void print_symbol_for_c(FILE *fp, struct symbol *sym) free(escaped); } +/* rustc configuration */ +static void print_symbol_for_rustc_cfg(FILE *fp, struct symbol *sym) +{ + const char *val; + char *escaped; + + if (sym->type == S_UNKNOWN) + return; + + val = sym_get_string_value(sym); + + if (sym->type == S_BOOLEAN || sym->type == S_TRISTATE) { + if (*val == 'n') + return; + + /* + * To have similar functionality to the C macro `IS_ENABLED()` + * we provide an empty `--cfg CONFIG_X` here in both `y` + * and `m` cases. + * + * Then, the common `fprintf()` below will also give us + * a `--cfg CONFIG_X="y"` or `--cfg CONFIG_X="m"`, which can + * be used as the equivalent of `IS_BUILTIN()`/`IS_MODULE()`. + */ + fprintf(fp, "--cfg=%s%s\n", CONFIG_, sym->name); + } + + escaped = escape_string_value(val); + fprintf(fp, "--cfg=%s%s=%s\n", CONFIG_, sym->name, escaped); + free(escaped); +} + /* * Write out a minimal config. * All values that has default values are skipped as this is redundant. @@ -1129,6 +1164,12 @@ int conf_write_autoconf(int overwrite) if (ret) return ret; + ret = __conf_write_autoconf("include/generated/rustc_cfg", + print_symbol_for_rustc_cfg, + NULL); + if (ret) + return ret; + /* * Create include/config/auto.conf. This must be the last step because * Kbuild has a dependency on auto.conf and this marks the successful From a18c27eccafa8384dcd86b2849ca7646183daf27 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Fri, 15 Oct 2021 04:01:51 -0400 Subject: [PATCH 0722/1202] phy: hisilicon: Add of_node_put() in phy-hisi-inno-usb2 Fix following coccicheck warning: ./drivers/phy/hisilicon/phy-hisi-inno-usb2.c:138:1-23: WARNING: Function for_each_child_of_node should have of_node_put() before break Early exits from for_each_child_of_node should decrement the node reference counter. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20211015080154.16016-1-wanjiabing@vivo.com Signed-off-by: Vinod Koul --- drivers/phy/hisilicon/phy-hisi-inno-usb2.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c index 34a6a9a1ceb25..b133ae06757ab 100644 --- a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c +++ b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c @@ -140,14 +140,19 @@ static int hisi_inno_phy_probe(struct platform_device *pdev) struct phy *phy; rst = of_reset_control_get_exclusive(child, NULL); - if (IS_ERR(rst)) + if (IS_ERR(rst)) { + of_node_put(child); return PTR_ERR(rst); + } + priv->ports[i].utmi_rst = rst; priv->ports[i].priv = priv; phy = devm_phy_create(dev, child, &hisi_inno_phy_ops); - if (IS_ERR(phy)) + if (IS_ERR(phy)) { + of_node_put(child); return PTR_ERR(phy); + } phy_set_bus_width(phy, 8); phy_set_drvdata(phy, &priv->ports[i]); @@ -155,6 +160,7 @@ static int hisi_inno_phy_probe(struct platform_device *pdev) if (i > INNO_PHY_PORT_NUM) { dev_warn(dev, "Support %d ports in maximum\n", i); + of_node_put(child); break; } } From 2b2156e8a878269a3f0e2057c2f2f11447b6160c Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 20 Oct 2021 19:23:23 +0800 Subject: [PATCH 0723/1202] virtio_ring: check desc == NULL when using indirect with packed When using indirect with packed, we don't check for allocation failures. This patch checks that and falls back on direct.
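The shape of the fix is the common try-the-fast-path-then-fall-back pattern; a hedged userspace sketch (names invented, descriptor layout simplified):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* try to queue via an indirect descriptor table */
    static int add_indirect(size_t n)
    {
            void *desc = calloc(n, 16);     /* 16-byte packed descriptors */

            if (!desc)
                    return -ENOMEM;         /* caller may fall back */
            /* ... fill the table and post a single descriptor ... */
            free(desc);
            return 0;
    }

    static int add_buf(size_t n)
    {
            int err = add_indirect(n);

            if (err != -ENOMEM)
                    return err;             /* success or a hard error */
            /* fall back on direct descriptors */
            printf("indirect alloc failed, using %zu direct descriptors\n", n);
            return 0;
    }

    int main(void)
    {
            return add_buf(8) ? 1 : 0;
    }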
Fixes: 1ce9e6055fa0 ("virtio_ring: introduce packed ring support") Signed-off-by: Xuan Zhuo Link: https://lore.kernel.org/r/20211020112323.67466-3-xuanzhuo@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 91a46c4da87d3..bc92a2faa28fe 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1065,6 +1065,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); + if (!desc) + return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); @@ -1176,6 +1178,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; + int err; START_USE(vq); @@ -1191,9 +1194,14 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) - return virtqueue_add_indirect_packed(vq, sgs, total_sg, - out_sgs, in_sgs, data, gfp); + if (virtqueue_use_indirect(_vq, total_sg)) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, + in_sgs, data, gfp); + if (err != -ENOMEM) + return err; + + /* fall back on direct */ + } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; From 6455c5399017b6e42025b0d2e7adce752baa0303 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 29 Sep 2021 14:15:04 -0500 Subject: [PATCH 0724/1202] ALSA: virtio: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Also, make use of the struct_size() helper in kzalloc(). [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20210929191504.GA337268@embeddedor Signed-off-by: Michael S. Tsirkin --- sound/virtio/virtio_pcm_msg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c index f88c8f29cbd8c..aca2dc1989ba9 100644 --- a/sound/virtio/virtio_pcm_msg.c +++ b/sound/virtio/virtio_pcm_msg.c @@ -20,7 +20,7 @@ struct virtio_pcm_msg { struct virtio_snd_pcm_xfer xfer; struct virtio_snd_pcm_status status; size_t length; - struct scatterlist sgs[0]; + struct scatterlist sgs[]; }; /** @@ -146,8 +146,7 @@ int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss, int sg_num = virtsnd_pcm_sg_num(data, period_bytes); struct virtio_pcm_msg *msg; - msg = kzalloc(sizeof(*msg) + sizeof(*msg->sgs) * (sg_num + 2), - GFP_KERNEL); + msg = kzalloc(struct_size(msg, sgs, sg_num + 2), GFP_KERNEL); if (!msg) return -ENOMEM; From 9d5d1a58a08107a49cfb13da6fbd9f49ca0e741e Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Fri, 22 Oct 2021 06:17:38 -0400 Subject: [PATCH 0725/1202] vdpa: fix Alibaba ENI kconfig text Add a missing space. Fix up grammar. Suggested-by: Randy Dunlap Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index c0232a2148a70..481a38e6c600c 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -83,7 +83,7 @@ config ALIBABA_ENI_VDPA select VIRTIO_PCI_LEGACY_LIB depends on PCI_MSI && !CPU_BIG_ENDIAN help - VDPA driver for Alibaba ENI(Elastic Network Interface) which is build upon + VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon virtio 0.9.5 specification. endif # VDPA From 2da9c72c8da665018b3645169c06a9357fd617c1 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Sat, 16 Oct 2021 11:06:46 +0200 Subject: [PATCH 0726/1202] virtio-pmem: add myself as virtio-pmem maintainer Adding myself as virtio-pmem maintainer and also adding virtualization mailing list entry for virtio specific bits. Helps to get notified for appropriate bug fixes & enhancements. Signed-off-by: Pankaj Gupta Link: https://lore.kernel.org/r/20211016090646.371145-1-pankaj.gupta.linux@gmail.com Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d26..9c35a5f0e8a07 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19941,6 +19941,13 @@ S: Maintained F: drivers/i2c/busses/i2c-virtio.c F: include/uapi/linux/virtio_i2c.h +VIRTIO PMEM DRIVER +M: Pankaj Gupta +L: virtualization@lists.linux-foundation.org +S: Maintained +F: drivers/nvdimm/virtio_pmem.c +F: drivers/nvdimm/nd_virtio.c + VIRTUAL BOX GUEST DEVICE DRIVER M: Hans de Goede M: Arnd Bergmann From 207b60372880ebbd2b49fd141d2d86c4e45bfaa1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:43 +0800 Subject: [PATCH 0727/1202] virtio-blk: validate num_queues during probe If an untrusted device neogitates BLK_F_MQ but advertises a zero num_queues, the driver may end up trying to allocating zero size buffers where ZERO_SIZE_PTR is returned which may pass the checking against the NULL. This will lead unexpected results. Fixing this by failing the probe in this case. Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: Stefano Garzarella Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a33fe07436722..dbcf2a7e4a00c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -571,6 +571,10 @@ static int init_vq(struct virtio_blk *vblk) &num_vqs); if (err) num_vqs = 1; + if (!err && !num_vqs) { + dev_err(&vdev->dev, "MQ advertisted but zero queues reported\n"); + return -EINVAL; + } num_vqs = min_t(unsigned int, min_not_zero(num_request_queues, nr_cpu_ids), From 838f3bf8f6ea17e92dec86287e5a5510931f9011 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:44 +0800 Subject: [PATCH 0728/1202] virtio_console: validate max_nr_ports before trying to use it We calculate nr_ports based on the max_nr_ports: nr_queues = use_multiport(portdev) ? (nr_ports + 1) * 2 : 2; If the device advertises a large max_nr_ports, we will end up with a integer overflow. 
Fix this by validating max_nr_ports and failing the probe when it is invalid. Cc: Amit Shah Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-3-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 7eaf303a7a86f..660c5c388c291 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -28,6 +28,7 @@ #include "../tty/hvc/hvc_console.h" #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) +#define VIRTCONS_MAX_PORTS 0x8000 /* * This is a global struct for storing common data for all the devices @@ -2036,6 +2037,14 @@ static int virtcons_probe(struct virtio_device *vdev) virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT, struct virtio_console_config, max_nr_ports, &portdev->max_nr_ports) == 0) { + if (portdev->max_nr_ports == 0 || + portdev->max_nr_ports > VIRTCONS_MAX_PORTS) { + dev_err(&vdev->dev, + "Invalidate max_nr_ports %d", + portdev->max_nr_ports); + err = -EINVAL; + goto free; + } multiport = true; } From bc986232701acb6e25d89fcbc47c6a0d2a53eaf7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:45 +0800 Subject: [PATCH 0729/1202] virtio_config: introduce a new .enable_cbs method This patch introduces a new method to enable the callbacks for config and virtqueues. This will be used for making sure the virtqueue callbacks are only enabled after virtio_device_ready() if the transport implements this method. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-4-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8519b3ae5d52e..4d107ad311490 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -23,6 +23,8 @@ struct virtio_shm_region { * any of @get/@set, @get_status/@set_status, or @get_features/ * @finalize_features are NOT safe to be called from an atomic * context. + * @enable_cbs: enable the callbacks + * vdev: the virtio_device * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -75,6 +77,7 @@ struct virtio_shm_region { */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { + void (*enable_cbs)(struct virtio_device *vdev); void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, @@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); + if (dev->config->enable_cbs) + dev->config->enable_cbs(dev); + BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } From f223397a8b0e5472fee39318d4aaa7df44f9e466 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:46 +0800 Subject: [PATCH 0730/1202] virtio_pci: harden MSI-X interrupts We used to synchronize pending MSI-X irq handlers via synchronize_irq(); this may not work for an untrusted device, which may keep sending interrupts after reset, leading to unexpected results. Similarly, we should not enable MSI-X interrupts until the device is ready; the sketch below shows the IRQF_NO_AUTOEN pattern the fix relies on.
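For reference, a minimal sketch of the request/enable split that IRQF_NO_AUTOEN makes possible; the identifiers are illustrative and not taken from the patch:

	#include <linux/interrupt.h>

	/* Sketch only: with IRQF_NO_AUTOEN the line stays disabled after
	 * request_irq(), so the handler cannot run until an explicit
	 * enable_irq().  The patch performs the enable from .enable_cbs,
	 * i.e. only once virtio_device_ready() has been reached.
	 */
	static int example_setup_irq(unsigned int irq, irq_handler_t handler,
				     void *dev)
	{
		int err = request_irq(irq, handler, IRQF_NO_AUTOEN,
				      "example-dev", dev);
		if (err)
			return err;
		/* ... finish device initialization ... */
		enable_irq(irq); /* handler may fire only from here on */
		return 0;
	}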
So this patch fixes those two issues by: 1) switching to disable_irq() to prevent the virtio interrupt handlers from being called after the device is reset. 2) using IRQF_NO_AUTOEN and enabling the MSI-X irq during .ready(). This makes sure the virtio interrupt handler won't be called before virtio_device_ready() or after reset. Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-5-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 27 +++++++++++++++++++++------ drivers/virtio/virtio_pci_common.h | 6 ++++-- drivers/virtio/virtio_pci_legacy.c | 5 +++-- drivers/virtio/virtio_pci_modern.c | 6 ++++-- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index d724f676608ba..3f51fdb7be454 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,8 +24,8 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev) +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; @@ -34,7 +34,20 @@ void vp_synchronize_vectors(struct virtio_device *vdev) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) - synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); + disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); +} + +/* enable irq handlers */ +void vp_enable_cbs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; + + if (vp_dev->intx_enabled) + return; + + for (i = 0; i < vp_dev->msix_vectors; ++i) + enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -141,7 +154,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], + vp_config_changed, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -160,7 +174,8 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], + vp_vring_interrupt, IRQF_NO_AUTOEN, + vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -337,7 +352,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, + vring_interrupt, IRQF_NO_AUTOEN, vp_dev->msix_names[msix_vec], vqs[i]); if (err) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index eb17a29fc7ef1..d3c6f72c73906 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -101,8 +101,10 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* wait for pending irq handlers */ -void vp_synchronize_vectors(struct virtio_device *vdev); +/* disable irq handlers */ +void vp_disable_cbs(struct virtio_device *vdev); +/* enable irq 
handlers */ +void vp_enable_cbs(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 82eb437ad9205..b3f8128b7983b 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -98,8 +98,8 @@ static void vp_reset(struct virtio_device *vdev) /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ vp_legacy_get_status(&vp_dev->ldev); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -185,6 +185,7 @@ static void del_vq(struct virtio_pci_vq_info *info) } static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 30654d3a0b41e..5455bc041fb69 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Flush pending VQ/configuration callbacks. */ - vp_synchronize_vectors(vdev); + /* Disable VQ/configuration callbacks. */ + vp_disable_cbs(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,6 +380,7 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { + .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -397,6 +398,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { + .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, From 9db5cd69b6f017391301cf1aeb85c46e25399389 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:47 +0800 Subject: [PATCH 0731/1202] virtio-pci: harden INTX interrupts This patch tries to make sure the virtio interrupt handler for INTX won't be called after a reset and before virtio_device_ready(). We can't use IRQF_NO_AUTOEN since we're using a shared interrupt (IRQF_SHARED). So this patch tracks the INTX enabling status in a new intx_soft_enabled variable and toggles it in vp_disable_cbs()/vp_enable_cbs(). The INTX interrupt handler will check intx_soft_enabled before processing the actual interrupt. Cc: Boqun Feng Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul E. McKenney Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-6-jasowang@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 23 +++++++++++++++++++++-- drivers/virtio/virtio_pci_common.h | 1 + 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 3f51fdb7be454..fdbde1db5ec59 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -30,8 +30,16 @@ void vp_disable_cbs(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) + if (vp_dev->intx_enabled) { + /* + * The below synchronize() guarantees that any + * interrupt for this line arriving after + * synchronize_irq() has completed is guaranteed to see + * intx_soft_enabled == false. + */ + WRITE_ONCE(vp_dev->intx_soft_enabled, false); synchronize_irq(vp_dev->pci_dev->irq); + } for (i = 0; i < vp_dev->msix_vectors; ++i) disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); @@ -43,8 +51,16 @@ void vp_enable_cbs(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) + if (vp_dev->intx_enabled) { + disable_irq(vp_dev->pci_dev->irq); + /* + * The above disable_irq() provides TSO ordering and + * as such promotes the below store to store-release. + */ + WRITE_ONCE(vp_dev->intx_soft_enabled, true); + enable_irq(vp_dev->pci_dev->irq); return; + } for (i = 0; i < vp_dev->msix_vectors; ++i) enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); @@ -97,6 +113,9 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; + if (!READ_ONCE(vp_dev->intx_soft_enabled)) + return IRQ_NONE; + /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index d3c6f72c73906..23f6c5c678d5e 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -63,6 +63,7 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; + bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ From f772d42df4f62ad13725a99ea5de4228defb0648 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:48 +0800 Subject: [PATCH 0732/1202] virtio_ring: fix typos in vring_desc_extra We're actually tracking descriptor address and length instead of the buffer. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-7-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index bc92a2faa28fe..4c0ec82cef562 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -79,8 +79,8 @@ struct vring_desc_state_packed { }; struct vring_desc_extra { - dma_addr_t addr; /* Buffer DMA addr. */ - u32 len; /* Buffer length. */ + dma_addr_t addr; /* Descriptor DMA addr. */ + u32 len; /* Descriptor length. */ u16 flags; /* Descriptor flags. */ u16 next; /* The next desc state in a list. 
*/ }; From a05897a893cb8133a4df040f430743874dc05eaf Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Thu, 21 Oct 2021 06:51:11 +0000 Subject: [PATCH 0733/1202] virtio-blk: fixup coccinelle warnings coccicheck complains about the use of snprintf() in sysfs show functions: WARNING: use scnprintf or sprintf. Using sysfs_emit() instead of scnprintf() or sprintf() makes more sense here. Reported-by: Zeal Robot Signed-off-by: Ye Guojin Link: https://lore.kernel.org/r/20211021065111.1047824-1-ye.guojin@zte.com.cn Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Stefano Garzarella --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dbcf2a7e4a00c..6318134aab76e 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -704,7 +704,7 @@ cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) u8 writeback = virtblk_get_cache_mode(vblk->vdev); BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); - return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); + return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]); } static DEVICE_ATTR_RW(cache_type); From faa81bac592da23b8d5e87eee1744f0f21dee10f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:02 +0300 Subject: [PATCH 0734/1202] vdpa: Introduce and use vdpa device get, set config helpers Subsequent patches enable get and set configuration either via the management device or via the vdpa device's config ops. This requires synchronization between multiple callers to the get and set config callbacks. Feature setting also influences the endianness of the configuration fields. To avoid exposing synchronization primitives to callers, introduce helpers for getting and setting the configuration and use them. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211021163509.6978-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/vhost/vdpa.c | 3 +-- include/linux/vdpa.h | 19 ++++--------------- 3 files changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 4aeb1458b924b..7874f742b706d 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -297,6 +297,42 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); +/** + * vdpa_get_config - Get one or more device configuration fields. + * @vdev: vdpa device to operate on + * @offset: starting byte offset of the field + * @buf: buffer pointer to read to + * @len: length of the configuration fields in bytes + */ +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len) +{ + const struct vdpa_config_ops *ops = vdev->config; + + /* + * Config accesses aren't supposed to trigger before features are set. + * If it does happen we assume a legacy guest. + */ + if (!vdev->features_valid) + vdpa_set_features(vdev, 0); + ops->get_config(vdev, offset, buf, len); +} +EXPORT_SYMBOL_GPL(vdpa_get_config); + +/** + * vdpa_set_config - Set one or more device configuration fields. 
+ * @vdev: vdpa device to operate on + * @offset: starting byte offset of the field + * @buf: buffer pointer to read from + * @length: length of the configuration fields in bytes + */ +void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int length) +{ + vdev->config->set_config(vdev, offset, buf, length); +} +EXPORT_SYMBOL_GPL(vdpa_set_config); + static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev, const char *busname, const char *devname) { diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 39039e0461175..01c59ce7e2508 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, struct vhost_vdpa_config __user *c) { struct vdpa_device *vdpa = v->vdpa; - const struct vdpa_config_ops *ops = vdpa->config; struct vhost_vdpa_config config; unsigned long size = offsetof(struct vhost_vdpa_config, buf); u8 *buf; @@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, if (IS_ERR(buf)) return PTR_ERR(buf); - ops->set_config(vdpa, config.off, buf, config.len); + vdpa_set_config(vdpa, config.off, buf, config.len); kvfree(buf); return 0; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30864848950b7..5d84ff9173a02 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -386,21 +386,10 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features) return ops->set_features(vdev, features); } -static inline void vdpa_get_config(struct vdpa_device *vdev, - unsigned int offset, void *buf, - unsigned int len) -{ - const struct vdpa_config_ops *ops = vdev->config; - - /* - * Config accesses aren't supposed to trigger before features are set. - * If it does happen we assume a legacy guest. - */ - if (!vdev->features_valid) - vdpa_set_features(vdev, 0); - ops->get_config(vdev, offset, buf, len); -} - +void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, + void *buf, unsigned int len); +void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, + void *buf, unsigned int length); /** * struct vdpa_mgmtdev_ops - vdpa device ops * @dev_add: Add a vdpa device using alloc and register From aa8212c3bbbc792e389fd07123e8b73ed1886c8c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:03 +0300 Subject: [PATCH 0735/1202] vdpa: Introduce query of device config layout Introduce a command to query a device config layout. An example query of network vdpa device: $ vdpa dev add name bar mgmtdev vdpasim_net $ vdpa dev config show bar: mac 00:35:09:19:48:05 link up link_announce false mtu 1500 $ vdpa dev config show -jp { "config": { "bar": { "mac": "00:35:09:19:48:05", "link ": "up", "link_announce ": false, "mtu": 1500, } } } Signed-off-by: Parav Pandit Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20211021163509.6978-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 176 ++++++++++++++++++++++++++++++++++++++ include/linux/vdpa.h | 2 + include/uapi/linux/vdpa.h | 6 ++ 3 files changed, 184 insertions(+) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 7874f742b706d..8dbe4cb99763a 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include static LIST_HEAD(mdev_head); /* A global mutex that protects vdpa management device and device level operations. 
*/ @@ -66,6 +68,7 @@ static void vdpa_release_dev(struct device *d) ops->free(vdev); ida_simple_remove(&vdpa_index_ida, vdev->index); + mutex_destroy(&vdev->cf_mutex); kfree(vdev); } @@ -127,6 +130,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (err) goto err_name; + mutex_init(&vdev->cf_mutex); device_initialize(&vdev->dev); return vdev; @@ -309,6 +313,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, { const struct vdpa_config_ops *ops = vdev->config; + mutex_lock(&vdev->cf_mutex); /* * Config accesses aren't supposed to trigger before features are set. * If it does happen we assume a legacy guest. @@ -316,6 +321,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, if (!vdev->features_valid) vdpa_set_features(vdev, 0); ops->get_config(vdev, offset, buf, len); + mutex_unlock(&vdev->cf_mutex); } EXPORT_SYMBOL_GPL(vdpa_get_config); @@ -329,7 +335,9 @@ EXPORT_SYMBOL_GPL(vdpa_get_config); void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int length) { + mutex_lock(&vdev->cf_mutex); vdev->config->set_config(vdev, offset, buf, length); + mutex_unlock(&vdev->cf_mutex); } EXPORT_SYMBOL_GPL(vdpa_set_config); @@ -661,6 +669,168 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba return msg->len; } +static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, + struct sk_buff *msg, u64 features, + const struct virtio_net_config *config) +{ + u16 val_u16; + + if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0) + return 0; + + val_u16 = le16_to_cpu(config->max_virtqueue_pairs); + return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); +} + +static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) +{ + struct virtio_net_config config = {}; + u64 features; + u16 val_u16; + + vdpa_get_config(vdev, 0, &config, sizeof(config)); + + if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), + config.mac)) + return -EMSGSIZE; + + val_u16 = le16_to_cpu(config.status); + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16)) + return -EMSGSIZE; + + val_u16 = le16_to_cpu(config.mtu); + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) + return -EMSGSIZE; + + features = vdev->config->get_features(vdev); + + return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); +} + +static int +vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) +{ + u32 device_id; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, + VDPA_CMD_DEV_CONFIG_GET); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { + err = -EMSGSIZE; + goto msg_err; + } + + device_id = vdev->config->get_device_id(vdev); + if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { + err = -EMSGSIZE; + goto msg_err; + } + + switch (device_id) { + case VIRTIO_ID_NET: + err = vdpa_dev_net_config_fill(vdev, msg); + break; + default: + err = -EOPNOTSUPP; + break; + } + if (err) + goto msg_err; + + genlmsg_end(msg, hdr); + return 0; + +msg_err: + genlmsg_cancel(msg, hdr); + return err; +} + +static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct vdpa_device *vdev; + struct sk_buff *msg; + const char *devname; + struct device *dev; + int err; + + if (!info->attrs[VDPA_ATTR_DEV_NAME]) + return -EINVAL; + devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + msg = 
nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&vdpa_dev_mutex); + dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); + if (!dev) { + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); + err = -ENODEV; + goto dev_err; + } + vdev = container_of(dev, struct vdpa_device, dev); + if (!vdev->mdev) { + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); + err = -EINVAL; + goto mdev_err; + } + err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq, + 0, info->extack); + if (!err) + err = genlmsg_reply(msg, info); + +mdev_err: + put_device(dev); +dev_err: + mutex_unlock(&vdpa_dev_mutex); + if (err) + nlmsg_free(msg); + return err; +} + +static int vdpa_dev_config_dump(struct device *dev, void *data) +{ + struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); + struct vdpa_dev_dump_info *info = data; + int err; + + if (!vdev->mdev) + return 0; + if (info->idx < info->start_idx) { + info->idx++; + return 0; + } + err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, + info->cb->nlh->nlmsg_seq, NLM_F_MULTI, + info->cb->extack); + if (err) + return err; + + info->idx++; + return 0; +} + +static int +vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct vdpa_dev_dump_info info; + + info.msg = msg; + info.cb = cb; + info.start_idx = cb->args[0]; + info.idx = 0; + + mutex_lock(&vdpa_dev_mutex); + bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); + mutex_unlock(&vdpa_dev_mutex); + cb->args[0] = info.idx; + return msg->len; +} + static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, @@ -692,6 +862,12 @@ static const struct genl_ops vdpa_nl_ops[] = { .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, + { + .cmd = VDPA_CMD_DEV_CONFIG_GET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = vdpa_nl_cmd_dev_config_get_doit, + .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, + }, }; static struct genl_family vdpa_nl_family __ro_after_init = { diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 5d84ff9173a02..7f3fc8ccba3e1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -63,6 +63,7 @@ struct vdpa_mgmt_dev; * @dev: underlying device * @dma_dev: the actual device that is performing DMA * @config: the configuration ops for this device. + * @cf_mutex: Protects get and set access to configuration layout. * @index: device index * @features_valid: were features initialized? 
for legacy guests * @use_va: indicate whether virtual address must be used by this device @@ -74,6 +75,7 @@ struct vdpa_device { struct device dev; struct device *dma_dev; const struct vdpa_config_ops *config; + struct mutex cf_mutex; /* Protects get/set config */ unsigned int index; bool features_valid; bool use_va; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index e3b87879514c3..a252f06f9dfd0 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -17,6 +17,7 @@ enum vdpa_command { VDPA_CMD_DEV_NEW, VDPA_CMD_DEV_DEL, VDPA_CMD_DEV_GET, /* can dump */ + VDPA_CMD_DEV_CONFIG_GET, /* can dump */ }; enum vdpa_attr { @@ -34,6 +35,11 @@ enum vdpa_attr { VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */ + VDPA_ATTR_DEV_NET_STATUS, /* u8 */ + VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ + VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ + /* new attributes must be added above here */ VDPA_ATTR_MAX, }; From d3c9864831cf779f7d79fba199c5192fb8a5cc2f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:04 +0300 Subject: [PATCH 0736/1202] vdpa: Use kernel coding style for structure comments As subsequent patch adds new structure field with comment, move the structure comment to follow kernel coding style. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20211021163509.6978-4-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7f3fc8ccba3e1..a1e40b1ecebd3 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -411,10 +411,17 @@ struct vdpa_mgmtdev_ops { void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; +/** + * struct vdpa_mgmt_dev - vdpa management device + * @device: Management parent device + * @ops: operations supported by management device + * @id_table: Pointer to device id table of supported ids + * @list: list entry + */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; - const struct virtio_device_id *id_table; /* supported ids */ + const struct virtio_device_id *id_table; struct list_head list; }; From 4e92d2d7aaa676b83c8d921deefc20a52e84d91b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:05 +0300 Subject: [PATCH 0737/1202] vdpa: Enable user to set mac and mtu of vdpa device $ vdpa dev add name bar mgmtdev vdpasim_net mac 00:11:22:33:44:55 mtu 9000 $ vdpa dev config show bar: mac 00:11:22:33:44:55 link up link_announce false mtu 9000 $ vdpa dev config show -jp { "config": { "bar": { "mac": "00:11:22:33:44:55", "link ": "up", "link_announce ": false, "mtu": 9000, } } } Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20211021163509.6978-5-parav@nvidia.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 3 ++- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 ++- drivers/vdpa/vdpa.c | 33 ++++++++++++++++++++++++++-- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 3 ++- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 ++- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- include/linux/vdpa.h | 17 +++++++++++++- 7 files changed, 57 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index dcd648e1f7e7e..6dc75ca70b377 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev) return dev_type; } -static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; struct ifcvf_adapter *adapter; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b5bd1a5532560..6bbdc0ece707f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2482,7 +2482,8 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p return ret; } -static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) +static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, + const struct vdpa_dev_set_config *add_config) { struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct virtio_net_config *config; diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 8dbe4cb99763a..9607deb65fb60 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -14,7 +14,6 @@ #include #include #include -#include #include static LIST_HEAD(mdev_head); @@ -480,9 +479,15 @@ vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } +#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ + (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { + struct vdpa_dev_set_config config = {}; + struct nlattr **nl_attrs = info->attrs; struct vdpa_mgmt_dev *mdev; + const u8 *macaddr; const char *name; int err = 0; @@ -491,6 +496,21 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { + macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); + memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); + } + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { + config.net.mtu = + nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU); + } + + if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) && + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + mutex_lock(&vdpa_dev_mutex); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { @@ -498,8 +518,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i err = PTR_ERR(mdev); goto err; } + if ((config.mask & mdev->config_attr_mask) != config.mask) { + NL_SET_ERR_MSG_MOD(info->extack, + "All provided attributes are not supported"); + err = -EOPNOTSUPP; + goto err; + } - err = mdev->ops->dev_add(mdev, name); + err = mdev->ops->dev_add(mdev, name, &config); err: mutex_unlock(&vdpa_dev_mutex); return err; @@ -835,6 +861,9 @@ static 
const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, + [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, + /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ + [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), }; static const struct genl_ops vdpa_nl_ops[] = { diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index a790903f243e8..42d401d439117 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = { .release = vdpasim_blk_mgmtdev_release, }; -static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vdpasim_dev_attr dev_attr = {}; struct vdpasim *simdev; diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index a1ab6163f7d13..d681e423e64f5 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -126,7 +126,8 @@ static struct device vdpasim_net_mgmtdev = { .release = vdpasim_net_mgmtdev_release, }; -static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vdpasim_dev_attr dev_attr = {}; struct vdpasim *simdev; diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 841667a896dd0..c9204c62f339c 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) return 0; } -static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) +static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) { struct vduse_dev *dev; int ret; diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a1e40b1ecebd3..a624d2528da20 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include /** * struct vdpa_calllback - vDPA callback definition. @@ -93,6 +95,14 @@ struct vdpa_iova_range { u64 last; }; +struct vdpa_dev_set_config { + struct { + u8 mac[ETH_ALEN]; + u16 mtu; + } net; + u64 mask; +}; + /** * Corresponding file area for device memory mapping * @file: vma->vm_file for the mapping @@ -397,6 +407,7 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, * @dev_add: Add a vdpa device using alloc and register * @mdev: parent device to use for device addition * @name: name of the new vdpa device + * @config: config attributes to apply to the device under creation * Driver need to add a new device using _vdpa_register_device() * after fully initializing the vdpa device. Driver must return 0 * on success or appropriate error code. @@ -407,7 +418,8 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, * _vdpa_unregister_device(). 
*/ struct vdpa_mgmtdev_ops { - int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); + int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); }; @@ -416,12 +428,15 @@ struct vdpa_mgmtdev_ops { * @device: Management parent device * @ops: operations supported by management device * @id_table: Pointer to device id table of supported ids + * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that + * management device supports during dev_add callback * @list: list entry */ struct vdpa_mgmt_dev { struct device *device; const struct vdpa_mgmtdev_ops *ops; const struct virtio_device_id *id_table; + u64 config_attr_mask; struct list_head list; }; From ae6813ec8152e03d00cd2694f91bf804f3a54b2e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:06 +0300 Subject: [PATCH 0738/1202] vdpa_sim_net: Enable user to set mac address and mtu Enable the user to set the mac address and mtu so that each vdpa device can have its own user-specified mac address and mtu. This is done by implementing the management device's configuration layout fields setting callback routine. Now that the user can set the mac address, remove the module parameter for it. An example of setting the mac addr and mtu: $ vdpa mgmtdev show $ vdpa dev add name bar mgmtdev vdpasim_net $ vdpa dev config set bar mac 00:11:22:33:44:55 mtu 9000 View the config after setting: $ vdpa dev config show bar: mac 00:11:22:33:44:55 link up link_announce false mtu 9000 Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20211021163509.6978-6-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 35 +++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index d681e423e64f5..76dd24abc791c 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "vdpa_sim.h" @@ -29,12 +30,6 @@ #define VDPASIM_NET_VQ_NUM 2 -static char *macaddr; -module_param(macaddr, charp, 0); -MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); - -static u8 macaddr_buf[ETH_ALEN]; - static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -112,9 +107,21 @@ static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) { struct virtio_net_config *net_config = config; - net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static void vdpasim_net_setup_config(struct vdpasim *vdpasim, + const struct vdpa_dev_set_config *config) +{ + struct virtio_net_config *vio_config = vdpasim->config; + + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) + memcpy(vio_config->mac, config->net.mac, ETH_ALEN); + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu); + else + /* Setup default MTU to be 1500 */ + vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); } static void vdpasim_net_mgmtdev_release(struct device *dev) @@ -147,6 +154,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, if (IS_ERR(simdev)) return PTR_ERR(simdev); + vdpasim_net_setup_config(simdev, config); 
+ ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); if (ret) goto reg_err; @@ -180,20 +189,14 @@ static struct vdpa_mgmt_dev mgmt_dev = { .device = &vdpasim_net_mgmtdev, .id_table = id_table, .ops = &vdpasim_net_mgmtdev_ops, + .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | + 1 << VDPA_ATTR_DEV_NET_CFG_MTU), }; static int __init vdpasim_net_init(void) { int ret; - if (macaddr) { - mac_pton(macaddr, macaddr_buf); - if (!is_valid_ether_addr(macaddr_buf)) - return -EADDRNOTAVAIL; - } else { - eth_random_addr(macaddr_buf); - } - ret = device_register(&vdpasim_net_mgmtdev); if (ret) return ret; From 6882fd567f05457d472366d33411a6b270b0c444 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 21 Oct 2021 19:35:07 +0300 Subject: [PATCH 0739/1202] vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit The patch cited in the Fixes tag clears the feature bits during reset. mlx5 vdpa device feature bits are static, decided by device capabilities. Clearing the feature bits cleared VIRTIO_NET_F_MAC; due to this, the MAC address provided by the device is not honored. Fix it by not clearing the static feature bits during reset. Fixes: 0686082dbf7a ("vdpa: Add reset callback in vdpa_config_ops") Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Link: https://lore.kernel.org/r/20211021163509.6978-7-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 6bbdc0ece707f..8d1539728a593 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2194,7 +2194,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) clear_vqs_ready(ndev); mlx5_vdpa_destroy_mr(&ndev->mvdev); ndev->mvdev.status = 0; - ndev->mvdev.mlx_features = 0; memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); ndev->mvdev.actual_features = 0; ++mvdev->generation; From 004f66172f94c96434b0e59a6a964ff145c947ef Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 21 Oct 2021 19:35:08 +0300 Subject: [PATCH 0740/1202] vdpa/mlx5: Support configuration of MAC Add code to accept MAC configuration through the vdpa tool. The MAC is written into the config struct and can later be retrieved through get_config(). Examples: 1. Configure MAC while adding a device: $ vdpa dev add mgmtdev pci/0000:06:00.2 name vdpa0 mac 00:11:22:33:44:55 2. Show configured params: $ vdpa dev config show vdpa0: mac 00:11:22:33:44:55 link down link_announce false max_vq_pairs 8 mtu 1500 Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20211021163509.6978-8-parav@nvidia.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 8d1539728a593..fadb2dbe3f592 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -2526,9 +2527,13 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); - err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); - if (err) - goto err_mtu; + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { + memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); + } else { + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); + if (err) + goto err_mtu; + } if (get_link_state(mvdev)) ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); @@ -2632,6 +2637,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.ops = &mdev_ops; mgtdev->mgtdev.device = mdev->device; mgtdev->mgtdev.id_table = id_table; + mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); mgtdev->madev = madev; err = vdpa_mgmtdev_register(&mgtdev->mgtdev); From 2b55c407185fdb1901d3dd3a7b5ef23ebd958e91 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 21 Oct 2021 19:35:09 +0300 Subject: [PATCH 0741/1202] vdpa/mlx5: Forward only packets with allowed MAC address Add rules to forward packets to the net device's TIR only if the destination MAC is equal to the configured MAC. This is required to prevent the netdevice from receiving traffic not destined to its configured MAC. Signed-off-by: Eli Cohen Reviewed-by: Parav Pandit Link: https://lore.kernel.org/r/20211021163509.6978-9-parav@nvidia.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 76 +++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index fadb2dbe3f592..8e03ba5461e80 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -158,7 +158,8 @@ struct mlx5_vdpa_net { struct mutex reslock; struct mlx5_flow_table *rxft; struct mlx5_fc *rx_counter; - struct mlx5_flow_handle *rx_rule; + struct mlx5_flow_handle *rx_rule_ucast; + struct mlx5_flow_handle *rx_rule_mcast; bool setup; u32 cur_num_vqs; struct notifier_block nb; @@ -1383,21 +1384,33 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_namespace *ns; + struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; + u8 *dmac_c; + u8 *dmac_v; int err; - /* for now, one entry, match all, forward to tir */ - ft_attr.max_fte = 1; - ft_attr.autogroup.max_num_groups = 1; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); if (!ns) { - mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); - return -EOPNOTSUPP; + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); + err = -EOPNOTSUPP; + goto err_ns; } ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(ndev->rxft)) - return PTR_ERR(ndev->rxft); + if (IS_ERR(ndev->rxft)) { + err = PTR_ERR(ndev->rxft); + goto err_ns; + } ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); if (IS_ERR(ndev->rx_counter)) { @@ -1405,37 +1418,64 @@ static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) goto err_fc; } + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); + memset(dmac_c, 0xff, ETH_ALEN); + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, ndev->config.mac); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest[0].tir_num = ndev->res.tirn; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); - ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); - if (IS_ERR(ndev->rx_rule)) { - err = PTR_ERR(ndev->rx_rule); - ndev->rx_rule = NULL; - goto err_rule; + ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); + + if (IS_ERR(ndev->rx_rule_ucast)) { + err = PTR_ERR(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; + goto err_rule_ucast; + } + + memset(dmac_c, 0, ETH_ALEN); + memset(dmac_v, 0, ETH_ALEN); + dmac_c[0] = 1; + dmac_v[0] = 1; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); + if (IS_ERR(ndev->rx_rule_mcast)) { + err = PTR_ERR(ndev->rx_rule_mcast); + ndev->rx_rule_mcast = NULL; + goto err_rule_mcast; } + kvfree(spec); return 0; -err_rule: +err_rule_mcast: + mlx5_del_flow_rules(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; +err_rule_ucast: mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); err_fc: 
mlx5_destroy_flow_table(ndev->rxft); +err_ns: + kvfree(spec); return err; } static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) { - if (!ndev->rx_rule) + if (!ndev->rx_rule_ucast) return; - mlx5_del_flow_rules(ndev->rx_rule); + mlx5_del_flow_rules(ndev->rx_rule_mcast); + ndev->rx_rule_mcast = NULL; + mlx5_del_flow_rules(ndev->rx_rule_ucast); + ndev->rx_rule_ucast = NULL; mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); mlx5_destroy_flow_table(ndev->rxft); - - ndev->rx_rule = NULL; } static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) From 94e5d5b04f8056f8eec57f8b104ec76d3e945279 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 21 Oct 2021 15:17:49 +0530 Subject: [PATCH 0742/1202] i2c: virtio: Add support for zero-length requests The virtio specification received a new mandatory feature (VIRTIO_I2C_F_ZERO_LENGTH_REQUEST) for zero length requests. Fail if the feature isn't offered by the device. For each read-request, set the VIRTIO_I2C_FLAGS_M_RD flag, as required by the VIRTIO_I2C_F_ZERO_LENGTH_REQUEST feature. This allows us to support zero length requests, like SMBUS Quick, where the buffer need not be sent anymore. Signed-off-by: Viresh Kumar Link: https://lore.kernel.org/r/7c58868cd26d2fc4bd82d0d8b0dfb55636380110.1634808714.git.viresh.kumar@linaro.org Signed-off-by: Michael S. Tsirkin Acked-by: Jie Deng # once the spec is merged --- drivers/i2c/busses/i2c-virtio.c | 56 ++++++++++++++++++--------------- include/uapi/linux/virtio_i2c.h | 6 ++++ 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c index f10a603b13fb0..1ed4daa918a06 100644 --- a/drivers/i2c/busses/i2c-virtio.c +++ b/drivers/i2c/busses/i2c-virtio.c @@ -62,35 +62,33 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq, for (i = 0; i < num; i++) { int outcnt = 0, incnt = 0; - /* - * We don't support 0 length messages and so filter out - * 0 length transfers by using i2c_adapter_quirks. - */ - if (!msgs[i].len) - break; - /* * Only 7-bit mode supported for this moment. For the address * format, Please check the Virtio I2C Specification. 
*/ reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1); + if (msgs[i].flags & I2C_M_RD) + reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_M_RD); + if (i != num - 1) - reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT); + reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT); sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr)); sgs[outcnt++] = &out_hdr; - reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1); - if (!reqs[i].buf) - break; + if (msgs[i].len) { + reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1); + if (!reqs[i].buf) + break; - sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len); + sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len); - if (msgs[i].flags & I2C_M_RD) - sgs[outcnt + incnt++] = &msg_buf; - else - sgs[outcnt++] = &msg_buf; + if (msgs[i].flags & I2C_M_RD) + sgs[outcnt + incnt++] = &msg_buf; + else + sgs[outcnt++] = &msg_buf; + } sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr)); sgs[outcnt + incnt++] = &in_hdr; @@ -191,7 +189,7 @@ static int virtio_i2c_setup_vqs(struct virtio_i2c *vi) static u32 virtio_i2c_func(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } static struct i2c_algorithm virtio_algorithm = { @@ -199,15 +197,16 @@ static struct i2c_algorithm virtio_algorithm = { .functionality = virtio_i2c_func, }; -static const struct i2c_adapter_quirks virtio_i2c_quirks = { - .flags = I2C_AQ_NO_ZERO_LEN, -}; - static int virtio_i2c_probe(struct virtio_device *vdev) { struct virtio_i2c *vi; int ret; + if (!virtio_has_feature(vdev, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST)) { + dev_err(&vdev->dev, "Zero-length request feature is mandatory\n"); + return -EINVAL; + } + vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL); if (!vi) return -ENOMEM; @@ -225,7 +224,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev) snprintf(vi->adap.name, sizeof(vi->adap.name), "i2c_virtio at virtio bus %d", vdev->index); vi->adap.algo = &virtio_algorithm; - vi->adap.quirks = &virtio_i2c_quirks; vi->adap.dev.parent = &vdev->dev; vi->adap.dev.of_node = vdev->dev.of_node; i2c_set_adapdata(&vi->adap, vi); @@ -270,11 +268,17 @@ static int virtio_i2c_restore(struct virtio_device *vdev) } #endif +static const unsigned int features[] = { + VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, +}; + static struct virtio_driver virtio_i2c_driver = { - .id_table = id_table, - .probe = virtio_i2c_probe, - .remove = virtio_i2c_remove, - .driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .id_table = id_table, + .probe = virtio_i2c_probe, + .remove = virtio_i2c_remove, + .driver = { .name = "i2c_virtio", }, #ifdef CONFIG_PM_SLEEP diff --git a/include/uapi/linux/virtio_i2c.h b/include/uapi/linux/virtio_i2c.h index 7c6a6fc01ad60..acf3b60691362 100644 --- a/include/uapi/linux/virtio_i2c.h +++ b/include/uapi/linux/virtio_i2c.h @@ -11,9 +11,15 @@ #include #include +/* Virtio I2C Feature bits */ +#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 + /* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ #define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) +/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ +#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) + /** * struct virtio_i2c_out_hdr - the virtio I2C message OUT header * @addr: the controlled device address From e0716ce4fe7836258c260d907097cee372aefe2a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Oct 2021 13:10:49 +0100 Subject: [PATCH 
0743/1202] iommu/virtio: Add definitions for VIRTIO_IOMMU_F_BYPASS_CONFIG Add definitions for the VIRTIO_IOMMU_F_BYPASS_CONFIG, which supersedes VIRTIO_IOMMU_F_BYPASS. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20211013121052.518113-2-jean-philippe@linaro.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Tian --- include/uapi/linux/virtio_iommu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h index 237e36a280cb6..cafd8cf7febfb 100644 --- a/include/uapi/linux/virtio_iommu.h +++ b/include/uapi/linux/virtio_iommu.h @@ -16,6 +16,7 @@ #define VIRTIO_IOMMU_F_BYPASS 3 #define VIRTIO_IOMMU_F_PROBE 4 #define VIRTIO_IOMMU_F_MMIO 5 +#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 struct virtio_iommu_range_64 { __le64 start; @@ -36,6 +37,8 @@ struct virtio_iommu_config { struct virtio_iommu_range_32 domain_range; /* Probe buffer size */ __le32 probe_size; + __u8 bypass; + __u8 reserved[7]; }; /* Request types */ @@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { __u8 reserved[3]; }; +#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) + struct virtio_iommu_req_attach { struct virtio_iommu_req_head head; __le32 domain; __le32 endpoint; - __u8 reserved[8]; + __le32 flags; + __u8 reserved[4]; struct virtio_iommu_req_tail tail; }; From b783f6271458dbabd1a680f6cfb83bf98a019b89 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Oct 2021 13:10:50 +0100 Subject: [PATCH 0744/1202] iommu/virtio: Support bypass domains The VIRTIO_IOMMU_F_BYPASS_CONFIG feature adds a new flag to the ATTACH request, that creates a bypass domain. Use it to enable identity domains. When VIRTIO_IOMMU_F_BYPASS_CONFIG is not supported by the device, we currently fail attaching to an identity domain. Future patches will instead create identity mappings in this case. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20211013121052.518113-3-jean-philippe@linaro.org Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Kevin Tian --- drivers/iommu/virtio-iommu.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 80930ce04a168..ee8a7afd667b5 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -71,6 +71,7 @@ struct viommu_domain { struct rb_root_cached mappings; unsigned long nr_endpoints; + bool bypass; }; struct viommu_endpoint { @@ -587,7 +588,9 @@ static struct iommu_domain *viommu_domain_alloc(unsigned type) { struct viommu_domain *vdomain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL); @@ -630,6 +633,17 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, vdomain->map_flags = viommu->map_flags; vdomain->viommu = viommu; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + if (!virtio_has_feature(viommu->vdev, + VIRTIO_IOMMU_F_BYPASS_CONFIG)) { + ida_free(&viommu->domain_ids, vdomain->id); + vdomain->viommu = 0; + return -EOPNOTSUPP; + } + + vdomain->bypass = true; + } + return 0; } @@ -691,6 +705,9 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) .domain = cpu_to_le32(vdomain->id), }; + if (vdomain->bypass) + req.flags |= cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS); + for (i = 0; i < fwspec->num_ids; i++) { req.endpoint = cpu_to_le32(fwspec->ids[i]); @@ -1132,6 +1149,7 @@ static unsigned int features[] = { VIRTIO_IOMMU_F_DOMAIN_RANGE, VIRTIO_IOMMU_F_PROBE, VIRTIO_IOMMU_F_MMIO, + VIRTIO_IOMMU_F_BYPASS_CONFIG, }; static struct virtio_device_id id_table[] = { From 20a162ea95cadfc2d6c6d68e3dc49d18cc0bf756 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Oct 2021 13:10:51 +0100 Subject: [PATCH 0745/1202] iommu/virtio: Sort reserved regions To ease identity mapping support, keep the list of reserved regions sorted. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20211013121052.518113-4-jean-philippe@linaro.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Tian --- drivers/iommu/virtio-iommu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index ee8a7afd667b5..d63ec4d11b009 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -423,7 +423,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev, size_t size; u64 start64, end64; phys_addr_t start, end; - struct iommu_resv_region *region = NULL; + struct iommu_resv_region *region = NULL, *next; unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; start = start64 = le64_to_cpu(mem->start); @@ -454,7 +454,12 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev, if (!region) return -ENOMEM; - list_add(®ion->list, &vdev->resv_regions); + /* Keep the list sorted */ + list_for_each_entry(next, &vdev->resv_regions, list) { + if (next->start > region->start) + break; + } + list_add_tail(®ion->list, &next->list); return 0; } From eda80309b8c0df70e4eb29057bb991ab829d910d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Oct 2021 13:10:52 +0100 Subject: [PATCH 0746/1202] iommu/virtio: Pass end address to viommu_add_mapping() To support identity mappings, the virtio-iommu driver must be able to represent full 64-bit ranges internally. 
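A minimal sketch of why an inclusive (start, end) pair is needed, assuming u64 arithmetic (illustrative, not part of the patch):

	#include <linux/kernel.h> /* ULLONG_MAX */
	#include <linux/types.h>

	/* Sketch only: the whole 64-bit IOVA space cannot be described as
	 * (start, size), since its size, 2^64, does not fit in a u64 and
	 * truncates to 0.  An inclusive (start, end) pair is unambiguous.
	 */
	static inline u64 whole_space_size(void)
	{
		u64 start = 0, end = ULLONG_MAX; /* inclusive end: unambiguous */
		return end - start + 1;          /* wraps to 0: ambiguous size */
	}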
Pass (start, end) instead of (start, size) to viommu_add/del_mapping(). Clean comments. The one about the returned size was never true: when sweeping the whole address space the returned size will most certainly be smaller than 2^64. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20211013121052.518113-5-jean-philippe@linaro.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Tian --- drivers/iommu/virtio-iommu.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index d63ec4d11b009..eceb9281c8c1e 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -311,8 +311,8 @@ static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf, * * On success, return the new mapping. Otherwise return NULL. */ -static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova, - phys_addr_t paddr, size_t size, u32 flags) +static int viommu_add_mapping(struct viommu_domain *vdomain, u64 iova, u64 end, + phys_addr_t paddr, u32 flags) { unsigned long irqflags; struct viommu_mapping *mapping; @@ -323,7 +323,7 @@ static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova, mapping->paddr = paddr; mapping->iova.start = iova; - mapping->iova.last = iova + size - 1; + mapping->iova.last = end; mapping->flags = flags; spin_lock_irqsave(&vdomain->mappings_lock, irqflags); @@ -338,26 +338,24 @@ static int viommu_add_mapping(struct viommu_domain *vdomain, unsigned long iova, * * @vdomain: the domain * @iova: start of the range - * @size: size of the range. A size of 0 corresponds to the entire address - * space. + * @end: end of the range * - * On success, returns the number of unmapped bytes (>= size) + * On success, returns the number of unmapped bytes */ static size_t viommu_del_mappings(struct viommu_domain *vdomain, - unsigned long iova, size_t size) + u64 iova, u64 end) { size_t unmapped = 0; unsigned long flags; - unsigned long last = iova + size - 1; struct viommu_mapping *mapping = NULL; struct interval_tree_node *node, *next; spin_lock_irqsave(&vdomain->mappings_lock, flags); - next = interval_tree_iter_first(&vdomain->mappings, iova, last); + next = interval_tree_iter_first(&vdomain->mappings, iova, end); while (next) { node = next; mapping = container_of(node, struct viommu_mapping, iova); - next = interval_tree_iter_next(node, iova, last); + next = interval_tree_iter_next(node, iova, end); /* Trying to split a mapping? 
*/ if (mapping->iova.start < iova) @@ -656,8 +654,8 @@ static void viommu_domain_free(struct iommu_domain *domain) { struct viommu_domain *vdomain = to_viommu_domain(domain); - /* Free all remaining mappings (size 2^64) */ - viommu_del_mappings(vdomain, 0, 0); + /* Free all remaining mappings */ + viommu_del_mappings(vdomain, 0, ULLONG_MAX); if (vdomain->viommu) ida_free(&vdomain->viommu->domain_ids, vdomain->id); @@ -742,6 +740,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, { int ret; u32 flags; + u64 end = iova + size - 1; struct virtio_iommu_req_map map; struct viommu_domain *vdomain = to_viommu_domain(domain); @@ -752,7 +751,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, if (flags & ~vdomain->map_flags) return -EINVAL; - ret = viommu_add_mapping(vdomain, iova, paddr, size, flags); + ret = viommu_add_mapping(vdomain, iova, end, paddr, flags); if (ret) return ret; @@ -761,7 +760,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, .domain = cpu_to_le32(vdomain->id), .virt_start = cpu_to_le64(iova), .phys_start = cpu_to_le64(paddr), - .virt_end = cpu_to_le64(iova + size - 1), + .virt_end = cpu_to_le64(end), .flags = cpu_to_le32(flags), }; @@ -770,7 +769,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map)); if (ret) - viommu_del_mappings(vdomain, iova, size); + viommu_del_mappings(vdomain, iova, end); return ret; } @@ -783,7 +782,7 @@ static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova, struct virtio_iommu_req_unmap unmap; struct viommu_domain *vdomain = to_viommu_domain(domain); - unmapped = viommu_del_mappings(vdomain, iova, size); + unmapped = viommu_del_mappings(vdomain, iova, iova + size - 1); if (unmapped < size) return 0; From b4cfbcf032a7efc3b359e9b656140be02842843c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Oct 2021 13:10:53 +0100 Subject: [PATCH 0747/1202] iommu/virtio: Support identity-mapped domains Support identity domains for devices that do not offer the VIRTIO_IOMMU_F_BYPASS_CONFIG feature, by creating 1:1 mappings between the virtual and physical address space. Identity domains created this way still perform noticeably better than DMA domains, because they don't have the overhead of setting up and tearing down mappings at runtime. The performance difference between this and bypass is minimal in comparison. It does not matter that the physical addresses in the identity mappings do not all correspond to memory. By enabling passthrough we are trusting the device driver and the device itself to only perform DMA to suitable locations. In some cases it may even be desirable to perform DMA to MMIO regions. Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20211013121052.518113-6-jean-philippe@linaro.org Signed-off-by: Michael S. Tsirkin Reviewed-by: Kevin Tian --- drivers/iommu/virtio-iommu.c | 61 +++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index eceb9281c8c1e..c9e8367d2962b 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -375,6 +375,55 @@ static size_t viommu_del_mappings(struct viommu_domain *vdomain, return unmapped; } +/* + * Fill the domain with identity mappings, skipping the device's reserved + * regions. 
+ */ +static int viommu_domain_map_identity(struct viommu_endpoint *vdev, + struct viommu_domain *vdomain) +{ + int ret; + struct iommu_resv_region *resv; + u64 iova = vdomain->domain.geometry.aperture_start; + u64 limit = vdomain->domain.geometry.aperture_end; + u32 flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE; + unsigned long granule = 1UL << __ffs(vdomain->domain.pgsize_bitmap); + + iova = ALIGN(iova, granule); + limit = ALIGN_DOWN(limit + 1, granule) - 1; + + list_for_each_entry(resv, &vdev->resv_regions, list) { + u64 resv_start = ALIGN_DOWN(resv->start, granule); + u64 resv_end = ALIGN(resv->start + resv->length, granule) - 1; + + if (resv_end < iova || resv_start > limit) + /* No overlap */ + continue; + + if (resv_start > iova) { + ret = viommu_add_mapping(vdomain, iova, resv_start - 1, + (phys_addr_t)iova, flags); + if (ret) + goto err_unmap; + } + + if (resv_end >= limit) + return 0; + + iova = resv_end + 1; + } + + ret = viommu_add_mapping(vdomain, iova, limit, (phys_addr_t)iova, + flags); + if (ret) + goto err_unmap; + return 0; + +err_unmap: + viommu_del_mappings(vdomain, 0, iova); + return ret; +} + /* * viommu_replay_mappings - re-send MAP requests * @@ -637,14 +686,18 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, vdomain->viommu = viommu; if (domain->type == IOMMU_DOMAIN_IDENTITY) { - if (!virtio_has_feature(viommu->vdev, - VIRTIO_IOMMU_F_BYPASS_CONFIG)) { + if (virtio_has_feature(viommu->vdev, + VIRTIO_IOMMU_F_BYPASS_CONFIG)) { + vdomain->bypass = true; + return 0; + } + + ret = viommu_domain_map_identity(vdev, vdomain); + if (ret) { ida_free(&viommu->domain_ids, vdomain->id); vdomain->viommu = 0; return -EOPNOTSUPP; } - - vdomain->bypass = true; } return 0; From 2b109044b081148b58974f5696ffd4383c3e9abb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 24 Oct 2021 09:41:40 -0400 Subject: [PATCH 0748/1202] virtio_blk: allow 0 as num_request_queues The default value is 0 meaning "no limit". However if 0 is specified on the command line it is instead silently converted to 1. Further, the value is already validated at point of use, there's no point in duplicating code validating the value when it is set. Simplify the code while making the behaviour more consistent by using plain module_param. Fixes: 1a662cf6cb9a ("virtio-blk: add num_request_queues module parameter") Cc: Max Gurtovoy Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6318134aab76e..c336d9bb9105c 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -30,20 +30,8 @@ #define VIRTIO_BLK_INLINE_SG_CNT 2 #endif -static int virtblk_queue_count_set(const char *val, - const struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, 1, nr_cpu_ids); -} - -static const struct kernel_param_ops queue_count_ops = { - .set = virtblk_queue_count_set, - .get = param_get_uint, -}; - static unsigned int num_request_queues; -module_param_cb(num_request_queues, &queue_count_ops, &num_request_queues, - 0644); +module_param(num_request_queues, uint, 0644); MODULE_PARM_DESC(num_request_queues, "Limit the number of request queues to use for blk device. " "0 for no limit. 
" From 2ce681584dbc5482260e34127f7392048dc18f14 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Oct 2021 14:56:00 -0700 Subject: [PATCH 0749/1202] mm: shmem: fix uninitialized variable use in me_pagecache_clean() It appears that the has_extra_refcount() is now in the wrong place: mm/memory-failure.c:892:6: error: variable 'extra_pins' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (!mapping) { ^~~~~~~~ mm/memory-failure.c:915:32: note: uninitialized use occurs here if (has_extra_refcount(ps, p, extra_pins)) ^~~~~~~~~~ mm/memory-failure.c:892:2: note: remove the 'if' if its condition is always false if (!mapping) { ^~~~~~~~~~~~~~~ mm/memory-failure.c:879:6: error: variable 'extra_pins' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (PageAnon(p)) { ^~~~~~~~~~~ mm/memory-failure.c:915:32: note: uninitialized use occurs here if (has_extra_refcount(ps, p, extra_pins)) ^~~~~~~~~~ mm/memory-failure.c:879:2: note: remove the 'if' if its condition is always false if (PageAnon(p)) { ^~~~~~~~~~~~~~~~~~ mm/memory-failure.c:871:17: note: initialize the variable 'extra_pins' to silence this warning bool extra_pins; ^ = 0 In both of those cases, we already set an error code and don't need to override that one. Link: https://lkml.kernel.org/r/20211022064748.4173718-1-arnd@kernel.org Fixes: d882a43a0011 ("mm: shmem: don't truncate page if memory failure happens") Signed-off-by: Arnd Bergmann Cc: Naoya Horiguchi Cc: Yang Shi Cc: Stephen Rothwell Cc: Oscar Salvador Cc: Miaohe Lin Cc: Nathan Chancellor Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 28e7959d3c5c9..952a41d568c46 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -909,12 +909,12 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p) * Open: to take i_rwsem or not for this? Right now we don't. */ ret = truncate_error_page(p, page_to_pfn(p), mapping); -out: - unlock_page(p); - if (has_extra_refcount(ps, p, extra_pins)) ret = MF_FAILED; +out: + unlock_page(p); + return ret; } From 3c93fd30f314034aa197a8d2fcbd6d08d82fe726 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:07:52 +1100 Subject: [PATCH 0750/1202] mm: hwpoison: handle non-anonymous THP correctly Currently hwpoison doesn't handle non-anonymous THP, but since v4.8 THP support for tmpfs and read-only file cache has been added. They could be offlined by split THP, just like anonymous THP. Link: https://lkml.kernel.org/r/20211020210755.23964-7-shy828301@gmail.com Acked-by: Naoya Horiguchi Signed-off-by: Yang Shi Cc: Hugh Dickins Cc: Kirill A. 
Shutemov Cc: Matthew Wilcox Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory-failure.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 952a41d568c46..f38b7b42a508a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1444,14 +1444,11 @@ static int identify_page_state(unsigned long pfn, struct page *p, static int try_to_split_thp_page(struct page *page, const char *msg) { lock_page(page); - if (!PageAnon(page) || unlikely(split_huge_page(page))) { + if (unlikely(split_huge_page(page))) { unsigned long pfn = page_to_pfn(page); unlock_page(page); - if (!PageAnon(page)) - pr_info("%s: %#lx: non anonymous thp\n", msg, pfn); - else - pr_info("%s: %#lx: thp split failed\n", msg, pfn); + pr_info("%s: %#lx: thp split failed\n", msg, pfn); put_page(page); return -EBUSY; } From c376e56a34748f027e25b6e454da53030aaad52b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 21 Oct 2021 15:07:52 +1100 Subject: [PATCH 0751/1202] mm/hugetlb: drop __unmap_hugepage_range definition from hugetlb.h Remove __unmap_hugepage_range() from the header file, because it is only used in hugetlb.c. Link: https://lkml.kernel.org/r/20210917165108.9341-1-peterx@redhat.com Signed-off-by: Peter Xu Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Reviewed-by: Muchun Song Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/hugetlb.h | 10 ---------- mm/hugetlb.c | 6 +++--- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1faebe1cd0ed5..3cbf604643984 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -143,9 +143,6 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page); -void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo(void); @@ -385,13 +382,6 @@ static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, BUG(); } -static inline void __unmap_hugepage_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page) -{ - BUG(); -} - static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, unsigned int flags) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 95dc7b83381f9..d394d9545c4eb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4426,9 +4426,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, return ret; } -void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page) +static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page) { struct mm_struct *mm = vma->vm_mm; unsigned long address; From 65b896c559bb83433702cfd032700531e12d9ea2 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:53 +1100 Subject: [PATCH 0752/1202] hugetlb: add demote hugetlb page sysfs interfaces Patch series "hugetlb: add demote/split page functionality", v4. 
The concurrent use of multiple hugetlb page sizes on a single system is becoming more common. One of the reasons is better TLB support for gigantic page sizes on x86 hardware. In addition, hugetlb pages are being used to back VMs in hosting environments. When using hugetlb pages to back VMs, it is often desirable to preallocate hugetlb pools. This avoids the delay and uncertainty of allocating hugetlb pages at VM startup. In addition, preallocating huge pages minimizes the issue of memory fragmentation that increases the longer the system is up and running. In such environments, a combination of larger and smaller hugetlb pages are preallocated in anticipation of backing VMs of various sizes. Over time, the preallocated pool of smaller hugetlb pages may become depleted while larger hugetlb pages still remain. In such situations, it is desirable to convert larger hugetlb pages to smaller hugetlb pages. Converting larger to smaller hugetlb pages can be accomplished today by first freeing the larger page to the buddy allocator and then allocating the smaller pages. For example, to convert 50 GB pages on x86: gb_pages=`cat .../hugepages-1048576kB/nr_hugepages` m2_pages=`cat .../hugepages-2048kB/nr_hugepages` echo $(($gb_pages - 50)) > .../hugepages-1048576kB/nr_hugepages echo $(($m2_pages + 25600)) > .../hugepages-2048kB/nr_hugepages On an idle system this operation is fairly reliable and results are as expected. The number of 2MB pages is increased as expected and the time of the operation is a second or two. However, when there is activity on the system the following issues arise: 1) This process can take quite some time, especially if allocation of the smaller pages is not immediate and requires migration/compaction. 2) There is no guarantee that the total size of smaller pages allocated will match the size of the larger page which was freed. This is because the area freed by the larger page could quickly be fragmented. In a test environment with a load that continually fills the page cache with clean pages, results such as the following can be observed: Unexpected number of 2MB pages allocated: Expected 25600, have 19944 real 0m42.092s user 0m0.008s sys 0m41.467s To address these issues, introduce the concept of hugetlb page demotion. Demotion provides a means of 'in place' splitting of a hugetlb page to pages of a smaller size. This avoids freeing pages to buddy and then trying to allocate from buddy. Page demotion is controlled via sysfs files that reside in the per-hugetlb page size and per node directories. - demote_size Target page size for demotion, a smaller huge page size. File can be written to choose a smaller huge page size if multiple are available. - demote Writable number of hugetlb pages to be demoted To demote 50 GB huge pages, one would: cat .../hugepages-1048576kB/free_hugepages /* optional, verify free pages */ cat .../hugepages-1048576kB/demote_size /* optional, verify target size */ echo 50 > .../hugepages-1048576kB/demote Only hugetlb pages which are free at the time of the request can be demoted. Demotion does not add to the complexity of surplus pages and honors reserved huge pages. Therefore, when a value is written to the sysfs demote file, that value is only the maximum number of pages which will be demoted. It is possible fewer will actually be demoted. The recently introduced per-hstate mutex is used to synchronize demote operations with other operations that modify hugetlb pools.
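The demote knob can be driven from a program as well as from the shell; a minimal sketch (hypothetical helper, not part of this series; sysfs path as described above, error handling trimmed):

    #include <stdio.h>

    /* Demote `count' huge pages of size `size_kb' (e.g. 1048576 for 1GB)
     * by writing to the per-size demote file described above. */
    static int demote_pages(unsigned long size_kb, unsigned long count)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path),
                     "/sys/kernel/mm/hugepages/hugepages-%lukB/demote",
                     size_kb);
            f = fopen(path, "w");
            if (!f)
                    return -1;
            fprintf(f, "%lu\n", count);
            return fclose(f);
    }

    int main(void)
    {
            /* Equivalent of `echo 50 > .../hugepages-1048576kB/demote'. */
            return demote_pages(1048576, 50) ? 1 : 0;
    }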
Real world use cases -------------------- The above scenario describes a real world use case where hugetlb pages are used to back VMs on x86. Both issues of long allocation times and not necessarily getting the expected number of smaller huge pages after a free and allocate cycle have been experienced. The occurrence of these issues is dependent on other activity within the host and can not be predicted. This patch (of 5): Two new sysfs files are added to demote hugetlb pages. These files are both per-hugetlb page size and per node. Files are: demote_size - The size in Kb that pages are demoted to. (read-write) demote - The number of huge pages to demote. (write-only) By default, demote_size is the next smallest huge page size. Valid huge page sizes less than the huge page size may be written to this file. When huge pages are demoted, they are demoted to this size. Writing a value to demote will result in an attempt to demote that number of hugetlb pages to an appropriate number of demote_size pages. NOTE: Demote interfaces are only provided for huge page sizes if there is a smaller target demote huge page size. For example, on x86 1GB huge pages will have demote interfaces. 2MB huge pages will not have demote interfaces. This patch does not provide full demote functionality. It only provides the sysfs interfaces. It also provides documentation for the new interfaces. Link: https://lkml.kernel.org/r/20211007181918.136982-2-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Cc: Zi Yan Cc: Muchun Song Cc: Naoya Horiguchi Cc: David Rientjes Cc: "Aneesh Kumar K . V" Cc: Nghia Le Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/hugetlbpage.rst | 30 +++- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 155 ++++++++++++++++++- 3 files changed, 183 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index 8abaeb144e44d..bb90de3885d1b 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -234,8 +234,12 @@ will exist, of the form:: hugepages-${size}kB -Inside each of these directories, the same set of files will exist:: +Inside each of these directories, the set of files contained in ``/proc`` +will exist. In addition, two additional interfaces for demoting huge +pages may exist:: + demote + demote_size nr_hugepages nr_hugepages_mempolicy nr_overcommit_hugepages @@ -243,7 +247,29 @@ Inside each of these directories, the same set of files will exist:: resv_hugepages surplus_hugepages -which function as described above for the default huge page-sized case. +The demote interfaces provide the ability to split a huge page into +smaller huge pages. For example, the x86 architecture supports both +1GB and 2MB huge page sizes. A 1GB huge page can be split into 512 +2MB huge pages. Demote interfaces are not available for the smallest +huge page size. The demote interfaces are: + +demote_size + is the size of demoted pages. When a page is demoted a corresponding + number of huge pages of demote_size will be created. By default, + demote_size is set to the next smaller huge page size. If there are + multiple smaller huge page sizes, demote_size can be set to any of + these smaller sizes. Only huge page sizes less than the current huge + page size are allowed.
+ +demote + is used to demote a number of huge pages. A user with root privileges + can write to this file. It may not be possible to demote the + requested number of huge pages. To determine how many pages were + actually demoted, compare the value of nr_hugepages before and after + writing to the demote interface. demote is a write only interface. + +The interfaces which are the same as in ``/proc`` (all except demote and +demote_size) function as described above for the default huge page-sized case. .. _mem_policy_and_hp_alloc: diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 3cbf604643984..2bddd6c382046 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -586,6 +586,7 @@ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; + unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d394d9545c4eb..b876055ac0ec4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2986,7 +2986,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) static void __init hugetlb_init_hstates(void) { - struct hstate *h; + struct hstate *h, *h2; for_each_hstate(h) { if (minimum_order > huge_page_order(h)) @@ -2995,6 +2995,22 @@ static void __init hugetlb_init_hstates(void) /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); + + /* + * Set demote order for each hstate. Note that + * h->demote_order is initially 0. + * - We can not demote gigantic pages if runtime freeing + * is not supported, so skip this. + */ + if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) + continue; + for_each_hstate(h2) { + if (h2 == h) + continue; + if (h2->order < h->order && + h2->order > h->demote_order) + h->demote_order = h2->order; + } } VM_BUG_ON(minimum_order == UINT_MAX); } @@ -3235,9 +3251,31 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } +static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) + __must_hold(&hugetlb_lock) +{ + int rc = 0; + + lockdep_assert_held(&hugetlb_lock); + + /* We should never get here if no demote order */ + if (!h->demote_order) { + pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n"); + return -EINVAL; /* internal error */ + } + + /* + * TODO - demote functionality will be added in subsequent patch + */ + return rc; +} + #define HSTATE_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) +#define HSTATE_ATTR_WO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_WO(_name) + #define HSTATE_ATTR(_name) \ static struct kobj_attribute _name##_attr = \ __ATTR(_name, 0644, _name##_show, _name##_store) @@ -3433,6 +3471,105 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj, } HSTATE_ATTR_RO(surplus_hugepages); +static ssize_t demote_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + unsigned long nr_demote; + unsigned long nr_available; + nodemask_t nodes_allowed, *n_mask; + struct hstate *h; + int err = 0; + int nid; + + err = kstrtoul(buf, 10, &nr_demote); + if (err) + return err; + h = kobj_to_hstate(kobj, &nid); + + /* Synchronize with other sysfs operations modifying huge pages */ + mutex_lock(&h->resize_lock); + + if (nid != NUMA_NO_NODE) { + init_nodemask_of_node(&nodes_allowed, nid); + n_mask = &nodes_allowed; + } else { + n_mask = &node_states[N_MEMORY]; + } + +
spin_lock_irq(&hugetlb_lock); + while (nr_demote) { + /* + * Check for available pages to demote each time through the + * loop as demote_pool_huge_page will drop hugetlb_lock. + * + * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock + * but will when full demote functionality is added in a later + * patch. + */ + if (nid != NUMA_NO_NODE) + nr_available = h->free_huge_pages_node[nid]; + else + nr_available = h->free_huge_pages; + nr_available -= h->resv_huge_pages; + if (!nr_available) + break; + + err = demote_pool_huge_page(h, n_mask); + if (err) + break; + + nr_demote--; + } + + spin_unlock_irq(&hugetlb_lock); + mutex_unlock(&h->resize_lock); + + if (err) + return err; + return len; +} +HSTATE_ATTR_WO(demote); + +static ssize_t demote_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int nid; + struct hstate *h = kobj_to_hstate(kobj, &nid); + unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; + + return sysfs_emit(buf, "%lukB\n", demote_size); +} + +static ssize_t demote_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct hstate *h, *demote_hstate; + unsigned long demote_size; + unsigned int demote_order; + int nid; + + demote_size = (unsigned long)memparse(buf, NULL); + + demote_hstate = size_to_hstate(demote_size); + if (!demote_hstate) + return -EINVAL; + demote_order = demote_hstate->order; + + /* demote order must be smaller than hstate order */ + h = kobj_to_hstate(kobj, &nid); + if (demote_order >= h->order) + return -EINVAL; + + /* resize_lock synchronizes access to demote size and writes */ + mutex_lock(&h->resize_lock); + h->demote_order = demote_order; + mutex_unlock(&h->resize_lock); + + return count; +} +HSTATE_ATTR(demote_size); + static struct attribute *hstate_attrs[] = { &nr_hugepages_attr.attr, &nr_overcommit_hugepages_attr.attr, @@ -3449,6 +3586,16 @@ static const struct attribute_group hstate_attr_group = { .attrs = hstate_attrs, }; +static struct attribute *hstate_demote_attrs[] = { + &demote_size_attr.attr, + &demote_attr.attr, + NULL, +}; + +static const struct attribute_group hstate_demote_attr_group = { + .attrs = hstate_demote_attrs, +}; + static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, struct kobject **hstate_kobjs, const struct attribute_group *hstate_attr_group) @@ -3466,6 +3613,12 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, hstate_kobjs[hi] = NULL; } + if (h->demote_order) { + if (sysfs_create_group(hstate_kobjs[hi], + &hstate_demote_attr_group)) + pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); + } + return retval; } From 317bb58bb7a6f59c0f4569d0a4fd38f48432f708 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:53 +1100 Subject: [PATCH 0753/1202] mm/cma: add cma_pages_valid to determine if pages are in CMA Add new interface cma_pages_valid() which indicates if the specified pages are part of a CMA region. This interface will be used in a subsequent patch by hugetlb code. In order to keep the same amount of DEBUG information, a pr_debug() call was added to cma_pages_valid(). In the case where the page passed to cma_release is not in cma region, the debug message will be printed from cma_pages_valid as opposed to cma_release. Link: https://lkml.kernel.org/r/20211007181918.136982-3-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: "Aneesh Kumar K .
V" Cc: David Rientjes Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nghia Le Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/cma.h | 1 + mm/cma.c | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 53fd8c3cdbd04..bd801023504b2 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -46,6 +46,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align, bool no_warn); +extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); diff --git a/mm/cma.c b/mm/cma.c index 995e15480937f..11152c3fb23cc 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -524,6 +524,25 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, return page; } +bool cma_pages_valid(struct cma *cma, const struct page *pages, + unsigned long count) +{ + unsigned long pfn; + + if (!cma || !pages) + return false; + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) { + pr_debug("%s(page %p, count %lu)\n", __func__, + (void *)pages, count); + return false; + } + + return true; +} + /** * cma_release() - release allocated pages * @cma: Contiguous memory region for which the allocation is performed. @@ -539,16 +558,13 @@ bool cma_release(struct cma *cma, const struct page *pages, { unsigned long pfn; - if (!cma || !pages) + if (!cma_pages_valid(cma, pages, count)) return false; pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count); pfn = page_to_pfn(pages); - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); free_contig_range(pfn, count); From fbb55b06ce3dd24cfcec19d24eb589d5c487f72a Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:54 +1100 Subject: [PATCH 0754/1202] hugetlb: be sure to free demoted CMA pages to CMA When huge page demotion is fully implemented, gigantic pages can be demoted to a smaller huge page size. For example, on x86 a 1G page can be demoted to 512 2M pages. However, gigantic pages can potentially be allocated from CMA. If a gigantic page which was allocated from CMA is demoted, the corresponding demoted pages needs to be returned to CMA. Use the new interface cma_pages_valid() to determine if a non-gigantic hugetlb page should be freed to CMA. Also, clear mapping field of these pages as expected by cma_release. This also requires a change to CMA region creation for gigantic pages. CMA uses a per-region bit map to track allocations. When setting up the region, you specify how many pages each bit represents. Currently, only gigantic pages are allocated/freed from CMA so the region is set up such that one bit represents a gigantic page size allocation. With demote, a gigantic page (allocation) could be split into smaller size pages. And, these smaller size pages will be freed to CMA. So, since the per-region bit map needs to be set up to represent the smallest allocation/free size, it now needs to be set to the smallest huge page size which can be freed to CMA. Unfortunately, we set up the CMA region for huge pages before we set up huge pages sizes (hstates). 
So, technically we do not know the smallest huge page size as this can change via command line options and architecture specific code. Therefore, at region setup time we use HUGETLB_PAGE_ORDER as the smallest possible huge page size that can be given back to CMA. It is possible that this value is sub-optimal for some architectures/config options. If needed, this can be addressed in follow on work. Link: https://lkml.kernel.org/r/20211007181918.136982-4-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: "Aneesh Kumar K . V" Cc: David Hildenbrand Cc: David Rientjes Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nghia Le Cc: Oscar Salvador Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b876055ac0ec4..bff3b942b6690 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -50,6 +50,16 @@ struct hstate hstates[HUGE_MAX_HSTATE]; #ifdef CONFIG_CMA static struct cma *hugetlb_cma[MAX_NUMNODES]; +static bool hugetlb_cma_page(struct page *page, unsigned int order) +{ + return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page, + 1 << order); +} +#else +static bool hugetlb_cma_page(struct page *page, unsigned int order) +{ + return false; +} #endif static unsigned long hugetlb_cma_size __initdata; @@ -1272,6 +1282,7 @@ static void destroy_compound_gigantic_page(struct page *page, atomic_set(compound_pincount_ptr(page), 0); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { + p->mapping = NULL; clear_compound_head(p); set_page_refcounted(p); } @@ -1476,7 +1487,13 @@ static void __update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_private | 1 << PG_writeback); } - if (hstate_is_gigantic(h)) { + + /* + * Non-gigantic pages demoted from CMA allocated gigantic pages + * need to be given back to CMA in free_gigantic_page. + */ + if (hstate_is_gigantic(h) || + hugetlb_cma_page(page, huge_page_order(h))) { destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); } else { @@ -3001,9 +3018,13 @@ static void __init hugetlb_init_hstates(void) * h->demote_order is initially 0. * - We can not demote gigantic pages if runtime freeing * is not supported, so skip this. + * - If CMA allocation is possible, we can not demote + * HUGETLB_PAGE_ORDER or smaller size pages. */ if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) continue; + if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER) + continue; for_each_hstate(h2) { if (h2 == h) continue; @@ -3555,6 +3576,8 @@ static ssize_t demote_size_store(struct kobject *kobj, if (!demote_hstate) return -EINVAL; demote_order = demote_hstate->order; + if (demote_order < HUGETLB_PAGE_ORDER) + return -EINVAL; /* demote order must be smaller than hstate order */ h = kobj_to_hstate(kobj, &nid); @@ -6543,6 +6566,7 @@ void __init hugetlb_cma_reserve(int order) if (hugetlb_cma_size < (PAGE_SIZE << order)) { pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", (PAGE_SIZE << order) / SZ_1M); + hugetlb_cma_size = 0; return; } @@ -6563,7 +6587,13 @@ void __init hugetlb_cma_reserve(int order) size = round_up(size, PAGE_SIZE << order); snprintf(name, sizeof(name), "hugetlb%d", nid); - res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, + /* + * Note that 'order per bit' is based on smallest size that + * may be returned to CMA allocator in the case of + * huge page demotion. 
+ */ + res = cma_declare_contiguous_nid(0, size, 0, + PAGE_SIZE << HUGETLB_PAGE_ORDER, 0, false, name, &hugetlb_cma[nid], nid); if (res) { @@ -6579,6 +6609,13 @@ void __init hugetlb_cma_reserve(int order) if (reserved >= hugetlb_cma_size) break; } + + if (!reserved) + /* + * hugetlb_cma_size is used to determine if allocations from + * cma are possible. Set to zero if no cma regions are set up. + */ + hugetlb_cma_size = 0; } void __init hugetlb_cma_check(void) From bbfa69c3d407ba61cedfd70eb5ee85657cb71748 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:55 +1100 Subject: [PATCH 0755/1202] hugetlb: add demote bool to gigantic page routines The routines remove_hugetlb_page and destroy_compound_gigantic_page will remove a gigantic page and make the set of base pages ready to be returned to a lower level allocator. In the process of doing this, they make all base pages reference counted. The routine prep_compound_gigantic_page creates a gigantic page from a set of base pages. It assumes that all these base pages are reference counted. During demotion, a gigantic page will be split into huge pages of a smaller size. This logically involves use of the routines, remove_hugetlb_page, and destroy_compound_gigantic_page followed by prep_compound*_page for each smaller huge page. When pages are reference counted (ref count >= 0), additional speculative ref counts could be taken. This could result in errors while demoting a huge page. Quite a bit of code would need to be created to handle all possible issues. Instead of dealing with the possibility of speculative ref counts, avoid the possibility by keeping ref counts at zero during the demote process. Add a boolean 'demote' to the routines remove_hugetlb_page, destroy_compound_gigantic_page and prep_compound_gigantic_page. If the boolean is set, the remove and destroy routines will not reference count pages and the prep routine will not expect reference counted pages. '*_for_demote' wrappers of the routines will be added in a subsequent patch where this functionality is used. Link: https://lkml.kernel.org/r/20211007181918.136982-5-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: "Aneesh Kumar K . 
V" Cc: David Hildenbrand Cc: David Rientjes Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nghia Le Cc: Oscar Salvador Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 54 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index bff3b942b6690..82da0fbd1db28 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1271,8 +1271,8 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) nr_nodes--) #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static void destroy_compound_gigantic_page(struct page *page, - unsigned int order) +static void __destroy_compound_gigantic_page(struct page *page, + unsigned int order, bool demote) { int i; int nr_pages = 1 << order; @@ -1284,7 +1284,8 @@ static void destroy_compound_gigantic_page(struct page *page, for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { p->mapping = NULL; clear_compound_head(p); - set_page_refcounted(p); + if (!demote) + set_page_refcounted(p); } set_compound_order(page, 0); @@ -1292,6 +1293,12 @@ static void destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } +static void destroy_compound_gigantic_page(struct page *page, + unsigned int order) +{ + __destroy_compound_gigantic_page(page, order, false); +} + static void free_gigantic_page(struct page *page, unsigned int order) { /* @@ -1364,12 +1371,15 @@ static inline void destroy_compound_gigantic_page(struct page *page, /* * Remove hugetlb page from lists, and update dtor so that page appears - * as just a compound page. A reference is held on the page. + * as just a compound page. + * + * A reference is held on the page, except in the case of demote. * * Must be called with hugetlb lock held. */ -static void remove_hugetlb_page(struct hstate *h, struct page *page, - bool adjust_surplus) +static void __remove_hugetlb_page(struct hstate *h, struct page *page, + bool adjust_surplus, + bool demote) { int nid = page_to_nid(page); @@ -1407,8 +1417,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, * * This handles the case where more than one ref is held when and * after update_and_free_page is called. + * + * In the case of demote we do not ref count the page as it will soon + * be turned into a page of smaller size. */ - set_page_refcounted(page); + if (!demote) + set_page_refcounted(page); if (hstate_is_gigantic(h)) set_compound_page_dtor(page, NULL_COMPOUND_DTOR); else @@ -1418,6 +1432,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, h->nr_huge_pages_node[nid]--; } +static void remove_hugetlb_page(struct hstate *h, struct page *page, + bool adjust_surplus) +{ + __remove_hugetlb_page(h, page, adjust_surplus, false); +} + static void add_hugetlb_page(struct hstate *h, struct page *page, bool adjust_surplus) { @@ -1681,7 +1701,8 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) spin_unlock_irq(&hugetlb_lock); } -static bool prep_compound_gigantic_page(struct page *page, unsigned int order) +static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, + bool demote) { int i, j; int nr_pages = 1 << order; @@ -1719,10 +1740,16 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) * the set of pages can not be converted to a gigantic page. * The caller who allocated the pages should then discard the * pages using the appropriate free interface. 
+ * + * In the case of demote, the ref count will be zero. */ - if (!page_ref_freeze(p, 1)) { - pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); - goto out_error; + if (!demote) { + if (!page_ref_freeze(p, 1)) { + pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n"); + goto out_error; + } + } else { + VM_BUG_ON_PAGE(page_count(p), p); } set_page_count(p, 0); set_compound_head(p, page); @@ -1747,6 +1774,11 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) return false; } +static bool prep_compound_gigantic_page(struct page *page, unsigned int order) +{ + return __prep_compound_gigantic_page(page, order, false); +} + /* * PageHuge() only returns true for hugetlbfs pages, but not for normal or * transparent huge pages. See the PageTransHuge() documentation for more From a69e8ca017b7271589f4be7633933cc7c1affd7c Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:55 +1100 Subject: [PATCH 0756/1202] hugetlb: add hugetlb demote page support Demote page functionality will split a huge page into a number of huge pages of a smaller size. For example, on x86 a 1GB huge page can be demoted into 512 2M huge pages. Demotion is done 'in place' by simply splitting the huge page. Added '*_for_demote' wrappers for remove_hugetlb_page, destroy_compound_hugetlb_page and prep_compound_gigantic_page for use by demote code. Link: https://lkml.kernel.org/r/20211007181918.136982-6-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: "Aneesh Kumar K . V" Cc: David Hildenbrand Cc: David Rientjes Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nghia Le Cc: Oscar Salvador Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 82da0fbd1db28..ddfa6e49629cd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) ((node = hstate_next_node_to_free(hs, mask)) || 1); \ nr_nodes--) -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +/* used to demote non-gigantic_huge pages as well */ static void __destroy_compound_gigantic_page(struct page *page, unsigned int order, bool demote) { @@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page, __ClearPageHead(page); } +static void destroy_compound_hugetlb_page_for_demote(struct page *page, + unsigned int order) +{ + __destroy_compound_gigantic_page(page, order, true); +} + +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static void destroy_compound_gigantic_page(struct page *page, unsigned int order) { @@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page, __remove_hugetlb_page(h, page, adjust_surplus, false); } +static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page, + bool adjust_surplus) +{ + __remove_hugetlb_page(h, page, adjust_surplus, true); +} + static void add_hugetlb_page(struct hstate *h, struct page *page, bool adjust_surplus) { @@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order) return __prep_compound_gigantic_page(page, order, false); } +static bool prep_compound_gigantic_page_for_demote(struct page *page, + unsigned int order) +{ + return __prep_compound_gigantic_page(page, order, true); +} + /* * PageHuge() only returns true for hugetlbfs pages, but not 
for normal or * transparent huge pages. See the PageTransHuge() documentation for more @@ -3304,9 +3323,54 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } +static int demote_free_huge_page(struct hstate *h, struct page *page) +{ + int i, nid = page_to_nid(page); + struct hstate *target_hstate; + int rc = 0; + + target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); + + remove_hugetlb_page_for_demote(h, page, false); + spin_unlock_irq(&hugetlb_lock); + + rc = alloc_huge_page_vmemmap(h, page); + if (rc) { + /* Allocation of vmemmap failed, we can not demote page */ + spin_lock_irq(&hugetlb_lock); + set_page_refcounted(page); + add_hugetlb_page(h, page, false); + return rc; + } + + /* + * Use destroy_compound_hugetlb_page_for_demote for all huge page + * sizes as it will not ref count pages. + */ + destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); + + for (i = 0; i < pages_per_huge_page(h); + i += pages_per_huge_page(target_hstate)) { + if (hstate_is_gigantic(target_hstate)) + prep_compound_gigantic_page_for_demote(page + i, + target_hstate->order); + else + prep_compound_page(page + i, target_hstate->order); + set_page_private(page + i, 0); + set_page_refcounted(page + i); + prep_new_huge_page(target_hstate, page + i, nid); + put_page(page + i); + } + + spin_lock_irq(&hugetlb_lock); + return rc; +} + static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) __must_hold(&hugetlb_lock) { + int nr_nodes, node; + struct page *page; int rc = 0; lockdep_assert_held(&hugetlb_lock); @@ -3317,9 +3381,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) return -EINVAL; /* internal error */ } - /* - * TODO - demote functionality will be added in subsequent patch - */ + for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) { + if (!list_empty(&h->hugepage_freelists[node])) { + page = list_entry(h->hugepage_freelists[node].next, + struct page, lru); + rc = demote_free_huge_page(h, page); + break; + } + } + return rc; } @@ -3554,10 +3624,6 @@ static ssize_t demote_store(struct kobject *kobj, /* * Check for available pages to demote each time through the * loop as demote_pool_huge_page will drop hugetlb_lock. - * - * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock - * but will when full demote functionality is added in a later - * patch. */ if (nid != NUMA_NO_NODE) nr_available = h->free_huge_pages_node[nid]; From d9a9e257efcb062d850aba7be087096936f6fe16 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 21 Oct 2021 15:07:56 +1100 Subject: [PATCH 0757/1202] hugetlb-add-hugetlb-demote-page-support-v4 On 10/7/21 11:19 AM, Mike Kravetz wrote: > +static int demote_free_huge_page(struct hstate *h, struct page *page) > +{ > + int i, nid = page_to_nid(page); > + struct hstate *target_hstate; > + int rc = 0; > + > + target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order); > + > + remove_hugetlb_page_for_demote(h, page, false); > + spin_unlock_irq(&hugetlb_lock); > + > + rc = alloc_huge_page_vmemmap(h, page); > + if (rc) { > + /* Allocation of vmemmap failed, we can not demote page */ > + spin_lock_irq(&hugetlb_lock); > + set_page_refcounted(page); > + add_hugetlb_page(h, page, false); > + return rc; > + } > + > + /* > + * Use destroy_compound_hugetlb_page_for_demote for all huge page > + * sizes as it will not ref count pages.
> + */ > + destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); > + > + for (i = 0; i < pages_per_huge_page(h); > + i += pages_per_huge_page(target_hstate)) { > + if (hstate_is_gigantic(target_hstate)) > + prep_compound_gigantic_page_for_demote(page + i, > + target_hstate->order); > + else > + prep_compound_page(page + i, target_hstate->order); > + set_page_private(page + i, 0); > + set_page_refcounted(page + i); > + prep_new_huge_page(target_hstate, page + i, nid); > + put_page(page + i); > + } I was doing some stress testing with multiple concurrent writers to sysfs/.../nr_hugepages and sysfs/.../demote. On occasion, I would see unexpected surplus pages of the smaller huge page size (2M on x86). Here is what was happening. One task was decrementing the number of 2M huge pages with "echo 0 > nr_hugepages". It proceeded to the routine set_max_huge_pages and was executing the following: /* * Decrease the pool size * First return free pages to the buddy allocator (being careful * to keep enough around to satisfy reservations). Then place * pages into surplus state as needed so the pool will shrink * to the desired size as pages become free. * * By placing pages into the surplus state independent of the * overcommit value, we are allowing the surplus pool size to * exceed overcommit. There are few sane options here. Since * alloc_surplus_huge_page() is checking the global counter, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. */ min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; min_count = max(count, min_count); try_to_free_low(h, min_count, nodes_allowed); /* * Collect pages to be removed on list without dropping lock */ while (min_count < persistent_huge_pages(h)) { page = remove_pool_huge_page(h, nodes_allowed, 0); if (!page) break; list_add(&page->lru, &page_list); } /* free the pages after dropping lock */ spin_unlock_irq(&hugetlb_lock); update_and_free_pages_bulk(h, &page_list); flush_free_hpage_work(h); Now, while the lock was dropped the routine demote_free_huge_page above added 512 huge pages to the 2M pool. spin_lock_irq(&hugetlb_lock); Then after acquiring the lock we make these 512 pages surplus. while (count < persistent_huge_pages(h)) { if (!adjust_pool_surplus(h, nodes_allowed, 1)) break; } To prevent this race from happening in general, the hstate specific mutex resize_lock is held for the duration of set_max_huge_pages. Since the demote code is also adjusting pool sizes it should also take the mutex. The routine demote_store already takes the mutex of the hstate of the page size being demoted (1G in this case). That is because the 1G pool size will be decreased. We also need to take the resize mutex of the 2M pool as this pool will be increased. To prevent deadlocks, we use the convention of always taking the resize mutex of the larger hstate first. An updated version of this patch below adds taking the 'target hstate' mutex in demote_free_huge_page. Although unnecessary, it also updates max_huge_pages of both hstates for consistency. Demote page functionality will split a huge page into a number of huge pages of a smaller size. For example, on x86 a 1GB huge page can be demoted into 512 2M huge pages. Demotion is done 'in place' by simply splitting the huge page. Added '*_for_demote' wrappers for remove_hugetlb_page, destroy_compound_hugetlb_page and prep_compound_gigantic_page for use by demote code.
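The deadlock-avoidance rule adopted here is the classic ordered-locking pattern; a standalone analogue (illustrative pthreads sketch, not kernel code):

    #include <pthread.h>

    /* Stand-ins for the 1GB and 2MB hstate resize_locks. */
    static pthread_mutex_t resize_1g = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t resize_2m = PTHREAD_MUTEX_INITIALIZER;

    /* Every path that needs both locks takes the larger hstate's lock
     * first, so no two such paths can ever wait on each other. */
    static void demote_1g_to_2m(void)
    {
            pthread_mutex_lock(&resize_1g);   /* larger size first */
            pthread_mutex_lock(&resize_2m);   /* then the target */
            /* ... move pages from the 1GB pool to the 2MB pool ... */
            pthread_mutex_unlock(&resize_2m);
            pthread_mutex_unlock(&resize_1g);
    }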
Link: https://lkml.kernel.org/r/6ca29b8e-527c-d6ec-900e-e6a43e4f8b73@oracle.com Signed-off-by: Mike Kravetz Cc: "Aneesh Kumar K . V" Cc: David Hildenbrand Cc: David Rientjes Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: Nghia Le Cc: Oscar Salvador Cc: Zi Yan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/hugetlb.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ddfa6e49629cd..75edf153e87e9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3349,6 +3349,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) */ destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h)); + /* + * Taking target hstate mutex synchronizes with set_max_huge_pages. + * Without the mutex, pages added to target hstate could be marked + * as surplus. + * + * Note that we already hold h->resize_lock. To prevent deadlock, + * use the convention of always taking larger size hstate mutex first. + */ + mutex_lock(&target_hstate->resize_lock); for (i = 0; i < pages_per_huge_page(h); i += pages_per_huge_page(target_hstate)) { if (hstate_is_gigantic(target_hstate)) @@ -3361,8 +3370,17 @@ static int demote_free_huge_page(struct hstate *h, struct page *page) prep_new_huge_page(target_hstate, page + i, nid); put_page(page + i); } + mutex_unlock(&target_hstate->resize_lock); spin_lock_irq(&hugetlb_lock); + + /* + * Not absolutely necessary, but for consistency update max_huge_pages + * based on pool changes for the demoted page. + */ + h->max_huge_pages--; + target_hstate->max_huge_pages += pages_per_huge_page(h); + return rc; } From 773b3b59f27743d39ba79ec35fc3d19e37517e48 Mon Sep 17 00:00:00 2001 From: zhangyiru Date: Thu, 21 Oct 2021 15:07:56 +1100 Subject: [PATCH 0758/1202] mm,hugetlb: remove mlock ulimit for SHM_HUGETLB commit 21a3c273f88c9cbbaf7e ("mm, hugetlb: add thread name and pid to SHM_HUGETLB mlock rlimit warning") marked this as deprecated in 2012, but it is not deleted yet. Mike says he still sees that message in log files on occasion, so maybe we should preserve this warning. Link: https://lkml.kernel.org/r/20211009094355.127705-1-zhangyiru3@huawei.com Signed-off-by: zhangyiru Reviewed-by: Mike Kravetz Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/hugetlbfs/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index cdfb1ae78a3f8..5ff3418759edb 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1467,13 +1467,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, *ucounts = current_ucounts(); if (user_shm_lock(size, *ucounts)) { task_lock(current); - pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is obsolete\n", current->comm, current->pid); task_unlock(current); - } else { - *ucounts = NULL; - return ERR_PTR(-EPERM); } + *ucounts = NULL; + return ERR_PTR(-EPERM); } file = ERR_PTR(-ENOSPC); From 03bb6870075c75f0a4876ccfdd1803e1432efe3b Mon Sep 17 00:00:00 2001 From: Liangcai Fan Date: Thu, 21 Oct 2021 15:07:57 +1100 Subject: [PATCH 0759/1202] mm: khugepaged: Recalculate min_free_kbytes after stopping khugepaged When initializing transparent huge pages, min_free_kbytes would be calculated according to what khugepaged expected. So when transparent huge pages are disabled, min_free_kbytes should be recalculated rather than left at the higher value set by khugepaged.
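The recalculation is observable from userspace; a small check (illustrative sketch; standard procfs/sysfs paths):

    #include <stdio.h>

    /* Print vm.min_free_kbytes. Run once while khugepaged is active, then
     * again after `echo never > /sys/kernel/mm/transparent_hugepage/enabled';
     * with this patch the value drops back to the base calculation. */
    int main(void)
    {
            char buf[64];
            FILE *f = fopen("/proc/sys/vm/min_free_kbytes", "r");

            if (!f)
                    return 1;
            if (fgets(buf, sizeof(buf), f))
                    printf("min_free_kbytes: %s", buf);
            fclose(f);
            return 0;
    }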
Link: https://lkml.kernel.org/r/1633937809-16558-1-git-send-email-liangcaifan19@gmail.com Signed-off-by: Liangcai Fan Signed-off-by: Chunyan Zhang Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mm.h | 1 + mm/khugepaged.c | 10 ++++++++-- mm/page_alloc.c | 7 ++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 6e29deb1e0d4b..7e37c726b9db1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2453,6 +2453,7 @@ extern void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); +extern void calculate_min_free_kbytes(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 045cc579f724e..682130fd65234 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2291,6 +2291,11 @@ static void set_recommended_min_free_kbytes(void) int nr_zones = 0; unsigned long recommended_min; + if (!khugepaged_enabled()) { + calculate_min_free_kbytes(); + goto update_wmarks; + } + for_each_populated_zone(zone) { /* * We don't need to worry about fragmentation of @@ -2326,6 +2331,8 @@ static void set_recommended_min_free_kbytes(void) min_free_kbytes = recommended_min; } + +update_wmarks: setup_per_zone_wmarks(); } @@ -2347,12 +2354,11 @@ int start_stop_khugepaged(void) if (!list_empty(&khugepaged_scan.mm_head)) wake_up_interruptible(&khugepaged_wait); - - set_recommended_min_free_kbytes(); } else if (khugepaged_thread) { kthread_stop(khugepaged_thread); khugepaged_thread = NULL; } + set_recommended_min_free_kbytes(); fail: mutex_unlock(&khugepaged_mutex); return err; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2aafc337d0684..5a02d6d0a296c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8469,7 +8469,7 @@ void setup_per_zone_wmarks(void) * 8192MB: 11584k * 16384MB: 16384k */ -int __meminit init_per_zone_wmark_min(void) +void calculate_min_free_kbytes(void) { unsigned long lowmem_kbytes; int new_min_free_kbytes; @@ -8487,6 +8487,11 @@ int __meminit init_per_zone_wmark_min(void) pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n", new_min_free_kbytes, user_min_free_kbytes); } +} + +int __meminit init_per_zone_wmark_min(void) +{ + calculate_min_free_kbytes(); setup_per_zone_wmarks(); refresh_zone_stat_thresholds(); setup_per_zone_lowmem_reserve(); From 6fb73ce1d3eef8770c9ac94e58abc5c8db85e04a Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 21 Oct 2021 15:07:58 +1100 Subject: [PATCH 0760/1202] mm, hugepages: add mremap() support for hugepage backed vma Support mremap() for hugepage backed vma segment by simply repositioning page table entries. The page table entries are repositioned to the new virtual address on mremap(). Hugetlb mremap() support is of course generic; my motivating use case is a library (hugepage_text), which reloads the ELF text of executables in hugepages. This significantly increases the execution performance of said executables. Restricts the mremap operation on hugepages to up to the size of the original mapping as the underlying hugetlb reservation is not yet capable of handling remapping to a larger size. During the mremap() operation we detect pmd_share'd mappings and we unshare those during the mremap(). On access and fault the sharing is established again. 
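For illustration, the userspace operation this enables looks like the following (minimal sketch; assumes a populated 2MB hugetlb pool, error handling trimmed):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    #define LEN (2UL << 20)   /* one 2MB huge page */

    int main(void)
    {
            /* Back a mapping with a huge page, touch it, then move it; with
             * this patch the huge page table entries are repositioned rather
             * than the pages being unmapped and faulted in again. */
            char *old = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
            if (old == MAP_FAILED)
                    return 1;
            old[0] = 1;
            char *new = mremap(old, LEN, LEN, MREMAP_MAYMOVE);
            if (new == MAP_FAILED)
                    return 1;
            printf("moved %p -> %p\n", (void *)old, (void *)new);
            return 0;
    }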
Link: https://lkml.kernel.org/r/20211013195825.3058275-1-almasrymina@google.com Signed-off-by: Mina Almasry Reviewed-by: Mike Kravetz Cc: Ken Chen Cc: Chris Kennelly Cc: Michal Hocko Cc: Vlastimil Babka Cc: Kirill Shutemov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/hugetlb.h | 19 +++++++ mm/hugetlb.c | 111 +++++++++++++++++++++++++++++++++++++--- mm/mremap.c | 36 +++++++++++-- 3 files changed, 157 insertions(+), 9 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2bddd6c382046..c0a20781b28ef 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -124,6 +124,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); +void clear_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, loff_t *); @@ -132,6 +133,10 @@ int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +int move_hugetlb_page_tables(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, unsigned long new_addr, + unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, @@ -215,6 +220,10 @@ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } +static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) +{ +} + static inline unsigned long hugetlb_total_pages(void) { return 0; @@ -262,6 +271,16 @@ static inline int copy_hugetlb_page_range(struct mm_struct *dst, return 0; } +static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, + unsigned long new_addr, + unsigned long len) +{ + BUG(); + return 0; +} + static inline void hugetlb_report_meminfo(struct seq_file *m) { } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 75edf153e87e9..75363b224d797 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1014,6 +1014,35 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma) vma->vm_private_data = (void *)0; } +/* + * Reset and decrement one ref on hugepage private reservation. + * Called with mm->mmap_sem writer semaphore held. + * This function should be only used by move_vma() and operate on + * same sized vma. It should never come here with last ref on the + * reservation. + */ +void clear_vma_resv_huge_pages(struct vm_area_struct *vma) +{ + /* + * Clear the old hugetlb private page reservation. + * It has already been transferred to new_vma. + * + * During a mremap() operation of a hugetlb vma we call move_vma() + * which copies vma into new_vma and unmaps vma. After the copy + * operation both new_vma and vma share a reference to the resv_map + * struct, and at that point vma is about to be unmapped. We don't + * want to return the reservation to the pool at unmap of vma because + * the reservation still lives on in new_vma, so simply decrement the + * ref here and remove the resv_map reference from this vma. 
+ */ + struct resv_map *reservations = vma_resv_map(vma); + + if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + kref_put(&reservations->refs, resv_map_release); + + reset_vma_resv_huge_pages(vma); +} + /* Returns true if the VMA has associated reserve pages */ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) { @@ -4718,6 +4747,82 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, return ret; } +static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, + unsigned long new_addr, pte_t *src_pte) +{ + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; + pte_t *dst_pte, pte; + spinlock_t *src_ptl, *dst_ptl; + + dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h)); + dst_ptl = huge_pte_lock(h, mm, dst_pte); + src_ptl = huge_pte_lockptr(h, mm, src_pte); + + /* + * We don't have to worry about the ordering of src and dst ptlocks + * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock. + */ + if (src_ptl != dst_ptl) + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + + pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); + set_huge_pte_at(mm, new_addr, dst_pte, pte); + + if (src_ptl != dst_ptl) + spin_unlock(src_ptl); + spin_unlock(dst_ptl); +} + +int move_hugetlb_page_tables(struct vm_area_struct *vma, + struct vm_area_struct *new_vma, + unsigned long old_addr, unsigned long new_addr, + unsigned long len) +{ + struct hstate *h = hstate_vma(vma); + struct address_space *mapping = vma->vm_file->f_mapping; + unsigned long sz = huge_page_size(h); + struct mm_struct *mm = vma->vm_mm; + unsigned long old_end = old_addr + len; + unsigned long old_addr_copy; + pte_t *src_pte, *dst_pte; + struct mmu_notifier_range range; + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr, + old_end); + adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); + mmu_notifier_invalidate_range_start(&range); + /* Prevent race with file truncation */ + i_mmap_lock_write(mapping); + for (; old_addr < old_end; old_addr += sz, new_addr += sz) { + src_pte = huge_pte_offset(mm, old_addr, sz); + if (!src_pte) + continue; + if (huge_pte_none(huge_ptep_get(src_pte))) + continue; + + /* old_addr arg to huge_pmd_unshare() is a pointer and so the + * arg may be modified. Pass a copy instead to preserve the + * value in old_addr. + */ + old_addr_copy = old_addr; + + if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) + continue; + + dst_pte = huge_pte_alloc(mm, new_vma, new_addr, sz); + if (!dst_pte) + break; + + move_huge_pte(vma, old_addr, new_addr, src_pte); + } + i_mmap_unlock_write(mapping); + flush_tlb_range(vma, old_end - len, old_end); + mmu_notifier_invalidate_range_end(&range); + + return len + old_addr - old_end; +} + static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) @@ -6257,12 +6362,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, * sharing is possible. For hugetlbfs, this prevents removal of any page * table entries associated with the address space. This is important as we * are setting up sharing based on existing page table entries (mappings). - * - * NOTE: This routine is only called from huge_pte_alloc. Some callers of - * huge_pte_alloc know that sharing is not possible and do not take - * i_mmap_rwsem as a performance optimization. This is handled by the - * if !vma_shareable check at the beginning of the routine. 
i_mmap_rwsem is
- * only required for subsequent processing.
  */
 pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long addr, pud_t *pud)

diff --git a/mm/mremap.c b/mm/mremap.c
index c0b6c41b7b78f..002eec83e91e5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -489,6 +489,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 	old_end = old_addr + len;
 	flush_cache_range(vma, old_addr, old_end);

+	if (is_vm_hugetlb_page(vma))
+		return move_hugetlb_page_tables(vma, new_vma, old_addr,
+						new_addr, len);
+
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
 				old_addr, old_end);
 	mmu_notifier_invalidate_range_start(&range);
@@ -646,6 +650,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		mremap_userfaultfd_prep(new_vma, uf);
 	}

+	if (is_vm_hugetlb_page(vma)) {
+		clear_vma_resv_huge_pages(vma);
+	}
+
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) {
 		vma->vm_flags &= ~VM_ACCOUNT;
@@ -739,9 +747,6 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 	    (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
 		return ERR_PTR(-EINVAL);

-	if (is_vm_hugetlb_page(vma))
-		return ERR_PTR(-EINVAL);
-
 	/* We can't remap across vm area boundaries */
 	if (old_len > vma->vm_end - addr)
 		return ERR_PTR(-EFAULT);
@@ -937,6 +942,31 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;

+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (is_vm_hugetlb_page(vma)) {
+		struct hstate *h __maybe_unused = hstate_vma(vma);
+
+		old_len = ALIGN(old_len, huge_page_size(h));
+		new_len = ALIGN(new_len, huge_page_size(h));
+
+		/* addrs must be huge page aligned */
+		if (addr & ~huge_page_mask(h))
+			goto out;
+		if (new_addr & ~huge_page_mask(h))
+			goto out;
+
+		/*
+		 * Don't allow remap expansion, because the underlying hugetlb
+		 * reservation is not yet capable of handling reservation splits.
+		 */
+		if (new_len > old_len)
+			goto out;
+	}
 
 	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,

From 1c2d077b22d3598294af6f278a9d915257875424 Mon Sep 17 00:00:00 2001
From: Mina Almasry
Date: Thu, 21 Oct 2021 15:07:58 +1100
Subject: [PATCH 0761/1202] mm, hugepages: add hugetlb vma mremap() test

Link: https://lkml.kernel.org/r/20211013195825.3058275-2-almasrymina@google.com
Signed-off-by: Mina Almasry
Cc: Mike Kravetz
Cc: Ken Chen
Cc: Chris Kennelly
Cc: Michal Hocko
Cc: Vlastimil Babka
Cc: Kirill Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/testing/selftests/vm/.gitignore        |   1 +
 tools/testing/selftests/vm/Makefile          |   1 +
 tools/testing/selftests/vm/hugepage-mremap.c | 165 +++++++++++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 tools/testing/selftests/vm/hugepage-mremap.c

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index b02eac613fdda..2e7e86e852828 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 hugepage-mmap
+hugepage-mremap
 hugepage-shm
 khugepaged
 map_hugetlb
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d9605bd10f2de..1607322a112c9 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -29,6 +29,7 @@ TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-mremap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += khugepaged
 TEST_GEN_FILES += madv_populate
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
new file mode 100644
index 0000000000000..e84b79922fe6e
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mremap:
+ *
+ * Example of remapping huge page memory in a user application using the
+ * mremap system call. Before running this application, make sure that the
+ * administrator has mounted the hugetlbfs filesystem (on some directory
+ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
+ * example, the app is requesting memory of size 10MB that is backed by
+ * huge pages.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <fcntl.h> /* Definition of O_* constants */
+#include <sys/syscall.h> /* Definition of SYS_* constants */
+#include <unistd.h>
+#include <linux/userfaultfd.h>
+#include <sys/ioctl.h>
+
+#define LENGTH (1UL * 1024 * 1024 * 1024)
+
+#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
+#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
+
+static void check_bytes(char *addr)
+{
+	printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < LENGTH; i++)
+		*(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+	unsigned long i;
+
+	check_bytes(addr);
+	for (i = 0; i < LENGTH; i++)
+		if (*(addr + i) != (char)i) {
+			printf("Mismatch at %lu\n", i);
+			return 1;
+		}
+	return 0;
+}
+
+static void register_region_with_uffd(char *addr, size_t len)
+{
+	long uffd; /* userfaultfd file descriptor */
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
+
+	/* Create and enable userfaultfd object.
*/ + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd == -1) { + perror("userfaultfd"); + exit(1); + } + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { + perror("ioctl-UFFDIO_API"); + exit(1); + } + + /* Create a private anonymous mapping. The memory will be + * demand-zero paged--that is, not yet allocated. When we + * actually touch the memory, it will be allocated via + * the userfaultfd. + */ + + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Address returned by mmap() = %p\n", addr); + + /* Register the memory range of the mapping we just created for + * handling by the userfaultfd object. In mode, we request to track + * missing pages (i.e., pages that have not yet been faulted in). + */ + + uffdio_register.range.start = (unsigned long)addr; + uffdio_register.range.len = len; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { + perror("ioctl-UFFDIO_REGISTER"); + exit(1); + } +} + +int main(void) +{ + int ret = 0; + + int fd = open("/mnt/huge/test", O_CREAT | O_RDWR, 0755); + + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */ + unsigned long suggested_addr = 0x7eaa40000000; + void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); + printf("Map haddr: Returned address is %p\n", haddr); + if (haddr == MAP_FAILED) { + perror("mmap1"); + exit(1); + } + + /* mmap again to a dummy address to hopefully trigger pmd sharing. */ + suggested_addr = 0x7daa40000000; + void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); + printf("Map daddr: Returned address is %p\n", daddr); + if (daddr == MAP_FAILED) { + perror("mmap3"); + exit(1); + } + + suggested_addr = 0x7faa40000000; + void *vaddr = + mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0); + printf("Map vaddr: Returned address is %p\n", vaddr); + if (vaddr == MAP_FAILED) { + perror("mmap2"); + exit(1); + } + + register_region_with_uffd(haddr, LENGTH); + + void *addr = mremap(haddr, LENGTH, LENGTH, + MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); + if (addr == MAP_FAILED) { + perror("mremap"); + exit(1); + } + + printf("Mremap: Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + + return ret; +} From a65b4ecf1a656cfdb40480731ed6b98b74be4850 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 21 Oct 2021 15:07:59 +1100 Subject: [PATCH 0762/1202] mm-hugepages-add-hugetlb-vma-mremap-test-v8 Link: https://lkml.kernel.org/r/20211014200542.4126947-2-almasrymina@google.com Signed-off-by: Mina Almasry Acked-by: Mike Kravetz Cc: Ken Chen Cc: Chris Kennelly Cc: Michal Hocko Cc: Vlastimil Babka Cc: Kirill Shutemov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/hugepage-mremap.c | 10 +++------- tools/testing/selftests/vm/run_vmtests.sh | 11 +++++++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index e84b79922fe6e..76d5f42bf4ae2 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -3,12 +3,8 @@ * 
hugepage-mremap:
  *
  * Example of remapping huge page memory in a user application using the
- * mremap system call. Before running this application, make sure that the
- * administrator has mounted the hugetlbfs filesystem (on some directory
- * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
- * example, the app is requesting memory of size 10MB that is backed by
- * huge pages.
- *
+ * mremap system call. Code assumes a hugetlbfs filesystem is mounted
+ * at './huge'. The code will use 10MB worth of huge pages.
  */
 
 #define _GNU_SOURCE
@@ -109,7 +105,7 @@ int main(void)
 {
 	int ret = 0;
 
-	int fd = open("/mnt/huge/test", O_CREAT | O_RDWR, 0755);
+	int fd = open("/huge/test", O_CREAT | O_RDWR, 0755);
 
 	if (fd < 0) {
 		perror("Open failed");
 		exit(1);
 	}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 45e803af7c775..a24d30af30949 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -108,6 +108,17 @@ else
 	echo "[PASS]"
 fi
 
+echo "-----------------------"
+echo "running hugepage-mremap"
+echo "-----------------------"
+./hugepage-mremap
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
 echo "      hugetlb regression testing."

From 0f3dd184c7f6cc3ebd982698d3b6b0ccc0764477 Mon Sep 17 00:00:00 2001
From: Baolin Wang
Date: Thu, 21 Oct 2021 15:07:59 +1100
Subject: [PATCH 0763/1202] hugetlb: support node specified when using cma for
 gigantic hugepages

Now the size of the CMA area used for runtime allocation of gigantic
hugepages is balanced across all online nodes, but we also want to be
able to specify the CMA size per node, or for only one node in some
cases, similar to patch [1].

For example, on some multi-node systems, each node's memory can differ,
so allocating the same CMA size on each node is not suitable for the
low-memory nodes. Meanwhile some workloads, like the DPDK case mentioned
by Zhenguo in patch [1], only need hugepages on one node.

On the other hand, we have some machines with multiple types of memory,
like DRAM and PMEM (persistent memory). On such a system, we may want to
place all the hugepages on the DRAM node only, or specify the proportion
between the DRAM and PMEM nodes, to tune the performance of the
workloads.

Thus this patch adds a node format for the 'hugetlb_cma' parameter to
support specifying the size of the CMA area per node. An example is as
follows:

	hugetlb_cma=0:5G,2:5G

which means allocating a 5G CMA area on node 0 and node 2 respectively.
The users should then use the node-specific sysfs file to allocate the
gigantic hugepages if the CMA size was specified for that node.

[1] https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@gmail.com

Link: https://lkml.kernel.org/r/bb790775ca60bb8f4b26956bb3f6988f74e075c7.1634261144.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang
Cc: Mike Kravetz
Cc: Michal Hocko
Cc: Roman Gushchin
Cc: Jonathan Corbet
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 .../admin-guide/kernel-parameters.txt |  6 +-
 mm/hugetlb.c                          | 86 +++++++++++++++++--
 2 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 43dc35fe5bc03..e7f7904edf20f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1587,8 +1587,10 @@
 			registers.
 			Default set by CONFIG_HPET_MMAP_DEFAULT.
 
 	hugetlb_cma=	[HW,CMA] The size of a CMA area used for allocation
-			of gigantic hugepages.
-			Format: nn[KMGTPE]
+			of gigantic hugepages. Or using node format, the size
+			of a CMA area per node can be specified.
+			Format: nn[KMGTPE] or (node format)
+				<node>:nn[KMGTPE][,<node>:nn[KMGTPE]]
 
 			Reserve a CMA area of given size and allocate gigantic
 			hugepages using the CMA allocator. If enabled, the

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 75363b224d797..fabb061338396 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -50,6 +50,7 @@ struct hstate hstates[HUGE_MAX_HSTATE];
 
 #ifdef CONFIG_CMA
 static struct cma *hugetlb_cma[MAX_NUMNODES];
+static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_page(struct page *page, unsigned int order)
 {
 	return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
@@ -6762,7 +6763,38 @@ static bool cma_reserve_called __initdata;
 
 static int __init cmdline_parse_hugetlb_cma(char *p)
 {
-	hugetlb_cma_size = memparse(p, &p);
+	int nid, count = 0;
+	unsigned long tmp;
+	char *s = p;
+
+	while (*s) {
+		if (sscanf(s, "%lu%n", &tmp, &count) != 1)
+			break;
+
+		if (s[count] == ':') {
+			nid = tmp;
+			if (nid < 0 || nid >= MAX_NUMNODES)
+				break;
+
+			s += count + 1;
+			tmp = memparse(s, &s);
+			hugetlb_cma_size_in_node[nid] = tmp;
+			hugetlb_cma_size += tmp;
+
+			/*
+			 * Skip the separator if we have one, otherwise
+			 * break the parsing.
+			 */
+			if (*s == ',')
+				s++;
+			else
+				break;
+		} else {
+			hugetlb_cma_size = memparse(p, &p);
+			break;
+		}
+	}
+
 	return 0;
 }
 
@@ -6771,10 +6803,36 @@ early_param("hugetlb_cma", cmdline_parse_hugetlb_cma);
 void __init hugetlb_cma_reserve(int order)
 {
 	unsigned long size, reserved, per_node;
+	bool node_specific_cma_alloc = false;
 	int nid;
 
 	cma_reserve_called = true;
 
+	if (!hugetlb_cma_size)
+		return;
+
+	for (nid = 0; nid < MAX_NUMNODES; nid++) {
+		if (hugetlb_cma_size_in_node[nid] == 0)
+			continue;
+
+		if (!node_state(nid, N_ONLINE)) {
+			pr_warn("hugetlb_cma: invalid node %d specified\n", nid);
+			hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
+			hugetlb_cma_size_in_node[nid] = 0;
+			continue;
+		}
+
+		if (hugetlb_cma_size_in_node[nid] < (PAGE_SIZE << order)) {
+			pr_warn("hugetlb_cma: cma area of node %d should be at least %lu MiB\n",
+				nid, (PAGE_SIZE << order) / SZ_1M);
+			hugetlb_cma_size -= hugetlb_cma_size_in_node[nid];
+			hugetlb_cma_size_in_node[nid] = 0;
+		} else {
+			node_specific_cma_alloc = true;
+		}
+	}
+
+	/* Validate the CMA size again in case some invalid nodes were specified. */
 	if (!hugetlb_cma_size)
 		return;
 
@@ -6785,20 +6843,30 @@ void __init hugetlb_cma_reserve(int order)
 		return;
 	}
 
-	/*
-	 * If 3 GB area is requested on a machine with 4 numa nodes,
-	 * let's allocate 1 GB on first three nodes and ignore the last one.
-	 */
-	per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
-	pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
-		hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+	if (!node_specific_cma_alloc) {
+		/*
+		 * If 3 GB area is requested on a machine with 4 numa nodes,
+		 * let's allocate 1 GB on first three nodes and ignore the last one.
+		 */
+		per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes);
+		pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n",
+			hugetlb_cma_size / SZ_1M, per_node / SZ_1M);
+	}
 
 	reserved = 0;
 	for_each_node_state(nid, N_ONLINE) {
 		int res;
 		char name[CMA_MAX_NAME];
 
-		size = min(per_node, hugetlb_cma_size - reserved);
+		if (node_specific_cma_alloc) {
+			if (hugetlb_cma_size_in_node[nid] == 0)
+				continue;
+
+			size = hugetlb_cma_size_in_node[nid];
+		} else {
+			size = min(per_node, hugetlb_cma_size - reserved);
+		}
+
 		size = round_up(size, PAGE_SIZE << order);
 
 		snprintf(name, sizeof(name), "hugetlb%d", nid);

From 532824b7e7260b82af4c8b15ba3207696f179537 Mon Sep 17 00:00:00 2001
From: Ran Jianping
Date: Thu, 21 Oct 2021 15:08:00 +1100
Subject: [PATCH 0764/1202] mm: remove duplicate include in hugepage-mremap.c

Remove a duplicate include: 'unistd.h', included in
'tools/testing/selftests/vm/hugepage-mremap.c', is duplicated. It is
also included on line 23.

Link: https://lkml.kernel.org/r/20211018102336.869726-1-ran.jianping@zte.com.cn
Signed-off-by: Ran Jianping
Reported-by: Zeal Robot
Cc: Shuah Khan
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/testing/selftests/vm/hugepage-mremap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 76d5f42bf4ae2..8bc117b12f78b 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -16,7 +16,6 @@
 #include <sys/mman.h>
 #include <fcntl.h> /* Definition of O_* constants */
 #include <sys/syscall.h> /* Definition of SYS_* constants */
-#include <unistd.h>
 #include <linux/userfaultfd.h>
 #include <sys/ioctl.h>
 

From ee2df6c5ebf42fdb20ff12c3b216507da7e4b7ec Mon Sep 17 00:00:00 2001
From: Axel Rasmussen
Date: Thu, 21 Oct 2021 15:08:00 +1100
Subject: [PATCH 0765/1202] userfaultfd/selftests: don't rely on GNU
 extensions for random numbers

Patch series "Small userfaultfd selftest fixups", v2.

This patch (of 3):

Two arguments for doing this: First, and maybe most importantly, the
resulting code is significantly shorter / simpler. Then, we avoid using
GNU libc extensions. Why does this matter? It makes testing userfaultfd
with the selftest easier, e.g. on distros which use something other than
glibc (e.g. Alpine, which uses musl); basically, it makes the test more
portable.
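For illustration, the portable pattern the test moves to looks like this
(a standalone sketch, not part of the patch):

#include <err.h>
#include <stdio.h>
#include <sys/random.h>

int main(void)
{
	unsigned long page_nr;

	/* getrandom(2) needs no per-thread state and exists on musl too */
	if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr))
		err(1, "getrandom");
	printf("page_nr %% 1024 = %lu\n", page_nr % 1024);
	return 0;
}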
Link: https://lkml.kernel.org/r/20210930212309.4001967-2-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen
Reviewed-by: Peter Xu
Cc: Shuah Khan
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/testing/selftests/vm/userfaultfd.c | 26 ++++--------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 60aa1a4fc69b6..d2acab4411cf7 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -57,6 +57,7 @@
 #include <assert.h>
 #include <inttypes.h>
 #include <stdint.h>
+#include <sys/random.h>
 
 #include "../kselftest.h"
 
@@ -518,22 +519,10 @@ static void continue_range(int ufd, __u64 start, __u64 len)
 static void *locking_thread(void *arg)
 {
 	unsigned long cpu = (unsigned long) arg;
-	struct random_data rand;
 	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
-	int32_t rand_nr;
 	unsigned long long count;
-	char randstate[64];
-	unsigned int seed;
 
-	if (bounces & BOUNCE_RANDOM) {
-		seed = (unsigned int) time(NULL) - bounces;
-		if (!(bounces & BOUNCE_RACINGFAULTS))
-			seed += cpu;
-		bzero(&rand, sizeof(rand));
-		bzero(&randstate, sizeof(randstate));
-		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
-			err("initstate_r failed");
-	} else {
+	if (!(bounces & BOUNCE_RANDOM)) {
 		page_nr = -bounces;
 		if (!(bounces & BOUNCE_RACINGFAULTS))
 			page_nr += cpu * nr_pages_per_cpu;
@@ -541,15 +530,8 @@ static void *locking_thread(void *arg)
 
 	while (!finished) {
 		if (bounces & BOUNCE_RANDOM) {
-			if (random_r(&rand, &rand_nr))
-				err("random_r failed");
-			page_nr = rand_nr;
-			if (sizeof(page_nr) > sizeof(rand_nr)) {
-				if (random_r(&rand, &rand_nr))
-					err("random_r failed");
-				page_nr |= (((unsigned long) rand_nr) << 16) <<
-					   16;
-			}
+			if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr))
+				err("getrandom failed");
 		} else
 			page_nr += 1;
 		page_nr %= nr_pages;

From d1127b192d8db7a1c406de20140665ac848a09f1 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen
Date: Thu, 21 Oct 2021 15:08:01 +1100
Subject: [PATCH 0766/1202] userfaultfd/selftests: fix feature support
 detection

Before any tests are run, in set_test_type, we decide what feature(s) we
are going to be testing, based upon our command line arguments. However,
the supported features are not just a function of the memory type being
used, so this is broken.

For instance, consider writeprotect support. It is "normally" supported
for anonymous memory, but furthermore it requires that the kernel has
CONFIG_HAVE_ARCH_USERFAULTFD_WP. So, it is *not* supported at all on
aarch64, for example.

So, this commit fixes this by querying the kernel for the set of features
it supports in set_test_type, by opening a userfaultfd and issuing a
UFFDIO_API ioctl. Based upon the reported features, we toggle what tests
are enabled.
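The probe described above amounts to the following (a self-contained sketch
of the same UFFDIO_API handshake; error handling trimmed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

int main(void)
{
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
		return 1;
	/* api.features now holds everything this kernel supports */
	printf("writeprotect: %s\n",
	       (api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP) ? "yes" : "no");
	close(uffd);
	return 0;
}

Note that unprivileged userfaultfd may be restricted; depending on the
system, running as root or setting vm.unprivileged_userfaultfd=1 may be
needed for the syscall itself to succeed.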
Link: https://lkml.kernel.org/r/20210930212309.4001967-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/userfaultfd.c | 54 ++++++++++++++---------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index d2acab4411cf7..4efdaff2b5636 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -346,6 +346,16 @@ static struct uffd_test_ops hugetlb_uffd_test_ops = { static struct uffd_test_ops *uffd_test_ops; +static inline uint64_t uffd_minor_feature(void) +{ + if (test_type == TEST_HUGETLB && map_shared) + return UFFD_FEATURE_MINOR_HUGETLBFS; + else if (test_type == TEST_SHMEM) + return UFFD_FEATURE_MINOR_SHMEM; + else + return 0; +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; @@ -406,7 +416,7 @@ static void uffd_test_ctx_clear(void) munmap_area((void **)&area_dst_alias); } -static void uffd_test_ctx_init_ext(uint64_t *features) +static void uffd_test_ctx_init(uint64_t features) { unsigned long nr, cpu; @@ -415,7 +425,7 @@ static void uffd_test_ctx_init_ext(uint64_t *features) uffd_test_ops->allocate_area((void **)&area_src); uffd_test_ops->allocate_area((void **)&area_dst); - userfaultfd_open(features); + userfaultfd_open(&features); count_verify = malloc(nr_pages * sizeof(unsigned long long)); if (!count_verify) @@ -463,11 +473,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features) err("pipe"); } -static inline void uffd_test_ctx_init(uint64_t features) -{ - uffd_test_ctx_init_ext(&features); -} - static int my_bcmp(char *str1, char *str2, size_t n) { unsigned long i; @@ -1208,7 +1213,6 @@ static int userfaultfd_minor_test(void) void *expected_page; char c; struct uffd_stats stats = { 0 }; - uint64_t req_features, features_out; if (!test_uffdio_minor) return 0; @@ -1216,21 +1220,7 @@ static int userfaultfd_minor_test(void) printf("testing minor faults: "); fflush(stdout); - if (test_type == TEST_HUGETLB) - req_features = UFFD_FEATURE_MINOR_HUGETLBFS; - else if (test_type == TEST_SHMEM) - req_features = UFFD_FEATURE_MINOR_SHMEM; - else - return 1; - - features_out = req_features; - uffd_test_ctx_init_ext(&features_out); - /* If kernel reports required features aren't supported, skip test. */ - if ((features_out & req_features) != req_features) { - printf("skipping test due to lack of feature support\n"); - fflush(stdout); - return 0; - } + uffd_test_ctx_init(uffd_minor_feature()); uffdio_register.range.start = (unsigned long)area_dst_alias; uffdio_register.range.len = nr_pages * page_size; @@ -1591,6 +1581,8 @@ unsigned long default_huge_page_size(void) static void set_test_type(const char *type) { + uint64_t features = UFFD_API_FEATURES; + if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; @@ -1624,6 +1616,22 @@ static void set_test_type(const char *type) if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 > page_size) err("Impossible to run this test"); + + /* + * Whether we can test certain features depends not just on test type, + * but also on whether or not this particular kernel supports the + * feature. 
+ */ + + userfaultfd_open(&features); + + test_uffdio_wp = test_uffdio_wp && + (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + test_uffdio_minor = test_uffdio_minor && + (features & uffd_minor_feature()); + + close(uffd); + uffd = -1; } static void sigalrm(int sig) From e0b8b826065c4cbaefb69e26938f75adefce0676 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Thu, 21 Oct 2021 15:08:02 +1100 Subject: [PATCH 0767/1202] userfaultfd/selftests: fix calculation of expected ioctls Today, we assert that the ioctls the kernel reports as supported for a registration match a precomputed list. We decide which ioctls are supported by examining the memory type. Then, in several locations we "fix up" this list by adding or removing things this initial decision got wrong. What ioctls the kernel reports is actually a function of several things: - The memory type - Kernel feature support (e.g., no writeprotect on aarch64) - The registration type (e.g., CONTINUE only supported for MINOR mode) So, we can't fully compute this at the start, in set_test_type. It varies per test, depending on what registration mode(s) those tests use. Instead, introduce a new function which computes the correct list. This centralizes the add/remove of ioctls depending on these function inputs in one place, so we don't have to repeat ourselves in various tests. Not only is the resulting code a bit shorter, but it fixes a real bug in the existing code: previously, we would incorrectly require the writeprotect ioctl to be present on aarch64, where it isn't actually supported. Link: https://lkml.kernel.org/r/20210930212309.4001967-4-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Cc: Peter Xu Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/userfaultfd.c | 77 ++++++++++++------------ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 4efdaff2b5636..8a09057d2f223 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -308,37 +308,24 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) } struct uffd_test_ops { - unsigned long expected_ioctls; void (*allocate_area)(void **alloc_area); void (*release_pages)(char *rel_area); void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); }; -#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE)) - -#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ - (1 << _UFFDIO_COPY) | \ - (1 << _UFFDIO_ZEROPAGE) | \ - (1 << _UFFDIO_WRITEPROTECT)) - static struct uffd_test_ops anon_uffd_test_ops = { - .expected_ioctls = ANON_EXPECTED_IOCTLS, .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, .alias_mapping = noop_alias_mapping, }; static struct uffd_test_ops shmem_uffd_test_ops = { - .expected_ioctls = SHMEM_EXPECTED_IOCTLS, .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, .alias_mapping = shmem_alias_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE), .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, @@ -356,6 +343,33 @@ static inline uint64_t uffd_minor_feature(void) return 0; } +static uint64_t get_expected_ioctls(uint64_t mode) +{ + uint64_t ioctls = UFFD_API_RANGE_IOCTLS; + + if 
(test_type == TEST_HUGETLB) + ioctls &= ~(1 << _UFFDIO_ZEROPAGE); + + if (!((mode & UFFDIO_REGISTER_MODE_WP) && test_uffdio_wp)) + ioctls &= ~(1 << _UFFDIO_WRITEPROTECT); + + if (!((mode & UFFDIO_REGISTER_MODE_MINOR) && test_uffdio_minor)) + ioctls &= ~(1 << _UFFDIO_CONTINUE); + + return ioctls; +} + +static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls) +{ + uint64_t expected = get_expected_ioctls(mode); + uint64_t actual = ioctls & expected; + + if (actual != expected) { + err("missing ioctl(s): expected %"PRIx64" actual: %"PRIx64, + expected, actual); + } +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; @@ -1017,11 +1031,9 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) { struct uffdio_zeropage uffdio_zeropage; int ret; - unsigned long has_zeropage; + bool has_zeropage = get_expected_ioctls(0) & (1 << _UFFDIO_ZEROPAGE); __s64 res; - has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - if (offset >= nr_pages * page_size) err("unexpected offset %lu", offset); uffdio_zeropage.range.start = (unsigned long) area_dst + offset; @@ -1061,7 +1073,6 @@ static int uffdio_zeropage(int ufd, unsigned long offset) static int userfaultfd_zeropage_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; printf("testing UFFDIO_ZEROPAGE: "); fflush(stdout); @@ -1076,9 +1087,8 @@ static int userfaultfd_zeropage_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (uffdio_zeropage(uffd, 0)) if (my_bcmp(area_dst, zeropage, page_size)) @@ -1091,7 +1101,6 @@ static int userfaultfd_zeropage_test(void) static int userfaultfd_events_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; pthread_t uffd_mon; int err, features; pid_t pid; @@ -1115,9 +1124,8 @@ static int userfaultfd_events_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) err("uffd_poll_thread create"); @@ -1145,7 +1153,6 @@ static int userfaultfd_events_test(void) static int userfaultfd_sig_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; unsigned long userfaults; pthread_t uffd_mon; int err, features; @@ -1169,9 +1176,8 @@ static int userfaultfd_sig_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (faulting_process(1)) err("faulting process failed"); @@ -1206,7 +1212,6 @@ static int userfaultfd_sig_test(void) static int userfaultfd_minor_test(void) { struct uffdio_register uffdio_register; - unsigned long expected_ioctls; unsigned long p; pthread_t uffd_mon; uint8_t expected_byte; @@ -1228,10 +1233,8 @@ static int 
userfaultfd_minor_test(void) if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - expected_ioctls |= 1 << _UFFDIO_CONTINUE; - if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) - err("unexpected missing ioctl(s)"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); /* * After registering with UFFD, populate the non-UFFD-registered side of @@ -1428,8 +1431,6 @@ static int userfaultfd_stress(void) pthread_attr_setstacksize(&attr, 16*1024*1024); while (bounces--) { - unsigned long expected_ioctls; - printf("bounces: %d, mode:", bounces); if (bounces & BOUNCE_RANDOM) printf(" rnd"); @@ -1457,10 +1458,8 @@ static int userfaultfd_stress(void) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) err("register failure"); - expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - err("unexpected missing ioctl for anon memory"); + assert_expected_ioctls_present( + uffdio_register.mode, uffdio_register.ioctls); if (area_dst_alias) { uffdio_register.range.start = (unsigned long) From e87417a4c40349c7678c91df160af904287d2ec4 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:08:02 +1100 Subject: [PATCH 0768/1202] mm/page_isolation: fix potential missing call to unset_migratetype_isolate() In start_isolate_page_range() undo path, pfn_to_online_page() just checks the first pfn in a pageblock while __first_valid_page() will traverse the pageblock until the first online pfn is found. So we may miss the call to unset_migratetype_isolate() in undo path and pages will remain isolated unexpectedly. Fix this by calling undo_isolate_page_range() and this will also help to simplify the code further. Note we shouldn't ever trigger it because MAX_ORDER-1 aligned pfn ranges shouldn't contain memory holes now. 
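The shape of the fix is the classic "roll back with the same walker"
pattern; a compilable toy version (hypothetical names, not the kernel
code):

#include <stdio.h>

#define BLOCK 512UL	/* stand-in for pageblock_nr_pages */

/* Hypothetical stand-ins for set_migratetype_isolate() and
 * undo_isolate_page_range(); one block is made to fail for demonstration. */
static int isolate_block(unsigned long pfn) { return pfn == 1536; }
static void undo_range(unsigned long start, unsigned long end)
{
	printf("undoing [%lu, %lu)\n", start, end);
}

static int isolate_range(unsigned long start, unsigned long end)
{
	unsigned long pfn;

	for (pfn = start; pfn < end; pfn += BLOCK) {
		if (isolate_block(pfn)) {
			/* walk exactly the blocks already isolated, no more */
			undo_range(start, pfn);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	isolate_range(0, 4096);
	return 0;
}

Because the rollback reuses the same helper and the same stride as the
forward walk, the two can no longer disagree about which blocks were
actually isolated, which was the bug being fixed.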
Link: https://lkml.kernel.org/r/20210914114348.15569-1-linmiaohe@huawei.com Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_isolation.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index a95c2c6562d0f..f93cc63d8fa1d 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -183,7 +183,6 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype, int flags) { unsigned long pfn; - unsigned long undo_pfn; struct page *page; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); @@ -193,25 +192,12 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page) { - if (set_migratetype_isolate(page, migratetype, flags)) { - undo_pfn = pfn; - goto undo; - } + if (page && set_migratetype_isolate(page, migratetype, flags)) { + undo_isolate_page_range(start_pfn, pfn, migratetype); + return -EBUSY; } } return 0; -undo: - for (pfn = start_pfn; - pfn < undo_pfn; - pfn += pageblock_nr_pages) { - struct page *page = pfn_to_online_page(pfn); - if (!page) - continue; - unset_migratetype_isolate(page, migratetype); - } - - return -EBUSY; } /* From b6f60efad609c3c8bcd1261ec9caeee65dd81670 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:08:03 +1100 Subject: [PATCH 0769/1202] mm/page_isolation: guard against possible putback unisolated page Isolating a free page in an isolated pageblock is expected to always work as watermarks don't apply here. But if __isolate_free_page() failed, due to condition changes, the page will be left on the free list. And the page will be put back to free list again via __putback_isolated_page(). This may trigger VM_BUG_ON_PAGE() on page->flags checking in __free_one_page() if PageReported is set. Or we will corrupt the free list because list_add() will be called for pages already on another list. Add a VM_WARN_ON() to complain about this change. Link: https://lkml.kernel.org/r/20210914114508.23725-1-linmiaohe@huawei.com Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") Signed-off-by: Miaohe Lin Reviewed-by: David Hildenbrand Acked-by: Vlastimil Babka Cc: John Hubbard Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/page_isolation.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index f93cc63d8fa1d..f67c4c70f17f6 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -94,8 +94,13 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) buddy = page + (buddy_pfn - pfn); if (!is_migrate_isolate_page(buddy)) { - __isolate_free_page(page, order); - isolated_page = true; + isolated_page = !!__isolate_free_page(page, order); + /* + * Isolating a free page in an isolated pageblock + * is expected to always work as watermarks don't + * apply here. 
+			 */
+			VM_WARN_ON(!isolated_page);
 		}
 	}
 }

From 17d09157cd365ff735e8f2eb2fb6da0f3f8127ec Mon Sep 17 00:00:00 2001
From: Kai Song
Date: Thu, 21 Oct 2021 15:08:03 +1100
Subject: [PATCH 0770/1202] mm/vmscan.c: fix -Wunused-but-set-variable warning

We fix the following warning when building the kernel with W=1:

mm/vmscan.c:1362:6: warning: variable 'err' set but not used [-Wunused-but-set-variable]

Link: https://lkml.kernel.org/r/20210924181218.21165-1-songkai01@inspur.com
Signed-off-by: Kai Song
Reviewed-by: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/vmscan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index d010b8f64568e..0b161eaaa03c2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1344,7 +1344,6 @@ static unsigned int demote_page_list(struct list_head *demote_pages,
 {
 	int target_nid = next_demotion_node(pgdat->node_id);
 	unsigned int nr_succeeded;
-	int err;
 
 	if (list_empty(demote_pages))
 		return 0;
@@ -1353,7 +1352,7 @@ static unsigned int demote_page_list(struct list_head *demote_pages,
 		return 0;
 
 	/* Demotion ignores all cpuset and mempolicy settings */
-	err = migrate_pages(demote_pages, alloc_demote_page, NULL,
+	migrate_pages(demote_pages, alloc_demote_page, NULL,
 			    target_nid, MIGRATE_ASYNC, MR_DEMOTION,
 			    &nr_succeeded);

From 25986c9d9d599e8546b971f52fc5e1006564ace8 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:04 +1100
Subject: [PATCH 0771/1202] mm/vmscan: throttle reclaim until some writeback
 completes if congested

Patch series "Remove dependency on congestion_wait in mm/", v4.

This series removes all calls to congestion_wait in mm/ and deletes
wait_iff_congested. It's not a clever implementation but congestion_wait
has been broken for a long time
(https://lore.kernel.org/linux-mm/45d8b7a6-8548-65f5-cccf-9f451d4ae3d4@kernel.dk/).

Even if congestion throttling worked, it was never a great idea. While
excessive dirty/writeback pages at the tail of the LRU is one reason why
reclaim may be slow, there is also the problem of too many pages being
isolated and reclaim failing for other reasons (elevated references, too
many pages isolated, excessive LRU contention etc).

This series replaces the "congestion" throttling with 3 different types:

o If there are too many dirty/writeback pages, sleep until a timeout or
  enough pages get cleaned
o If too many pages are isolated, sleep until enough isolated pages are
  either reclaimed or put back on the LRU
o If no progress is being made, direct reclaim tasks sleep until another
  task makes progress with acceptable efficiency.

This was initially tested with a mix of workloads that used to trigger
corner cases that no longer work. A new test case was created called
"stutterp" (pagereclaim-stutterp-noreaders in mmtests) using a freshly
created XFS filesystem. Note that it may be necessary to increase the
timeout of ssh if executing remotely, as ssh itself can get throttled and
the connection may time out.

stutterp varies the number of "worker" processes from 4 up to NR_CPUS*4
to check the impact as the number of direct reclaimers increases. It has
four types of worker:

o One "anon latency" worker creates small mappings with mmap() and times
  how long it takes to fault the mapping, reading it 4K at a time
o X file writers, which is fio randomly writing X files where the total
  size of the files adds up to the allowed dirty_ratio. fio is allowed to
  run for a warmup period to allow some file-backed pages to accumulate.
  The duration of the warmup is based on the best-case linear write speed
  of the storage.
o Y file readers, which is fio randomly reading small files
o Z anon memory hogs, which continually map (100-dirty_ratio)% of memory
o Total estimated WSS = (100+dirty_ratio) percentage of memory

X+Y+Z+1 == NR_WORKERS varying from 4 up to NR_CPUS*4

The intent is to maximise the total WSS with a mix of file and anon memory
where some anonymous memory must be swapped and there is a high likelihood
of dirty/writeback pages reaching the end of the LRU.

The test can be configured to have no background readers to stress
dirty/writeback pages. The results below are based on having zero readers.

The short summary of the results is that the series works and stalls until
some event occurs, but the timeouts may need adjustment. The test results
are not broken down by patch as the series should be treated as one block
that replaces a broken throttling mechanism with a working one.

Finally, three machines were tested but I'm reporting the worst set of
results. The other two machines had much better latencies, for example.

First, the results for the "anon latency" worker:

stutterp
                              5.15.0-rc1             5.15.0-rc1
                                 vanilla  mm-reclaimcongest-v4r2
Amean   mmap-4      31.4003 (   0.00%)     176.8729 ( -463.28%)
Amean   mmap-7      38.1641 (   0.00%)     605.8866 (-1487.58%)
Amean   mmap-12     60.0981 (   0.00%)    2866.8561 (-4670.29%)
Amean   mmap-21    161.2699 (   0.00%)     118.6587 (   26.42%)
Amean   mmap-30    174.5589 (   0.00%)    3729.2263 (-2036.37%)
Amean   mmap-48   8106.8160 (   0.00%)    1463.7815 (   81.94%)
Stddev  mmap-4      41.3455 (   0.00%)    5847.5425 (-14043.13%)
Stddev  mmap-7      53.5556 (   0.00%)   12091.9011 (-22478.20%)
Stddev  mmap-12    171.3897 (   0.00%)   28785.9881 (-16695.63%)
Stddev  mmap-21   1506.6752 (   0.00%)    1609.0361 (   -6.79%)
Stddev  mmap-30    557.5806 (   0.00%)   32712.2440 (-5766.82%)
Stddev  mmap-48  61681.5718 (   0.00%)   15971.4654 (   74.11%)
Max-90  mmap-4      31.4243 (   0.00%)      30.1957 (    3.91%)
Max-90  mmap-7      41.0410 (   0.00%)      36.7782 (   10.39%)
Max-90  mmap-12     66.5255 (   0.00%)     121.8574 (  -83.17%)
Max-90  mmap-21    146.7479 (   0.00%)     132.2327 (    9.89%)
Max-90  mmap-30    193.9513 (   0.00%)      61.6135 (   68.23%)
Max-90  mmap-48    277.9137 (   0.00%)     593.7413 ( -113.64%)
Max     mmap-4    1913.8009 (   0.00%)  239690.5578 (-12424.32%)
Max     mmap-7    2423.9665 (   0.00%)  270122.1751 (-11043.81%)
Max     mmap-12   6845.6573 (   0.00%)  308761.7416 (-4410.33%)
Max     mmap-21  56278.6508 (   0.00%)   79286.8553 (  -40.88%)
Max     mmap-30  19716.2990 (   0.00%)  306793.2333 (-1456.04%)
Max     mmap-48 477923.9400 (   0.00%)  229791.8793 (   51.92%)

For most thread counts, the time to mmap() is unfortunately increased. In
earlier versions of the series, this was lower, but a large number of
throttling events were reaching their timeout, increasing the amount of
inefficient scanning of the LRU. There is no prioritisation of reclaim
tasks making progress based on each task's rate of page allocation versus
progress of reclaim. The variance is also impacted for high worker counts,
but in all cases the differences in latency are not statistically
significant due to very large maximum outliers. Max-90 shows that 90% of
the stalls are comparable, but the Max results show the massive outliers
which are increased due to stalling.

It is expected that this will be very machine dependent. Due to the test
design, reclaim is difficult, so allocations stall and there are variances
depending on whether THPs can be allocated or not. The amount of memory
will affect exactly how bad the corner cases are and how often they
trigger.
The warmup period calculation is not ideal as it's based on linear writes,
whereas fio is randomly writing multiple files from multiple tasks, so the
start state of the test is variable. For example, these are the latencies
on a single-socket machine that had more memory:

Amean   mmap-4     20.5437 (   0.00%)      19.1772 *    6.65%*
Amean   mmap-6     39.2860 (   0.00%)      69.4987 (  -76.90%)
Amean   mmap-8   2476.1950 (   0.00%)     151.7673 (   93.87%)
Amean   mmap-12   178.0936 (   0.00%)     209.1427 (  -17.43%)
Amean   mmap-18  3238.9125 (   0.00%)     262.5806 (   91.89%)
Amean   mmap-24  7922.7016 (   0.00%)     322.9738 (   95.92%)
Amean   mmap-30  1766.8392 (   0.00%)     405.8898 (   77.03%)
Amean   mmap-32  7542.2844 (   0.00%)     555.6236 (   92.63%)
Amean   mmap-32  7542.2844 (   0.00%)     512.1812 (   93.21%)

The overall system CPU usage and elapsed time are as follows:

                   5.15.0-rc3  5.15.0-rc3
                      vanilla  mm-reclaimcongest-v4r2
Duration User        6989.03     2368.70
Duration System      7308.12      843.35
Duration Elapsed     2277.67     2131.77

The patches reduce system CPU usage by 88% as the vanilla kernel is rarely
stalling.

The high-level /proc/vmstats show:

                                    5.15.0-rc1     5.15.0-rc1
                                       vanilla  mm-reclaimcongest-v4r2
Ops Direct pages scanned        1056608451.00    76886196.00
Ops Kswapd pages scanned         109795048.00    82179688.00
Ops Kswapd pages reclaimed        63269243.00    27410157.00
Ops Direct pages reclaimed        10803973.00     8016444.00
Ops Kswapd efficiency %                 57.62          33.35
Ops Kswapd velocity                  48204.98       38549.98
Ops Direct efficiency %                  1.02          10.43
Ops Direct velocity                 463898.83       36066.83

Kswapd scanned fewer pages but the detailed pattern is different. The
vanilla kernel scans slowly over time, whereas the patched kernel exhibits
bursts of scan activity. Direct reclaim scanning is reduced by 92% due to
stalling.

The pattern for stealing pages is also slightly different. Both kernels
exhibit spikes, but the vanilla kernel when reclaiming shows pages being
reclaimed over a period of time, whereas the patched kernel tends to
reclaim in spikes. The difference is that vanilla is not throttling and
instead scans constantly, finding some pages over time, whereas the
patched kernel throttles and reclaims in spikes.

Ops Percentage direct scans             90.59          48.34

With vanilla, 90.59% of the pages scanned were scanned by direct reclaim;
with the patches, only 48.34% were, due to throttling.

Ops Page writes by reclaim         2613590.00     1882533.00

Page writes from reclaim context are reduced.

Ops Page writes anon               2932752.00     2266749.00

And there is slightly less swapping.

Ops Page reclaim immediate       996248528.00    29230920.00

The number of pages encountered at the tail of the LRU tagged for immediate
reclaim but still dirty/writeback is reduced by 97%.

Ops Slabs scanned                   164284.00      166646.00

Slab scan activity is similar.

ftrace was used to gather stall activity.

Vanilla
-------
      1 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=16000
      2 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=12000
      8 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=8000
     29 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=4000
  82394 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=0

The vast majority of wait_iff_congested calls do not stall at all. What is
likely happening is that cond_resched() reschedules the task for a short
period when the BDI is not registering congestion (which it never will in
this test setup).
      1 writeback_congestion_wait: usec_timeout=100000 usec_delayed=120000
      2 writeback_congestion_wait: usec_timeout=100000 usec_delayed=132000
      4 writeback_congestion_wait: usec_timeout=100000 usec_delayed=112000
    380 writeback_congestion_wait: usec_timeout=100000 usec_delayed=108000
    778 writeback_congestion_wait: usec_timeout=100000 usec_delayed=104000

congestion_wait, if called, always exceeds the timeout as there is no
trigger to wake it up.

Bottom line: Vanilla will throttle but it's not effective.

Patch series
------------
Kswapd throttle activity was always due to scanning pages tagged for
immediate reclaim at the tail of the LRU:

      1 usect_delayed=12000 reason=VMSCAN_THROTTLE_WRITEBACK
      1 usect_delayed=16000 reason=VMSCAN_THROTTLE_WRITEBACK
      1 usect_delayed=28000 reason=VMSCAN_THROTTLE_WRITEBACK
      1 usect_delayed=68000 reason=VMSCAN_THROTTLE_WRITEBACK
      1 usect_delayed=96000 reason=VMSCAN_THROTTLE_WRITEBACK
      2 usect_delayed=24000 reason=VMSCAN_THROTTLE_WRITEBACK
      8 usect_delayed=8000 reason=VMSCAN_THROTTLE_WRITEBACK
     23 usect_delayed=100000 reason=VMSCAN_THROTTLE_WRITEBACK
     52 usect_delayed=4000 reason=VMSCAN_THROTTLE_WRITEBACK
     61 usect_delayed=0 reason=VMSCAN_THROTTLE_WRITEBACK

The majority of events did not stall, or stalled only for a short period.
Roughly 16% of stalls slept until the timeout expired.

For direct reclaim, the number of times stalled for each reason was:

  13594 reason=VMSCAN_THROTTLE_ISOLATED
  72247 reason=VMSCAN_THROTTLE_WRITEBACK
  77203 reason=VMSCAN_THROTTLE_NOPROGRESS

The most common reason to stall was a failure to make forward progress,
followed closely by excessive pages tagged for immediate reclaim at the
tail of the LRU. A relatively small number were due to too many pages
isolated from the LRU by parallel threads.

For VMSCAN_THROTTLE_ISOLATED, the breakdown of delays was:

      3 usec_timeout=20000 usect_delayed=16000 reason=VMSCAN_THROTTLE_ISOLATED
      8 usec_timeout=20000 usect_delayed=8000 reason=VMSCAN_THROTTLE_ISOLATED
      9 usec_timeout=20000 usect_delayed=12000 reason=VMSCAN_THROTTLE_ISOLATED
     18 usec_timeout=20000 usect_delayed=4000 reason=VMSCAN_THROTTLE_ISOLATED
     69 usec_timeout=20000 usect_delayed=20000 reason=VMSCAN_THROTTLE_ISOLATED
   1946 usec_timeout=20000 usect_delayed=0 reason=VMSCAN_THROTTLE_ISOLATED

Most did not stall at all, or only for a short period. A small number
reached the timeout.
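The sleep-until-woken-or-timeout behaviour behind these histograms can be
pictured with a user-space analogue (a hedged sketch only; the kernel side
uses waitqueues in vmscan, not pthreads):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int pages_pending = 1;	/* "too many dirty/writeback pages" */

/* Sleep until the condition clears or timeout_ms expires, whichever first. */
static void throttle(long timeout_ms)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += timeout_ms / 1000;
	ts.tv_nsec += (timeout_ms % 1000) * 1000000L;
	if (ts.tv_nsec >= 1000000000L) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000000000L;
	}

	pthread_mutex_lock(&lock);
	while (pages_pending) {
		/* non-zero return (ETIMEDOUT) is the usect_delayed=timeout case */
		if (pthread_cond_timedwait(&cond, &lock, &ts))
			break;
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	throttle(100);	/* cf. usec_timeout=100000 in the histograms */
	puts("woken or timed out");
	return 0;
}

A cleaner/reclaimer signalling the condition variable corresponds to the
many usect_delayed=0 entries; nothing signalling corresponds to the rows
that hit the full timeout.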
For VMSCAN_THROTTLE_NOPROGRESS, the breakdown of stalls was all over the
map:

    1 usec_timeout=500000 usect_delayed=188000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=204000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=264000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=268000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=276000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=360000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=364000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=380000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=388000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=400000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=432000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usec_timeout=500000 usect_delayed=468000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=180000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=236000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=284000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=396000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=404000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=420000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=436000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=456000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=464000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usec_timeout=500000 usect_delayed=472000 reason=VMSCAN_THROTTLE_NOPROGRESS
    3 usec_timeout=500000 usect_delayed=156000 reason=VMSCAN_THROTTLE_NOPROGRESS
    3 usec_timeout=500000 usect_delayed=328000 reason=VMSCAN_THROTTLE_NOPROGRESS
    3 usec_timeout=500000 usect_delayed=336000 reason=VMSCAN_THROTTLE_NOPROGRESS
    3 usec_timeout=500000 usect_delayed=476000 reason=VMSCAN_THROTTLE_NOPROGRESS
    4 usec_timeout=500000 usect_delayed=168000 reason=VMSCAN_THROTTLE_NOPROGRESS
    4 usec_timeout=500000 usect_delayed=348000 reason=VMSCAN_THROTTLE_NOPROGRESS
    4 usec_timeout=500000 usect_delayed=412000 reason=VMSCAN_THROTTLE_NOPROGRESS
    4 usec_timeout=500000 usect_delayed=452000 reason=VMSCAN_THROTTLE_NOPROGRESS
    4 usec_timeout=500000 usect_delayed=484000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=164000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=240000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=256000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=316000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=352000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=408000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=444000 reason=VMSCAN_THROTTLE_NOPROGRESS
    5 usec_timeout=500000 usect_delayed=492000 reason=VMSCAN_THROTTLE_NOPROGRESS
    6 usec_timeout=500000 usect_delayed=280000 reason=VMSCAN_THROTTLE_NOPROGRESS
    6 usec_timeout=500000 usect_delayed=332000 reason=VMSCAN_THROTTLE_NOPROGRESS
    6 usec_timeout=500000 usect_delayed=368000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=116000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=132000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=136000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=272000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=308000 reason=VMSCAN_THROTTLE_NOPROGRESS
    7 usec_timeout=500000 usect_delayed=440000 reason=VMSCAN_THROTTLE_NOPROGRESS
    8 usec_timeout=500000 usect_delayed=292000 reason=VMSCAN_THROTTLE_NOPROGRESS
    8 usec_timeout=500000 usect_delayed=324000 reason=VMSCAN_THROTTLE_NOPROGRESS
    8 usec_timeout=500000 usect_delayed=448000 reason=VMSCAN_THROTTLE_NOPROGRESS
    9 usec_timeout=500000 usect_delayed=144000 reason=VMSCAN_THROTTLE_NOPROGRESS
    9 usec_timeout=500000 usect_delayed=152000 reason=VMSCAN_THROTTLE_NOPROGRESS
    9 usec_timeout=500000 usect_delayed=184000 reason=VMSCAN_THROTTLE_NOPROGRESS
   10 usec_timeout=500000 usect_delayed=392000 reason=VMSCAN_THROTTLE_NOPROGRESS
   10 usec_timeout=500000 usect_delayed=424000 reason=VMSCAN_THROTTLE_NOPROGRESS
   11 usec_timeout=500000 usect_delayed=220000 reason=VMSCAN_THROTTLE_NOPROGRESS
   11 usec_timeout=500000 usect_delayed=228000 reason=VMSCAN_THROTTLE_NOPROGRESS
   11 usec_timeout=500000 usect_delayed=252000 reason=VMSCAN_THROTTLE_NOPROGRESS
   12 usec_timeout=500000 usect_delayed=140000 reason=VMSCAN_THROTTLE_NOPROGRESS
   12 usec_timeout=500000 usect_delayed=148000 reason=VMSCAN_THROTTLE_NOPROGRESS
   12 usec_timeout=500000 usect_delayed=384000 reason=VMSCAN_THROTTLE_NOPROGRESS
   13 usec_timeout=500000 usect_delayed=212000 reason=VMSCAN_THROTTLE_NOPROGRESS
   14 usec_timeout=500000 usect_delayed=176000 reason=VMSCAN_THROTTLE_NOPROGRESS
   14 usec_timeout=500000 usect_delayed=488000 reason=VMSCAN_THROTTLE_NOPROGRESS
   15 usec_timeout=500000 usect_delayed=196000 reason=VMSCAN_THROTTLE_NOPROGRESS
   17 usec_timeout=500000 usect_delayed=216000 reason=VMSCAN_THROTTLE_NOPROGRESS
   18 usec_timeout=500000 usect_delayed=112000 reason=VMSCAN_THROTTLE_NOPROGRESS
   20 usec_timeout=500000 usect_delayed=124000 reason=VMSCAN_THROTTLE_NOPROGRESS
   20 usec_timeout=500000 usect_delayed=300000 reason=VMSCAN_THROTTLE_NOPROGRESS
   21 usec_timeout=500000 usect_delayed=304000 reason=VMSCAN_THROTTLE_NOPROGRESS
   24 usec_timeout=500000 usect_delayed=120000 reason=VMSCAN_THROTTLE_NOPROGRESS
   24 usec_timeout=500000 usect_delayed=312000 reason=VMSCAN_THROTTLE_NOPROGRESS
   27 usec_timeout=500000 usect_delayed=224000 reason=VMSCAN_THROTTLE_NOPROGRESS
   27 usec_timeout=500000 usect_delayed=68000 reason=VMSCAN_THROTTLE_NOPROGRESS
   28 usec_timeout=500000 usect_delayed=416000 reason=VMSCAN_THROTTLE_NOPROGRESS
   29 usec_timeout=500000 usect_delayed=200000 reason=VMSCAN_THROTTLE_NOPROGRESS
   30 usec_timeout=500000 usect_delayed=160000 reason=VMSCAN_THROTTLE_NOPROGRESS
   30 usec_timeout=500000 usect_delayed=60000 reason=VMSCAN_THROTTLE_NOPROGRESS
   30 usec_timeout=500000 usect_delayed=76000 reason=VMSCAN_THROTTLE_NOPROGRESS
   31 usec_timeout=500000 usect_delayed=496000 reason=VMSCAN_THROTTLE_NOPROGRESS
   32 usec_timeout=500000 usect_delayed=192000 reason=VMSCAN_THROTTLE_NOPROGRESS
   32 usec_timeout=500000 usect_delayed=296000 reason=VMSCAN_THROTTLE_NOPROGRESS
   38 usec_timeout=500000 usect_delayed=232000 reason=VMSCAN_THROTTLE_NOPROGRESS
   38 usec_timeout=500000 usect_delayed=320000 reason=VMSCAN_THROTTLE_NOPROGRESS
   39 usec_timeout=500000 usect_delayed=208000 reason=VMSCAN_THROTTLE_NOPROGRESS
   47 usec_timeout=500000 usect_delayed=108000 reason=VMSCAN_THROTTLE_NOPROGRESS
   47 usec_timeout=500000 usect_delayed=52000 reason=VMSCAN_THROTTLE_NOPROGRESS
   52 usec_timeout=500000 usect_delayed=128000 reason=VMSCAN_THROTTLE_NOPROGRESS
   54 usec_timeout=500000 usect_delayed=80000 reason=VMSCAN_THROTTLE_NOPROGRESS
   55 usec_timeout=500000 usect_delayed=288000 reason=VMSCAN_THROTTLE_NOPROGRESS
   61 usec_timeout=500000 usect_delayed=72000 reason=VMSCAN_THROTTLE_NOPROGRESS
   63 usec_timeout=500000 usect_delayed=84000 reason=VMSCAN_THROTTLE_NOPROGRESS
   68 usec_timeout=500000 usect_delayed=64000 reason=VMSCAN_THROTTLE_NOPROGRESS
   75 usec_timeout=500000 usect_delayed=44000 reason=VMSCAN_THROTTLE_NOPROGRESS
   80 usec_timeout=500000 usect_delayed=48000 reason=VMSCAN_THROTTLE_NOPROGRESS
   83 usec_timeout=500000 usect_delayed=88000 reason=VMSCAN_THROTTLE_NOPROGRESS
   88 usec_timeout=500000 usect_delayed=56000 reason=VMSCAN_THROTTLE_NOPROGRESS
   97 usec_timeout=500000 usect_delayed=100000 reason=VMSCAN_THROTTLE_NOPROGRESS
   99 usec_timeout=500000 usect_delayed=36000 reason=VMSCAN_THROTTLE_NOPROGRESS
   99 usec_timeout=500000 usect_delayed=92000 reason=VMSCAN_THROTTLE_NOPROGRESS
  102 usec_timeout=500000 usect_delayed=480000 reason=VMSCAN_THROTTLE_NOPROGRESS
  149 usec_timeout=500000 usect_delayed=40000 reason=VMSCAN_THROTTLE_NOPROGRESS
  187 usec_timeout=500000 usect_delayed=32000 reason=VMSCAN_THROTTLE_NOPROGRESS
  196 usec_timeout=500000 usect_delayed=28000 reason=VMSCAN_THROTTLE_NOPROGRESS
  245 usec_timeout=500000 usect_delayed=96000 reason=VMSCAN_THROTTLE_NOPROGRESS
  322 usec_timeout=500000 usect_delayed=24000 reason=VMSCAN_THROTTLE_NOPROGRESS
  406 usec_timeout=500000 usect_delayed=20000 reason=VMSCAN_THROTTLE_NOPROGRESS
  588 usec_timeout=500000 usect_delayed=16000 reason=VMSCAN_THROTTLE_NOPROGRESS
  843 usec_timeout=500000 usect_delayed=12000 reason=VMSCAN_THROTTLE_NOPROGRESS
 1299 usec_timeout=500000 usect_delayed=104000 reason=VMSCAN_THROTTLE_NOPROGRESS
 2839 usec_timeout=500000 usect_delayed=8000 reason=VMSCAN_THROTTLE_NOPROGRESS
10111 usec_timeout=500000 usect_delayed=4000 reason=VMSCAN_THROTTLE_NOPROGRESS
21492 usec_timeout=500000 usect_delayed=0 reason=VMSCAN_THROTTLE_NOPROGRESS
36441 usec_timeout=500000 usect_delayed=500000 reason=VMSCAN_THROTTLE_NOPROGRESS

The full timeout is often hit, but a large number also do not stall at
all. The remainder slept a little, allowing other reclaim tasks to make
progress. While this timeout could be increased further, it could also
negatively impact worst-case behaviour when there is no prioritisation
of which task should make progress.
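The NOPROGRESS throttling and wakeup logic that produces this behaviour
ends up split across patches 3 and 8 of this series; a condensed sketch
of the combined logic from those diffs (the waitqueue_active() check is
omitted, and the timeout argument is as in patch 3 before patch 6
centralises it):

    static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
    {
        /*
         * Wake NOPROGRESS-throttled tasks once reclaim efficiency
         * exceeds nr_scanned >> 3, i.e. roughly 12% (patch 8).
         */
        if (sc->nr_reclaimed > (sc->nr_scanned >> 3)) {
            wake_up_all(&pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS]);
            return;
        }

        /* kswapd throttles on VMSCAN_THROTTLE_WRITEBACK instead. */
        if (current_is_kswapd())
            return;

        /* Throttle only at elevated reclaim priority (patch 3). */
        if (sc->priority < DEF_PRIORITY - 2)
            reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
    }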
For VMSCAN_THROTTLE_WRITEBACK, the breakdown was:

        5 usec_timeout=100000 usect_delayed=48000 reason=VMSCAN_THROTTLE_WRITEBACK
        7 usec_timeout=100000 usect_delayed=80000 reason=VMSCAN_THROTTLE_WRITEBACK
        8 usec_timeout=100000 usect_delayed=60000 reason=VMSCAN_THROTTLE_WRITEBACK
        9 usec_timeout=100000 usect_delayed=28000 reason=VMSCAN_THROTTLE_WRITEBACK
       12 usec_timeout=100000 usect_delayed=72000 reason=VMSCAN_THROTTLE_WRITEBACK
       12 usec_timeout=100000 usect_delayed=84000 reason=VMSCAN_THROTTLE_WRITEBACK
       13 usec_timeout=100000 usect_delayed=40000 reason=VMSCAN_THROTTLE_WRITEBACK
       14 usec_timeout=100000 usect_delayed=44000 reason=VMSCAN_THROTTLE_WRITEBACK
       14 usec_timeout=100000 usect_delayed=76000 reason=VMSCAN_THROTTLE_WRITEBACK
       16 usec_timeout=100000 usect_delayed=88000 reason=VMSCAN_THROTTLE_WRITEBACK
       21 usec_timeout=100000 usect_delayed=68000 reason=VMSCAN_THROTTLE_WRITEBACK
       24 usec_timeout=100000 usect_delayed=36000 reason=VMSCAN_THROTTLE_WRITEBACK
       25 usec_timeout=100000 usect_delayed=56000 reason=VMSCAN_THROTTLE_WRITEBACK
       26 usec_timeout=100000 usect_delayed=32000 reason=VMSCAN_THROTTLE_WRITEBACK
       32 usec_timeout=100000 usect_delayed=52000 reason=VMSCAN_THROTTLE_WRITEBACK
       45 usec_timeout=100000 usect_delayed=96000 reason=VMSCAN_THROTTLE_WRITEBACK
       50 usec_timeout=100000 usect_delayed=92000 reason=VMSCAN_THROTTLE_WRITEBACK
       60 usec_timeout=100000 usect_delayed=64000 reason=VMSCAN_THROTTLE_WRITEBACK
       74 usec_timeout=100000 usect_delayed=24000 reason=VMSCAN_THROTTLE_WRITEBACK
      122 usec_timeout=100000 usect_delayed=16000 reason=VMSCAN_THROTTLE_WRITEBACK
      134 usec_timeout=100000 usect_delayed=20000 reason=VMSCAN_THROTTLE_WRITEBACK
      310 usec_timeout=100000 usect_delayed=12000 reason=VMSCAN_THROTTLE_WRITEBACK
      568 usec_timeout=100000 usect_delayed=8000 reason=VMSCAN_THROTTLE_WRITEBACK
     2038 usec_timeout=100000 usect_delayed=4000 reason=VMSCAN_THROTTLE_WRITEBACK
     7061 usec_timeout=100000 usect_delayed=0 reason=VMSCAN_THROTTLE_WRITEBACK
    61547 usec_timeout=100000 usect_delayed=100000 reason=VMSCAN_THROTTLE_WRITEBACK

The majority hit the timeout in direct reclaim context, although a
sizable number did not stall at all. This is very different to kswapd,
where only a tiny percentage of stalls due to writeback reached the
timeout.

Bottom line: the throttling appears to work, and the wakeup events may
limit worst-case stalls. There might be some grounds for adjusting
timeouts, but it's likely futile as the worst-case scenarios depend on
the workload, memory size and the speed of the storage. A better
approach to improve the series further would be to prioritise tasks
based on their rate of allocation, with the caveat that it may be very
expensive to track.

This patch (of 8):

Page reclaim throttles on wait_iff_congested under the following
conditions:

o kswapd is encountering pages under writeback and marked for immediate
  reclaim, implying that pages are cycling through the LRU faster than
  pages can be cleaned.

o Direct reclaim will stall if all dirty pages are backed by congested
  inodes.

wait_iff_congested is almost completely broken, with few exceptions.
This patch adds a new node-based waitqueue and tracks the number of
throttled tasks and pages written back since throttling started. If
enough pages belonging to the node are written back then the throttled
tasks will wake early. If not, the throttled tasks sleep until the
timeout expires.
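In outline, the mechanism described above reduces to the following
condensed sketch (distilled from the diff below; the kthread/IO-worker
exclusions and the tracepoint are omitted, and names are those
introduced by the patch):

    void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
                          long timeout)
    {
        DEFINE_WAIT(wait);

        /* The first throttled task records the writeback baseline. */
        if (atomic_inc_return(&pgdat->nr_reclaim_throttled) == 1)
            WRITE_ONCE(pgdat->nr_reclaim_start,
                       node_page_state(pgdat, NR_THROTTLED_WRITTEN));

        prepare_to_wait(&pgdat->reclaim_wait, &wait, TASK_UNINTERRUPTIBLE);
        schedule_timeout(timeout);      /* may be woken early, see below */
        finish_wait(&pgdat->reclaim_wait, &wait);
        atomic_dec(&pgdat->nr_reclaim_throttled);
    }

    /* Called via end_page_writeback() for each page completing writeback. */
    void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
                                  int nr_throttled)
    {
        unsigned long nr_written;

        inc_node_page_state(page, NR_THROTTLED_WRITTEN);
        nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) -
                     READ_ONCE(pgdat->nr_reclaim_start);

        /* Enough writeback has completed: wake the throttled tasks early. */
        if (nr_written > SWAP_CLUSTER_MAX * nr_throttled)
            wake_up_all(&pgdat->reclaim_wait);
    }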
[neilb@suse.de: Uninterruptible sleep and simpler wakeups] [hdanton@sina.com: Avoid race when reclaim starts] [vbabka@suse.cz: vmstat irq-safe api, clarifications] Link: https://lkml.kernel.org/r/20211019090108.25501-1-mgorman@techsingularity.net Link: https://lkml.kernel.org/r/20211019090108.25501-2-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: NeilBrown Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: "Darrick J . Wong" Cc: Matthew Wilcox Cc: Michal Hocko Cc: Dave Chinner Cc: Rik van Riel Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Hillf Danton Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/backing-dev.h | 1 - include/linux/mmzone.h | 9 ++++ include/trace/events/vmscan.h | 34 +++++++++++++ include/trace/events/writeback.h | 7 --- mm/backing-dev.c | 48 ------------------- mm/filemap.c | 1 + mm/internal.h | 11 +++++ mm/page_alloc.c | 1 + mm/vmscan.c | 82 +++++++++++++++++++++++++++----- mm/vmstat.c | 1 + 10 files changed, 127 insertions(+), 68 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index ac7f231b88258..9fb1f0ae273c8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -154,7 +154,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) } long congestion_wait(int sync, long timeout); -long wait_iff_congested(int sync, long timeout); static inline bool mapping_can_writeback(struct address_space *mapping) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb36a29e3aaeb..241e8dc5b359b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -199,6 +199,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_THROTTLED_WRITTEN, /* NR_WRITTEN while reclaim throttled */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ @@ -272,6 +273,10 @@ enum lru_list { NR_LRU_LISTS }; +enum vmscan_throttle_state { + VMSCAN_THROTTLE_WRITEBACK, +}; + #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) @@ -841,6 +846,10 @@ typedef struct pglist_data { int node_id; wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; + wait_queue_head_t reclaim_wait; /* wq for throttling reclaim */ + atomic_t nr_reclaim_throttled; /* nr of throtted tasks */ + unsigned long nr_reclaim_start; /* nr pages written while throttled + * when throttling started. */ struct task_struct *kswapd; /* Protected by mem_hotplug_begin/end() */ int kswapd_order; diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 88faf2400ec25..c317f9fe0d175 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -27,6 +27,14 @@ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" +#define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) + +#define show_throttle_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"} \ + ) : "VMSCAN_THROTTLE_NONE" + + #define trace_reclaim_flags(file) ( \ (file ? 
RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ @@ -454,6 +462,32 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end, TP_ARGS(nr_reclaimed) ); +TRACE_EVENT(mm_vmscan_throttled, + + TP_PROTO(int nid, int usec_timeout, int usec_delayed, int reason), + + TP_ARGS(nid, usec_timeout, usec_delayed, reason), + + TP_STRUCT__entry( + __field(int, nid) + __field(int, usec_timeout) + __field(int, usec_delayed) + __field(int, reason) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->usec_timeout = usec_timeout; + __entry->usec_delayed = usec_delayed; + __entry->reason = 1U << reason; + ), + + TP_printk("nid=%d usec_timeout=%d usect_delayed=%d reason=%s", + __entry->nid, + __entry->usec_timeout, + __entry->usec_delayed, + show_throttle_flags(__entry->reason)) +); #endif /* _TRACE_VMSCAN_H */ /* This part must be outside protection */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 840d1ba84cf5c..3bc759b818974 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -763,13 +763,6 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait, TP_ARGS(usec_timeout, usec_delayed) ); -DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested, - - TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), - - TP_ARGS(usec_timeout, usec_delayed) -); - DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_PROTO(struct inode *inode, diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 4a9d4e27d0d9b..0ea1a105eae57 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1041,51 +1041,3 @@ long congestion_wait(int sync, long timeout) return ret; } EXPORT_SYMBOL(congestion_wait); - -/** - * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes - * @sync: SYNC or ASYNC IO - * @timeout: timeout in jiffies - * - * In the event of a congested backing_dev (any backing_dev) this waits - * for up to @timeout jiffies for either a BDI to exit congestion of the - * given @sync queue or a write to complete. - * - * The return value is 0 if the sleep is for the full timeout. Otherwise, - * it is the number of jiffies that were still remaining when the function - * returned. return_value == timeout implies the function did not sleep. 
- */ -long wait_iff_congested(int sync, long timeout) -{ - long ret; - unsigned long start = jiffies; - DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[sync]; - - /* - * If there is no congestion, yield if necessary instead - * of sleeping on the congestion queue - */ - if (atomic_read(&nr_wb_congested[sync]) == 0) { - cond_resched(); - - /* In case we scheduled, work out time remaining */ - ret = timeout - (jiffies - start); - if (ret < 0) - ret = 0; - - goto out; - } - - /* Sleep until uncongested or a write happens */ - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - ret = io_schedule_timeout(timeout); - finish_wait(wqh, &wait); - -out: - trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout), - jiffies_to_usecs(jiffies - start)); - - return ret; -} -EXPORT_SYMBOL(wait_iff_congested); diff --git a/mm/filemap.c b/mm/filemap.c index b0254cf793732..33cbdc4219bf9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1613,6 +1613,7 @@ void end_page_writeback(struct page *page) smp_mb__after_atomic(); wake_up_page(page, PG_writeback); + acct_reclaim_writeback(page); put_page(page); } EXPORT_SYMBOL(end_page_writeback); diff --git a/mm/internal.h b/mm/internal.h index 6c3e1a9f8c5a6..b2132c44300d9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -34,6 +34,17 @@ void page_writeback_init(void); +void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page, + int nr_throttled); +static inline void acct_reclaim_writeback(struct page *page) +{ + pg_data_t *pgdat = page_pgdat(page); + int nr_throttled = atomic_read(&pgdat->nr_reclaim_throttled); + + if (nr_throttled) + __acct_reclaim_writeback(pgdat, page, nr_throttled); +} + vm_fault_t do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5a02d6d0a296c..3f78e65647e83 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7415,6 +7415,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); + init_waitqueue_head(&pgdat->reclaim_wait); pgdat_page_ext_init(pgdat); lruvec_init(&pgdat->__lruvec); diff --git a/mm/vmscan.c b/mm/vmscan.c index 0b161eaaa03c2..aa8290328b6e9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1006,6 +1006,64 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } +static void +reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, + long timeout) +{ + wait_queue_head_t *wqh = &pgdat->reclaim_wait; + long ret; + DEFINE_WAIT(wait); + + /* + * Do not throttle IO workers, kthreads other than kswapd or + * workqueues. They may be required for reclaim to make + * forward progress (e.g. journalling workqueues or kthreads). + */ + if (!current_is_kswapd() && + current->flags & (PF_IO_WORKER|PF_KTHREAD)) + return; + + if (atomic_inc_return(&pgdat->nr_reclaim_throttled) == 1) { + WRITE_ONCE(pgdat->nr_reclaim_start, + node_page_state(pgdat, NR_THROTTLED_WRITTEN)); + } + + prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + ret = schedule_timeout(timeout); + finish_wait(wqh, &wait); + atomic_dec(&pgdat->nr_reclaim_throttled); + + trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), + jiffies_to_usecs(timeout - ret), + reason); +} + +/* + * Account for pages written if tasks are throttled waiting on dirty + * pages to clean. If enough pages have been cleaned since throttling + * started then wakeup the throttled tasks. 
+ */ +void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page, + int nr_throttled) +{ + unsigned long nr_written; + + inc_node_page_state(page, NR_THROTTLED_WRITTEN); + + /* + * This is an inaccurate read as the per-cpu deltas may not + * be synchronised. However, given that the system is + * writeback throttled, it is not worth taking the penalty + * of getting an accurate count. At worst, the throttle + * timeout guarantees forward progress. + */ + nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) - + READ_ONCE(pgdat->nr_reclaim_start); + + if (nr_written > SWAP_CLUSTER_MAX * nr_throttled) + wake_up_all(&pgdat->reclaim_wait); +} + /* possible outcome of pageout() */ typedef enum { /* failed to write page out, page is locked */ @@ -1418,9 +1476,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, /* * The number of dirty pages determines if a node is marked - * reclaim_congested which affects wait_iff_congested. kswapd - * will stall and start writing pages if the tail of the LRU - * is all dirty unqueued pages. + * reclaim_congested. kswapd will stall and start writing + * pages if the tail of the LRU is all dirty unqueued pages. */ page_check_dirty_writeback(page, &dirty, &writeback); if (dirty || writeback) @@ -3186,19 +3243,19 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) * If kswapd scans pages marked for immediate * reclaim and under writeback (nr_immediate), it * implies that pages are cycling through the LRU - * faster than they are written so also forcibly stall. + * faster than they are written so forcibly stall + * until some pages complete writeback. */ if (sc->nr.immediate) - congestion_wait(BLK_RW_ASYNC, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10); } /* - * Tag a node/memcg as congested if all the dirty pages - * scanned were backed by a congested BDI and - * wait_iff_congested will stall. + * Tag a node/memcg as congested if all the dirty pages were marked + * for writeback and immediate reclaim (counted in nr.congested). * * Legacy memcg will stall in page writeback so avoid forcibly - * stalling in wait_iff_congested(). + * stalling in reclaim_throttle(). */ if ((current_is_kswapd() || (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) && @@ -3206,15 +3263,15 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); /* - * Stall direct reclaim for IO completions if underlying BDIs - * and node is congested. Allow kswapd to continue until it + * Stall direct reclaim for IO completions if the lruvec is + * node is congested. Allow kswapd to continue until it * starts encountering unqueued dirty pages or cycling through * the LRU too quickly. 
 	 */
 	if (!current_is_kswapd() && current_may_throttle() &&
 	    !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
-		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
 
 	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 				    sc))
@@ -4292,6 +4349,7 @@ static int kswapd(void *p)
 	WRITE_ONCE(pgdat->kswapd_order, 0);
 	WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
+	atomic_set(&pgdat->nr_reclaim_throttled, 0);
 	for ( ; ; ) {
 		bool ret;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ae87c90e0b4e9..0f4643e5324d3 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1225,6 +1225,7 @@ const char * const vmstat_text[] = {
 	"nr_vmscan_immediate_reclaim",
 	"nr_dirtied",
 	"nr_written",
+	"nr_throttled_written",
 	"nr_kernel_misc_reclaimable",
 	"nr_foll_pin_acquired",
 	"nr_foll_pin_released",

From ce64c2e39687c24ee56e61b12b94235248954bdd Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:05 +1100
Subject: [PATCH 0772/1202] mm/vmscan: throttle reclaim and compaction when
 too many pages are isolated

Page reclaim throttles on congestion if too many parallel reclaim
instances have isolated too many pages. This makes no sense; excessive
parallelisation has nothing to do with writeback or congestion.

This patch creates an additional waitqueue to sleep on when too many
pages are isolated. The throttled tasks are woken when the number of
isolated pages is reduced or a timeout occurs. There may be some false
positive wakeups for GFP_NOIO/GFP_NOFS callers, but the tasks will
throttle again if necessary.

[shy828301@gmail.com: Wake up from compaction context]
[vbabka@suse.cz: Account number of throttled tasks only for writeback]
Link: https://lkml.kernel.org/r/20211019090108.25501-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Reviewed-by: Yang Shi
Cc: Andreas Dilger
Cc: "Darrick J . Wong"
Cc: Dave Chinner
Cc: Hillf Danton
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Matthew Wilcox
Cc: Michal Hocko
Cc: NeilBrown
Cc: Rik van Riel
Cc: "Theodore Ts'o"
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 include/linux/mmzone.h        |  6 ++++--
 include/trace/events/vmscan.h |  4 +++-
 mm/compaction.c               | 10 ++++++++--
 mm/internal.h                 | 13 ++++++++++++-
 mm/page_alloc.c               |  6 +++++-
 mm/vmscan.c                   | 28 +++++++++++++++++++---------
 6 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 241e8dc5b359b..12c566c9a6d90 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -275,6 +275,8 @@ enum lru_list {
 
 enum vmscan_throttle_state {
 	VMSCAN_THROTTLE_WRITEBACK,
+	VMSCAN_THROTTLE_ISOLATED,
+	NR_VMSCAN_THROTTLE,
 };
 
 #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
@@ -846,8 +848,8 @@ typedef struct pglist_data {
 	int node_id;
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
-	wait_queue_head_t reclaim_wait;	/* wq for throttling reclaim */
-	atomic_t nr_reclaim_throttled;	/* nr of throtted tasks */
+	wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE];
+	atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */
 	unsigned long nr_reclaim_start;	/* nr pages written while throttled
 					 * when throttling started.
*/ struct task_struct *kswapd; /* Protected by diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index c317f9fe0d175..d4905bd9e9c4c 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -28,10 +28,12 @@ ) : "RECLAIM_WB_NONE" #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) +#define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) #define show_throttle_flags(flags) \ (flags) ? __print_flags(flags, "|", \ - {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"} \ + {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ + {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"} \ ) : "VMSCAN_THROTTLE_NONE" diff --git a/mm/compaction.c b/mm/compaction.c index bfc93da1c2c7c..7359093d8ac0e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -761,6 +761,8 @@ isolate_freepages_range(struct compact_control *cc, /* Similar to reclaim, but different enough that they don't share logic */ static bool too_many_isolated(pg_data_t *pgdat) { + bool too_many; + unsigned long active, inactive, isolated; inactive = node_page_state(pgdat, NR_INACTIVE_FILE) + @@ -770,7 +772,11 @@ static bool too_many_isolated(pg_data_t *pgdat) isolated = node_page_state(pgdat, NR_ISOLATED_FILE) + node_page_state(pgdat, NR_ISOLATED_ANON); - return isolated > (inactive + active) / 2; + too_many = isolated > (inactive + active) / 2; + if (!too_many) + wake_throttle_isolated(pgdat); + + return too_many; } /** @@ -822,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (cc->mode == MIGRATE_ASYNC) return -EAGAIN; - congestion_wait(BLK_RW_ASYNC, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); if (fatal_signal_pending(current)) return -EINTR; diff --git a/mm/internal.h b/mm/internal.h index b2132c44300d9..cfd42598ba97f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -39,12 +39,21 @@ void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page, static inline void acct_reclaim_writeback(struct page *page) { pg_data_t *pgdat = page_pgdat(page); - int nr_throttled = atomic_read(&pgdat->nr_reclaim_throttled); + int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled); if (nr_throttled) __acct_reclaim_writeback(pgdat, page, nr_throttled); } +static inline void wake_throttle_isolated(pg_data_t *pgdat) +{ + wait_queue_head_t *wqh; + + wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED]; + if (waitqueue_active(wqh)) + wake_up_all(wqh); +} + vm_fault_t do_swap_page(struct vm_fault *vmf); void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, @@ -121,6 +130,8 @@ extern unsigned long highest_memmap_pfn; */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); +extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, + long timeout); /* * in mm/rmap.c: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3f78e65647e83..001f9469632d5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7408,6 +7408,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {} static void __meminit pgdat_init_internals(struct pglist_data *pgdat) { + int i; + pgdat_resize_init(pgdat); pgdat_init_split_queue(pgdat); @@ -7415,7 +7417,9 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) init_waitqueue_head(&pgdat->kswapd_wait); init_waitqueue_head(&pgdat->pfmemalloc_wait); - init_waitqueue_head(&pgdat->reclaim_wait); + + for (i = 0; i < NR_VMSCAN_THROTTLE; i++) + 
init_waitqueue_head(&pgdat->reclaim_wait[i]); pgdat_page_ext_init(pgdat); lruvec_init(&pgdat->__lruvec); diff --git a/mm/vmscan.c b/mm/vmscan.c index aa8290328b6e9..ea8ae82dc1d41 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1006,12 +1006,12 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } -static void -reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, +void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, long timeout) { - wait_queue_head_t *wqh = &pgdat->reclaim_wait; + wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; long ret; + bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK); DEFINE_WAIT(wait); /* @@ -1023,7 +1023,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, current->flags & (PF_IO_WORKER|PF_KTHREAD)) return; - if (atomic_inc_return(&pgdat->nr_reclaim_throttled) == 1) { + if (acct_writeback && + atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { WRITE_ONCE(pgdat->nr_reclaim_start, node_page_state(pgdat, NR_THROTTLED_WRITTEN)); } @@ -1031,7 +1032,9 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = schedule_timeout(timeout); finish_wait(wqh, &wait); - atomic_dec(&pgdat->nr_reclaim_throttled); + + if (acct_writeback) + atomic_dec(&pgdat->nr_writeback_throttled); trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), jiffies_to_usecs(timeout - ret), @@ -1061,7 +1064,7 @@ void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page, READ_ONCE(pgdat->nr_reclaim_start); if (nr_written > SWAP_CLUSTER_MAX * nr_throttled) - wake_up_all(&pgdat->reclaim_wait); + wake_up_all(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]); } /* possible outcome of pageout() */ @@ -2182,6 +2185,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, struct scan_control *sc) { unsigned long inactive, isolated; + bool too_many; if (current_is_kswapd()) return 0; @@ -2205,7 +2209,13 @@ static int too_many_isolated(struct pglist_data *pgdat, int file, if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) inactive >>= 3; - return isolated > inactive; + too_many = isolated > inactive; + + /* Wake up tasks throttled due to too_many_isolated. */ + if (!too_many) + wake_throttle_isolated(pgdat); + + return too_many; } /* @@ -2314,8 +2324,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, return 0; /* wait a bit for the reclaimer. */ - msleep(100); stalled = true; + reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -4349,7 +4359,7 @@ static int kswapd(void *p) WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); - atomic_set(&pgdat->nr_reclaim_throttled, 0); + atomic_set(&pgdat->nr_writeback_throttled, 0); for ( ; ; ) { bool ret; From 221461959d6f6c3ae26be3d72b32f20994b31f29 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 21 Oct 2021 15:08:05 +1100 Subject: [PATCH 0773/1202] mm/vmscan: throttle reclaim when no progress is being made Memcg reclaim throttles on congestion if no reclaim progress is made. This makes little sense, it might be due to writeback or a host of other factors. For !memcg reclaim, it's messy. Direct reclaim primarily is throttled in the page allocator if it is failing to make progress. Kswapd throttles if too many pages are under writeback and marked for immediate reclaim. 
This patch explicitly throttles if reclaim is failing to make progress. [vbabka@suse.cz: Remove redundant code] Link: https://lkml.kernel.org/r/20211019090108.25501-4-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andreas Dilger Cc: "Darrick J . Wong" Cc: Dave Chinner Cc: Hillf Danton Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Michal Hocko Cc: NeilBrown Cc: Rik van Riel Cc: "Theodore Ts'o" Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mmzone.h | 1 + include/trace/events/vmscan.h | 4 +++- mm/memcontrol.c | 10 +--------- mm/vmscan.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 12c566c9a6d90..0b1658c02fc49 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -276,6 +276,7 @@ enum lru_list { enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, + VMSCAN_THROTTLE_NOPROGRESS, NR_VMSCAN_THROTTLE, }; diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index d4905bd9e9c4c..f25a6149d3ba5 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -29,11 +29,13 @@ #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) +#define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) #define show_throttle_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ - {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"} \ + {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ + {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"} \ ) : "VMSCAN_THROTTLE_NONE" diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5b00db3a8d530..4a0ac30d2e5b6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3487,19 +3487,11 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg) /* try to free all pages in this cgroup */ while (nr_retries && page_counter_read(&memcg->memory)) { - int progress; - if (signal_pending(current)) return -EINTR; - progress = try_to_free_mem_cgroup_pages(memcg, 1, - GFP_KERNEL, true); - if (!progress) { + if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true)) nr_retries--; - /* maybe some writeback is necessary */ - congestion_wait(BLK_RW_ASYNC, HZ/10); - } - } return 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index ea8ae82dc1d41..fe5526dc41256 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3329,6 +3329,33 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); } +static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) +{ + /* If reclaim is making progress, wake any throttled tasks. */ + if (sc->nr_reclaimed) { + wait_queue_head_t *wqh; + + wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS]; + if (waitqueue_active(wqh)) + wake_up_all(wqh); + + return; + } + + /* + * Do not throttle kswapd on NOPROGRESS as it will throttle on + * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under + * writeback and marked for immediate reclaim at the tail of + * the LRU. + */ + if (current_is_kswapd()) + return; + + /* Throttle if making no progress at high prioities. 
 */
+	if (sc->priority < DEF_PRIORITY - 2)
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes. We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -3413,6 +3440,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			continue;
 		last_pgdat = zone->zone_pgdat;
 		shrink_node(zone->zone_pgdat, sc);
+		consider_reclaim_throttle(zone->zone_pgdat, sc);
 	}
 
 	/*

From 770456953f6846690cf325897f72494848446b42 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:06 +1100
Subject: [PATCH 0774/1202] mm/writeback: throttle based on page writeback
 instead of congestion

do_writepages throttles on congestion if the writepages() call fails
due to a lack of memory, but congestion_wait() is partially broken as
the congestion state is not updated for all BDIs.

This patch instead stalls waiting for a number of pages to complete
writeback that are located on the local node. The main weakness is that
there is no correlation between the location of the inode's pages and
locality, but that is still better than congestion_wait.

Link: https://lkml.kernel.org/r/20211019090108.25501-5-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Andreas Dilger
Cc: "Darrick J . Wong"
Cc: Dave Chinner
Cc: Hillf Danton
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Matthew Wilcox
Cc: Michal Hocko
Cc: NeilBrown
Cc: Rik van Riel
Cc: "Theodore Ts'o"
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/page-writeback.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4812a17b288c5..f34f54fcd5b40 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2366,8 +2366,15 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		ret = generic_writepages(mapping, wbc);
 		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
 			break;
-		cond_resched();
-		congestion_wait(BLK_RW_ASYNC, HZ/50);
+
+		/*
+		 * Lacking an allocation context or the locality or writeback
+		 * state of any of the inode's pages, throttle based on
+		 * writeback activity on the local node. It's as good a
+		 * guess as any.
+		 */
+		reclaim_throttle(NODE_DATA(numa_node_id()),
+			VMSCAN_THROTTLE_WRITEBACK, HZ/50);
 	}
 	/*
 	 * Usually few pages are written by now from those we've just submitted

From b689e4f2d8339f5e19a6de35bb77c8096b75fd4e Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:07 +1100
Subject: [PATCH 0775/1202] mm/page_alloc: remove the throttling logic from
 the page allocator

The page allocator stalls based on the number of pages that are waiting
for writeback to start, but this should now be redundant.
shrink_inactive_list() will wake flusher threads if the tail of the LRU
is all unqueued dirty pages, so the flusher should be active. If it
fails to make progress due to pages under writeback not being completed
quickly, then it should stall on VMSCAN_THROTTLE_WRITEBACK.

Link: https://lkml.kernel.org/r/20211019090108.25501-6-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Andreas Dilger
Cc: "Darrick J . Wong"
Cc: Dave Chinner
Cc: Hillf Danton
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Matthew Wilcox
Cc: Michal Hocko
Cc: NeilBrown
Cc: Rik van Riel
Cc: "Theodore Ts'o"
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/page_alloc.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 001f9469632d5..7be29facdf96e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4791,30 +4791,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		trace_reclaim_retry_zone(z, order, reclaimable,
 				available, min_wmark, *no_progress_loops, wmark);
 		if (wmark) {
-			/*
-			 * If we didn't make any progress and have a lot of
-			 * dirty + writeback pages then we should wait for
-			 * an IO to complete to slow down the reclaim and
-			 * prevent from pre mature OOM
-			 */
-			if (!did_some_progress) {
-				unsigned long write_pending;
-
-				write_pending = zone_page_state_snapshot(zone,
-							NR_ZONE_WRITE_PENDING);
-
-				if (2 * write_pending > reclaimable) {
-					congestion_wait(BLK_RW_ASYNC, HZ/10);
-					return true;
-				}
-			}
-
 			ret = true;
-			goto out;
+			break;
 		}
 	}
 
-out:
 	/*
 	 * Memory allocation/reclaim might be called from a WQ context and the
 	 * current implementation of the WQ concurrency control doesn't

From b719ac3e4356e0f2af1a025bf608d9b0d8f09602 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:07 +1100
Subject: [PATCH 0776/1202] mm/vmscan: centralise timeout values for
 reclaim_throttle

Neil Brown raised concerns about callers of reclaim_throttle specifying
a timeout value. The original timeout values to congestion_wait() were
probably pulled out of thin air or copy&pasted from somewhere else.

This patch centralises the timeout values and selects a timeout based
on the reason for reclaim throttling. These figures are also pulled out
of the same thin air, but better values may be derived. Running a
workload that is throttling for inappropriate periods and tracing
mm_vmscan_throttled can be used to pick a more appropriate value.
Excessive throttling would pick a lower timeout, whereas excessive CPU
usage in reclaim context would select a larger timeout. Ideally a large
value would always be used and the wakeups would occur before a timeout,
but that requires careful testing.

Link: https://lkml.kernel.org/r/20211019090108.25501-7-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Andreas Dilger
Cc: "Darrick J . 
Wong" Cc: Dave Chinner Cc: Hillf Danton Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Matthew Wilcox Cc: Michal Hocko Cc: NeilBrown Cc: Rik van Riel Cc: "Theodore Ts'o" Cc: Yang Shi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/compaction.c | 2 +- mm/internal.h | 3 +-- mm/page-writeback.c | 2 +- mm/vmscan.c | 48 +++++++++++++++++++++++++++++++++------------ 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 7359093d8ac0e..151b04c4dab32 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -828,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (cc->mode == MIGRATE_ASYNC) return -EAGAIN; - reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); if (fatal_signal_pending(current)) return -EINTR; diff --git a/mm/internal.h b/mm/internal.h index cfd42598ba97f..6d66f8f94f480 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -130,8 +130,7 @@ extern unsigned long highest_memmap_pfn; */ extern int isolate_lru_page(struct page *page); extern void putback_lru_page(struct page *page); -extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, - long timeout); +extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason); /* * in mm/rmap.c: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index f34f54fcd5b40..4b01a6872f9ec 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2374,7 +2374,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) * guess as any. */ reclaim_throttle(NODE_DATA(numa_node_id()), - VMSCAN_THROTTLE_WRITEBACK, HZ/50); + VMSCAN_THROTTLE_WRITEBACK); } /* * Usually few pages are written by now from those we've just submitted diff --git a/mm/vmscan.c b/mm/vmscan.c index fe5526dc41256..743a8cacfecfa 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1006,12 +1006,10 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } -void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, - long timeout) +void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) { wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; - long ret; - bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK); + long timeout, ret; DEFINE_WAIT(wait); /* @@ -1023,17 +1021,41 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason, current->flags & (PF_IO_WORKER|PF_KTHREAD)) return; - if (acct_writeback && - atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { - WRITE_ONCE(pgdat->nr_reclaim_start, - node_page_state(pgdat, NR_THROTTLED_WRITTEN)); + /* + * These figures are pulled out of thin air. + * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many + * parallel reclaimers which is a short-lived event so the timeout is + * short. Failing to make progress or waiting on writeback are + * potentially long-lived events so use a longer timeout. This is shaky + * logic as a failure to make progress could be due to anything from + * writeback to a slow device to excessive references pages at the tail + * of the inactive LRU. 
+	 */
+	switch(reason) {
+	case VMSCAN_THROTTLE_NOPROGRESS:
+	case VMSCAN_THROTTLE_WRITEBACK:
+		timeout = HZ/10;
+
+		if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+			WRITE_ONCE(pgdat->nr_reclaim_start,
+				node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+		}
+
+		break;
+	case VMSCAN_THROTTLE_ISOLATED:
+		timeout = HZ/50;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		timeout = HZ;
+		break;
 	}
 
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 	ret = schedule_timeout(timeout);
 	finish_wait(wqh, &wait);
 
-	if (acct_writeback)
+	if (reason == VMSCAN_THROTTLE_ISOLATED)
 		atomic_dec(&pgdat->nr_writeback_throttled);
 
 	trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
@@ -2325,7 +2347,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 		/* wait a bit for the reclaimer. */
 		stalled = true;
-		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED);
 
 		/* We are about to die and free our memory. Return now. */
 		if (fatal_signal_pending(current))
@@ -3257,7 +3279,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		 * until some pages complete writeback.
 		 */
 		if (sc->nr.immediate)
-			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
+			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
 	}
 
 	/*
@@ -3281,7 +3303,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 	if (!current_is_kswapd() && current_may_throttle() &&
 	    !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
-		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
 
 	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 				    sc))
@@ -3353,7 +3375,7 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
 
 	/* Throttle if making no progress at high prioities. */
 	if (sc->priority < DEF_PRIORITY - 2)
-		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
 }
 
 /*

From aca688ab58acc8878cd2d081d252cc5b71bcc445 Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:08 +1100
Subject: [PATCH 0777/1202] mm/vmscan: increase the timeout if page reclaim
 is not making progress

Tracing of the stutterp workload showed the following delays:

    1 usect_delayed=124000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=128000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=176000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=536000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=544000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=556000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=624000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=716000 reason=VMSCAN_THROTTLE_NOPROGRESS
    1 usect_delayed=772000 reason=VMSCAN_THROTTLE_NOPROGRESS
    2 usect_delayed=512000 reason=VMSCAN_THROTTLE_NOPROGRESS
   16 usect_delayed=120000 reason=VMSCAN_THROTTLE_NOPROGRESS
   53 usect_delayed=116000 reason=VMSCAN_THROTTLE_NOPROGRESS
  116 usect_delayed=112000 reason=VMSCAN_THROTTLE_NOPROGRESS
 5907 usect_delayed=108000 reason=VMSCAN_THROTTLE_NOPROGRESS
71741 usect_delayed=104000 reason=VMSCAN_THROTTLE_NOPROGRESS

All the throttling hit the full timeout, and then there were wakeup
delays, meaning that the wakeups are premature as no other reclaimer
such as kswapd has made progress. This patch increases the maximum
timeout.
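Taken together with the previous patch, the timeout selection in
reclaim_throttle() ends up as in this condensed sketch (the writeback
accounting in the WRITEBACK case is omitted; HZ/2 is half a second,
matching the usec_timeout=500000 values in the traces above):

    switch (reason) {
    case VMSCAN_THROTTLE_NOPROGRESS:
        timeout = HZ/2;     /* raised from HZ/10 by this patch */
        break;
    case VMSCAN_THROTTLE_WRITEBACK:
        timeout = HZ/10;    /* matches usec_timeout=100000 above */
        break;
    case VMSCAN_THROTTLE_ISOLATED:
        timeout = HZ/50;    /* short-lived transient condition */
        break;
    default:
        WARN_ON_ONCE(1);
        timeout = HZ;
        break;
    }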
Link: https://lkml.kernel.org/r/20211019090108.25501-8-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Andreas Dilger
Cc: "Darrick J . Wong"
Cc: Dave Chinner
Cc: Hillf Danton
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Matthew Wilcox
Cc: Michal Hocko
Cc: NeilBrown
Cc: Rik van Riel
Cc: "Theodore Ts'o"
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/vmscan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 743a8cacfecfa..12ac469783180 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1033,6 +1033,8 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
 	 */
 	switch(reason) {
 	case VMSCAN_THROTTLE_NOPROGRESS:
+		timeout = HZ/2;
+		break;
 	case VMSCAN_THROTTLE_WRITEBACK:
 		timeout = HZ/10;

From fe169c5f77207d62b3a4a77243b286b4b5aa97ae Mon Sep 17 00:00:00 2001
From: Mel Gorman
Date: Thu, 21 Oct 2021 15:08:09 +1100
Subject: [PATCH 0778/1202] mm/vmscan: delay waking of tasks throttled on
 NOPROGRESS

Tracing indicates that tasks throttled on NOPROGRESS are woken
prematurely, resulting in occasional massive spikes in direct reclaim
activity. This patch wakes tasks throttled on NOPROGRESS only if
reclaim efficiency is at least 12%.

Link: https://lkml.kernel.org/r/20211019090108.25501-9-mgorman@techsingularity.net
Signed-off-by: Mel Gorman
Acked-by: Vlastimil Babka
Cc: Andreas Dilger
Cc: "Darrick J . Wong"
Cc: Dave Chinner
Cc: Hillf Danton
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Matthew Wilcox
Cc: Michal Hocko
Cc: NeilBrown
Cc: Rik van Riel
Cc: "Theodore Ts'o"
Cc: Yang Shi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/vmscan.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 12ac469783180..4fcb12e7359d1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1057,7 +1057,7 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
 	ret = schedule_timeout(timeout);
 	finish_wait(wqh, &wait);
 
-	if (reason == VMSCAN_THROTTLE_ISOLATED)
+	if (reason == VMSCAN_THROTTLE_WRITEBACK)
 		atomic_dec(&pgdat->nr_writeback_throttled);
 
 	trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
@@ -3355,8 +3355,11 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 
 static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
 {
-	/* If reclaim is making progress, wake any throttled tasks. */
-	if (sc->nr_reclaimed) {
+	/*
+	 * If reclaim is making progress greater than 12% efficiency then
+	 * wake all the NOPROGRESS throttled tasks.
+	 */
+	if (sc->nr_reclaimed > (sc->nr_scanned >> 3)) {
 		wait_queue_head_t *wqh;
 
 		wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS];

From 82bd32af2b3d0cc3aa70da92e657ef0df9b8b811 Mon Sep 17 00:00:00 2001
From: Zhenliang Wei
Date: Thu, 21 Oct 2021 15:08:09 +1100
Subject: [PATCH 0779/1202] tools/vm/page_owner_sort.c: count and sort by mem

When viewing page owner information, we may be more concerned about the
total memory rather than the number of times a stack appears.
Therefore, the following adjustments are made:

1. Added statistics on the total number of pages.
2. Added the optional parameter "-m" to configure the program to sort
   by memory (total pages).

The general output of page_owner is as follows:

	Page allocated via order XXX, ...
	PFN XXX ...
	// Detailed stack

	Page allocated via order XXX, ...
	PFN XXX ...
	// Detailed stack

The original page_owner_sort ignores PFN rows, puts the remaining rows
in buf, counts the number of times each buf occurs, and finally sorts
them according to that count. General output:

	XXX times:
	Page allocated via order XXX, ...
	// Detailed stack

Now, we use a regexp to extract the page order value from the buf and
count the total pages for the buf. General output:

	XXX times, XXX pages:
	Page allocated via order XXX, ...
	// Detailed stack

By default, it is still sorted by the times of buf; if you want to sort
by the page counts of buf, use the new -m parameter.

Link: https://lkml.kernel.org/r/1631678242-41033-1-git-send-email-weizhenliang@huawei.com
Signed-off-by: Zhenliang Wei
Cc: Tang Bin
Cc: Zhang Shengju
Cc: Zhenliang Wei
Cc: Xiaoming Ni
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 Documentation/vm/page_owner.rst | 23 +++++++-
 tools/vm/page_owner_sort.c      | 94 +++++++++++++++++++++++++++++----
 2 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
index 2175465c9bf2a..9837fc8147dd6 100644
--- a/Documentation/vm/page_owner.rst
+++ b/Documentation/vm/page_owner.rst
@@ -85,5 +85,26 @@ Usage
 	cat /sys/kernel/debug/page_owner > page_owner_full.txt
 	./page_owner_sort page_owner_full.txt sorted_page_owner.txt
 
+   The general output of ``page_owner_full.txt`` is as follows:
+
+	Page allocated via order XXX, ...
+	PFN XXX ...
+	// Detailed stack
+
+	Page allocated via order XXX, ...
+	PFN XXX ...
+	// Detailed stack
+
+   The ``page_owner_sort`` tool ignores ``PFN`` rows, puts the remaining rows
+   in buf, uses regexp to extract the page order value, counts the times
+   and pages of buf, and finally sorts them according to the times.
+
+   See the result about who allocated each page
-   in the ``sorted_page_owner.txt``.
+   in the ``sorted_page_owner.txt``. General output:
+
+	XXX times, XXX pages:
+	Page allocated via order XXX, ...
+	// Detailed stack
+
+   By default, ``page_owner_sort`` is sorted according to the times of buf.
+   If you want to sort by the pages nums of buf, use the ``-m`` parameter.
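As a standalone illustration of the extraction technique used by
get_page_num() in the diff below (the same "order\s*([0-9]*)," pattern;
the sample buffer string here is hypothetical, and error handling is
trimmed):

    #include <regex.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* A hypothetical first line of a page_owner stack block. */
        const char *buf = "Page allocated via order 3, mask 0x1112c";
        regex_t order_pattern;
        regmatch_t pmatch[2];
        int order_val, page_num = 0;

        regcomp(&order_pattern, "order\\s*([0-9]*),", REG_EXTENDED|REG_NEWLINE);
        if (regexec(&order_pattern, buf, 2, pmatch, 0) == 0 &&
            pmatch[1].rm_so != -1) {
            /* atoi() stops at the ',' following the digits. */
            order_val = atoi(buf + pmatch[1].rm_so);
            page_num = 1 << order_val;  /* order 3 -> 8 pages */
        }
        regfree(&order_pattern);

        printf("%d pages\n", page_num); /* prints "8 pages" */
        return 0;
    }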
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c index 0e75f22c94750..9ebb84a9c7310 100644 --- a/tools/vm/page_owner_sort.c +++ b/tools/vm/page_owner_sort.c @@ -5,6 +5,8 @@ * Example use: * cat /sys/kernel/debug/page_owner > page_owner_full.txt * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt + * Or sort by total memory: + * ./page_owner_sort -m page_owner_full.txt sorted_page_owner.txt * * See Documentation/vm/page_owner.rst */ @@ -16,14 +18,18 @@ #include #include #include +#include +#include struct block_list { char *txt; int len; int num; + int page_num; }; - +static int sort_by_memory; +static regex_t order_pattern; static struct block_list *list; static int list_size; static int max_size; @@ -59,12 +65,50 @@ static int compare_num(const void *p1, const void *p2) return l2->num - l1->num; } +static int compare_page_num(const void *p1, const void *p2) +{ + const struct block_list *l1 = p1, *l2 = p2; + + return l2->page_num - l1->page_num; +} + +static int get_page_num(char *buf) +{ + int err, val_len, order_val; + char order_str[4] = {0}; + char *endptr; + regmatch_t pmatch[2]; + + err = regexec(&order_pattern, buf, 2, pmatch, REG_NOTBOL); + if (err != 0 || pmatch[1].rm_so == -1) { + printf("no order pattern in %s\n", buf); + return 0; + } + val_len = pmatch[1].rm_eo - pmatch[1].rm_so; + if (val_len > 2) /* max_order should not exceed 2 digits */ + goto wrong_order; + + memcpy(order_str, buf + pmatch[1].rm_so, val_len); + + errno = 0; + order_val = strtol(order_str, &endptr, 10); + if (errno != 0 || endptr == order_str || *endptr != '\0') + goto wrong_order; + + return 1 << order_val; + +wrong_order: + printf("wrong order in follow buf:\n%s\n", buf); + return 0; +} + static void add_list(char *buf, int len) { if (list_size != 0 && len == list[list_size-1].len && memcmp(buf, list[list_size-1].txt, len) == 0) { list[list_size-1].num++; + list[list_size-1].page_num += get_page_num(buf); return; } if (list_size == max_size) { @@ -74,6 +118,7 @@ static void add_list(char *buf, int len) list[list_size].txt = malloc(len+1); list[list_size].len = len; list[list_size].num = 1; + list[list_size].page_num = get_page_num(buf); memcpy(list[list_size].txt, buf, len); list[list_size].txt[len] = 0; list_size++; @@ -85,6 +130,13 @@ static void add_list(char *buf, int len) #define BUF_SIZE (128 * 1024) +static void usage(void) +{ + printf("Usage: ./page_owner_sort [-m] \n" + "-m Sort by total memory. If this option is unset, sort by times\n" + ); +} + int main(int argc, char **argv) { FILE *fin, *fout; @@ -92,21 +144,39 @@ int main(int argc, char **argv) int ret, i, count; struct block_list *list2; struct stat st; + int err; + int opt; - if (argc < 3) { - printf("Usage: ./program \n"); - perror("open: "); + while ((opt = getopt(argc, argv, "m")) != -1) + switch (opt) { + case 'm': + sort_by_memory = 1; + break; + default: + usage(); + exit(1); + } + + if (optind >= (argc - 1)) { + usage(); exit(1); } - fin = fopen(argv[1], "r"); - fout = fopen(argv[2], "w"); + fin = fopen(argv[optind], "r"); + fout = fopen(argv[optind + 1], "w"); if (!fin || !fout) { - printf("Usage: ./program \n"); + usage(); perror("open: "); exit(1); } + err = regcomp(&order_pattern, "order\\s*([0-9]*),", REG_EXTENDED|REG_NEWLINE); + if (err != 0 || order_pattern.re_nsub != 1) { + printf("%s: Invalid pattern 'order\\s*([0-9]*),' code %d\n", + argv[0], err); + exit(1); + } + fstat(fileno(fin), &st); max_size = st.st_size / 100; /* hack ... 
 */
@@ -145,13 +215,19 @@ int main(int argc, char **argv)
 			list2[count++] = list[i];
 		} else {
 			list2[count-1].num += list[i].num;
+			list2[count-1].page_num += list[i].page_num;
 		}
 	}
 
-	qsort(list2, count, sizeof(list[0]), compare_num);
+	if (sort_by_memory)
+		qsort(list2, count, sizeof(list[0]), compare_page_num);
+	else
+		qsort(list2, count, sizeof(list[0]), compare_num);
 
 	for (i = 0; i < count; i++)
-		fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
+		fprintf(fout, "%d times, %d pages:\n%s\n",
+				list2[i].num, list2[i].page_num, list2[i].txt);
 
+	regfree(&order_pattern);
 	return 0;
 }

From 2225848e778dbb91a8fbdb2ce25bc2218c81c6b9 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi
Date: Thu, 21 Oct 2021 15:08:10 +1100
Subject: [PATCH 0780/1202] tools/vm/page-types.c: make walk_file() aware of
 address range option

Patch series "tools/vm/page-types.c: a few improvements".

This patchset adds some improvements to tools/vm/page-types.c. Patch
1/3 makes the -a option (specify address range) work with -f (file
cache mode). Patches 2/3 and 3/3 fix minor formatting issues of this
tool. These would make life a little easier for the users of this tool.
Please see individual patches for more details about specific issues.

This patch (of 3):

The -a|--addr option is used to limit the range of addresses to be
scanned for page status. It currently works for physical address space
(the default mode) or for virtual address space (with the -p option),
but not for file address space (with the -f option). So make
walk_file() aware of the -a option.

Link: https://lkml.kernel.org/r/20211004061325.1525902-1-naoya.horiguchi@linux.dev
Link: https://lkml.kernel.org/r/20211004061325.1525902-2-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi
Cc: Konstantin Khlebnikov
Cc: Christian Hansen
Cc: Changbin Du
Cc: Bin Wang
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/vm/page-types.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index f62f10c988db1..b14376af1f168 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -967,22 +967,19 @@ static struct sigaction sigbus_action = {
 	.sa_flags = SA_SIGINFO,
 };
 
-static void walk_file(const char *name, const struct stat *st)
+static void walk_file_range(const char *name, int fd,
+		unsigned long off, unsigned long end)
 {
 	uint8_t vec[PAGEMAP_BATCH];
 	uint64_t buf[PAGEMAP_BATCH], flags;
 	uint64_t cgroup = 0;
 	uint64_t mapcnt = 0;
 	unsigned long nr_pages, pfn, i;
-	off_t off, end = st->st_size;
-	int fd;
 	ssize_t len;
 	void *ptr;
 	int first = 1;
 
-	fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
-
-	for (off = 0; off < end; off += len) {
+	for (; off < end; off += len) {
 		nr_pages = (end - off + page_size - 1) / page_size;
 		if (nr_pages > PAGEMAP_BATCH)
 			nr_pages = PAGEMAP_BATCH;
@@ -1043,6 +1040,21 @@ static void walk_file(const char *name, const struct stat *st)
 			flags, cgroup, mapcnt, buf[i]);
 		}
 	}
+}
+
+static void walk_file(const char *name, const struct stat *st)
+{
+	int i;
+	int fd;
+
+	fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
+
+	if (!nr_addr_ranges)
+		add_addr_range(0, st->st_size / page_size);
+
+	for (i = 0; i < nr_addr_ranges; i++)
+		walk_file_range(name, fd, opt_offset[i] * page_size,
+				(opt_offset[i] + opt_size[i]) * page_size);
 
 	close(fd);
 }

From a0fb6f11f5519ecca15ea157cce036eb5861ac37 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi
Date: Thu, 21 Oct 2021 15:08:11 +1100
Subject: [PATCH 0781/1202] tools/vm/page-types.c: move show_file() to summary
 output
From a0fb6f11f5519ecca15ea157cce036eb5861ac37 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi
Date: Thu, 21 Oct 2021 15:08:11 +1100
Subject: [PATCH 0781/1202] tools/vm/page-types.c: move show_file() to summary output

Currently the file info from show_file() is printed within the page list
like below, which is a little inconvenient when the page list is consumed
by other scripts (additional filtering may be needed):

$ ./page-types -f page-types.c -l
foffset	offset	len	flags
page-types.c Inode: 15108680 Size: 30953 (8 pages)
Modify: Sat Oct  2 23:11:20 2021 (2399 seconds ago)
Access: Sat Oct  2 23:11:28 2021 (2391 seconds ago)
0	d9f59e	1	___U_lA____________________________________
1	1031eb5	1	__RU_l_____________________________________
2	13bf717	1	__RU_l_____________________________________
3	13ac333	1	___U_lA____________________________________
4	d9f59f	1	__RU_l_____________________________________
5	183fd49	1	___U_lA____________________________________
6	13cbf69	1	___U_lA____________________________________
7	d9ef05	1	___U_lA____________________________________

             flags	page-count	MB	symbolic-flags			long-symbolic-flags
0x000000000000002c	         3	 0	__RU_l_____________________________________	referenced,uptodate,lru
0x0000000000000068	         5	 0	___U_lA____________________________________	uptodate,lru,active
             total	         8	 0

With this patch, the file info is printed in the summary part like below:

$ ./page-types -f page-types.c -l
foffset	offset	len	flags
0	d9f59e	1	___U_lA_____________________________________
1	1031eb5	1	__RU_l______________________________________
2	13bf717	1	__RU_l______________________________________
3	13ac333	1	___U_lA_____________________________________
4	d9f59f	1	__RU_l______________________________________
5	183fd49	1	___U_lA_____________________________________
6	13cbf69	1	___U_lA_____________________________________

page-types.c Inode: 15108680 Size: 30953 (8 pages)
Modify: Sat Oct  2 23:11:20 2021 (2435 seconds ago)
Access: Sat Oct  2 23:11:28 2021 (2427 seconds ago)

             flags	page-count	MB	symbolic-flags			long-symbolic-flags
0x000000000000002c	         3	 0	__RU_l______________________________________	referenced,uptodate,lru
0x0000000000000068	         4	 0	___U_lA_____________________________________	uptodate,lru,active
             total	         7	 0

Link: https://lkml.kernel.org/r/20211004061325.1525902-3-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi
Cc: Bin Wang
Cc: Changbin Du
Cc: Christian Hansen
Cc: Konstantin Khlebnikov
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/vm/page-types.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index b14376af1f168..fdb1891faf90b 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -1034,7 +1034,6 @@ static void walk_file_range(const char *name, int fd,
 		if (first && opt_list) {
 			first = 0;
 			flush_page_range();
-			show_file(name, st);
 		}
 		add_page(off / page_size + i,
 			pfn, flags, cgroup, mapcnt, buf[i]);
@@ -1074,10 +1073,10 @@ int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
 	return 0;
 }
 
+struct stat st;
+
 static void walk_page_cache(void)
 {
-	struct stat st;
-
 	kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY);
 	pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
 	sigaction(SIGBUS, &sigbus_action, NULL);
@@ -1374,6 +1373,11 @@ int main(int argc, char *argv[])
 	if (opt_list)
 		printf("\n\n");
 
+	if (opt_file) {
+		show_file(opt_file, &st);
+		printf("\n");
+	}
+
 	show_summary();
 
 	if (opt_list_mapcnt)
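The symbolic-flags columns in the listings above are produced by mapping
each set bit of a page's /proc/kpageflags word to a fixed character
position. A compressed sketch of that decoding, with bit numbers taken
from include/uapi/linux/kernel-page-flags.h (page-types.c itself decodes
far more bits than shown here):

#include <stdint.h>
#include <stdio.h>

static const struct { int bit; char c; } kpf[] = {
	{ 0, 'L' },	/* KPF_LOCKED */
	{ 2, 'R' },	/* KPF_REFERENCED */
	{ 3, 'U' },	/* KPF_UPTODATE */
	{ 4, 'D' },	/* KPF_DIRTY */
	{ 5, 'l' },	/* KPF_LRU */
	{ 6, 'A' },	/* KPF_ACTIVE */
};

/* One character per known bit: the flag letter if set, '_' if clear. */
static void decode(uint64_t flags, char *buf)
{
	for (size_t i = 0; i < sizeof(kpf) / sizeof(kpf[0]); i++)
		buf[i] = (flags & (1ULL << kpf[i].bit)) ? kpf[i].c : '_';
	buf[sizeof(kpf) / sizeof(kpf[0])] = '\0';
}

int main(void)
{
	char buf[8];

	decode(0x68, buf);	/* uptodate,lru,active from the summary */
	puts(buf);		/* prints __U_lA */
	return 0;
}

Decoding 0x68 this way yields __U_lA, matching the uptodate,lru,active
rows in the summaries above.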
From b952e8789d8a7bc7e5ca1b4cbb1dc2463ca5f633 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi
Date: Thu, 21 Oct 2021 15:08:11 +1100
Subject: [PATCH 0782/1202] tools/vm/page-types.c: print file offset in hexadecimal

In page list mode (with the -l and -L options), the virtual address and
the physical address are printed in hexadecimal, but the file offset is
not, which is confusing, so let's align it and print the file offset in
hexadecimal as well.

Link: https://lkml.kernel.org/r/20211004061325.1525902-4-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi
Cc: Bin Wang
Cc: Changbin Du
Cc: Christian Hansen
Cc: Konstantin Khlebnikov
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 tools/vm/page-types.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index fdb1891faf90b..b1ed76d9a9794 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -390,7 +390,7 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
 	if (opt_pid)
 		printf("%lx\t", voff);
 	if (opt_file)
-		printf("%lu\t", voff);
+		printf("%lx\t", voff);
 	if (opt_list_cgroup)
 		printf("@%llu\t", (unsigned long long)cgroup0);
 	if (opt_list_mapcnt)
@@ -418,7 +418,7 @@ static void show_page(unsigned long voffset, unsigned long offset,
 	if (opt_pid)
 		printf("%lx\t", voffset);
 	if (opt_file)
-		printf("%lu\t", voffset);
+		printf("%lx\t", voffset);
 	if (opt_list_cgroup)
 		printf("@%llu\t", (unsigned long long)cgroup);
 	if (opt_list_mapcnt)

From 231bf59f800533ec0f9f057ef0bd400fd89d7145 Mon Sep 17 00:00:00 2001
From: Xiyu Yang
Date: Thu, 21 Oct 2021 15:08:12 +1100
Subject: [PATCH 0783/1202] mm/mempolicy: convert from atomic_t to refcount_t on mempolicy->refcnt

The refcount_t type and its corresponding API can protect refcounters
from accidental underflow and overflow, and thus from further
use-after-free situations.
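The conversion applies the standard refcount_t idiom, sketched
generically below (this is not code from the patch): unlike atomic_inc()
and atomic_dec_and_test(), the refcount_* operations saturate on overflow
and WARN on increment-from-zero, turning a latent use-after-free into a
loud, recoverable condition.

#include <linux/refcount.h>
#include <linux/slab.h>

struct obj {
	refcount_t refcnt;
};

static struct obj *obj_alloc(void)
{
	struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (o)
		refcount_set(&o->refcnt, 1);	/* the initial reference */
	return o;
}

static struct obj *obj_get(struct obj *o)
{
	/* Saturates and WARNs instead of wrapping or reviving zero. */
	refcount_inc(&o->refcnt);
	return o;
}

static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->refcnt))
		kfree(o);	/* last reference gone */
}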
Link: https://lkml.kernel.org/r/1626683671-64407-1-git-send-email-xiyuyang19@fudan.edu.cn
Signed-off-by: Xiyu Yang
Signed-off-by: Xin Tan
Acked-by: Ben Widawsky
Reviewed-by: Muchun Song
Cc: Feng Tang
Cc: Mike Kravetz
Cc: Muchun Song
Cc: Yanfei Xu
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 include/linux/mempolicy.h | 5 +++--
 mm/mempolicy.c            | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 097bbbb374936..f9edbbddd3549 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -6,6 +6,7 @@
 #ifndef _LINUX_MEMPOLICY_H
 #define _LINUX_MEMPOLICY_H 1
 
+#include <linux/refcount.h>
 #include
 #include
 #include
@@ -42,7 +43,7 @@ struct mm_struct;
 * to 1, representing the caller of mpol_dup().
 */
 struct mempolicy {
-	atomic_t refcnt;
+	refcount_t refcnt;
 	unsigned short mode; 	/* See MPOL_* above */
 	unsigned short flags;	/* See set_mempolicy() MPOL_F_* above */
 	nodemask_t nodes;	/* interleave/bind/perfer */
@@ -93,7 +94,7 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
 static inline void mpol_get(struct mempolicy *pol)
 {
 	if (pol)
-		atomic_inc(&pol->refcnt);
+		refcount_inc(&pol->refcnt);
 }
 
 extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e18c91d301c53..b82fb397e6820 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -293,7 +293,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
-	atomic_set(&policy->refcnt, 1);
+	refcount_set(&policy->refcnt, 1);
 	policy->mode = mode;
 	policy->flags = flags;
 
@@ -303,7 +303,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 /* Slow path of a mpol destructor. */
 void __mpol_put(struct mempolicy *p)
 {
-	if (!atomic_dec_and_test(&p->refcnt))
+	if (!refcount_dec_and_test(&p->refcnt))
 		return;
 	kmem_cache_free(policy_cache, p);
 }
@@ -2313,7 +2313,7 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 		nodemask_t mems = cpuset_mems_allowed(current);
 		mpol_rebind_policy(new, &mems);
 	}
-	atomic_set(&new->refcnt, 1);
+	refcount_set(&new->refcnt, 1);
 	return new;
 }
 
@@ -2608,7 +2608,7 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 		goto alloc_new;
 
 	*mpol_new = *n->policy;
-	atomic_set(&mpol_new->refcnt, 1);
+	refcount_set(&mpol_new->refcnt, 1);
 	sp_node_init(n_new, end, n->end, mpol_new);
 	n->end = start;
 	sp_insert(sp, n_new);

From eba08dd517549d9b8e17eb39c0148994eae6c6ae Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Thu, 21 Oct 2021 15:08:13 +1100
Subject: [PATCH 0784/1202] mm-mempolicy-convert-from-atomic_t-to-refcount_t-on-mempolicy-refcnt-fix

Fix the following build warnings:

mm/mempolicy.c:125:42: warning: missing braces around initializer [-Wmissing-braces]
  125 | static struct mempolicy default_policy = {
      |                                          ^
mm/mempolicy.c:125:42: warning: missing braces around initializer [-Wmissing-braces]
mm/mempolicy.c: In function 'numa_policy_init':
mm/mempolicy.c:2815:32: warning: missing braces around initializer [-Wmissing-braces]
 2815 |         preferred_node_policy[nid] = (struct mempolicy) {
      |                                ^
mm/mempolicy.c:2815:32: warning: missing braces around initializer [-Wmissing-braces]

Cc: Xiyu Yang
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/mempolicy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b82fb397e6820..6e3199a7c679d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -123,7 +123,7 @@ enum zone_type policy_zone = 0;
 * run-time system-wide default policy => local allocation
 */
 static struct mempolicy default_policy = {
-	.refcnt = ATOMIC_INIT(1), /* never free it */
+	.refcnt = { ATOMIC_INIT(1), }, /* never free it */
 	.mode = MPOL_LOCAL,
 };
 
@@ -2802,7 +2802,7 @@ void __init numa_policy_init(void)
 
 	for_each_node(nid) {
 		preferred_node_policy[nid] = (struct mempolicy) {
-			.refcnt = ATOMIC_INIT(1),
+			.refcnt = { ATOMIC_INIT(1), },
 			.mode = MPOL_PREFERRED,
 			.flags = MPOL_F_MOF | MPOL_F_MORON,
 			.nodes = nodemask_of_node(nid),
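The extra braces are needed because refcount_t wraps an atomic_t, which
in turn wraps an int, so a designated initializer now spans two struct
levels. A minimal userspace reproduction of the warning and of the fix,
with the kernel types mocked up for the example (compile with
-Wmissing-braces to see it):

/* Mock-ups of the kernel types, only to reproduce the warning. */
typedef struct { int counter; } atomic_t;
typedef struct { atomic_t refs; } refcount_t;
#define ATOMIC_INIT(i) { (i) }

struct policy {
	refcount_t refcnt;
	unsigned short mode;
};

static struct policy warns = {
	.refcnt = ATOMIC_INIT(1),	/* one brace level short: warns */
	.mode = 0,
};

static struct policy fixed = {
	.refcnt = { ATOMIC_INIT(1), },	/* fully braced: silent */
	.mode = 1,
};

Both initializers are valid C (brace elision), which is why this is a
warning fix rather than a functional change.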
From 5798302bfb1e4b60d5a4e8e534233b856806dd5f Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Thu, 21 Oct 2021 15:08:13 +1100
Subject: [PATCH 0785/1202] arch_numa: simplify numa_distance allocation

Patch series "memblock: cleanup memblock_free interface", v2.

This is the fix for the memblock freeing API mismatch [1].

The first patch is a cleanup of the numa_distance allocation in arch_numa
that I spotted during the conversion.  The second patch is a fix for Xen
memory freeing on some of the error paths.

[1] https://lore.kernel.org/all/CAHk-=wj9k4LZTz+svCxLYs5Y1=+yKrbAUArH1+ghyG3OLd8VVg@mail.gmail.com

This patch (of 6):

The memory allocation of numa_distance uses memblock_phys_alloc_range()
without actual range limits, converts the returned physical address to
virtual, and then only uses the virtual address for further
initialization.  Simplify this by replacing memblock_phys_alloc_range()
with memblock_alloc().

Link: https://lkml.kernel.org/r/20210930185031.18648-1-rppt@kernel.org
Link: https://lkml.kernel.org/r/20210930185031.18648-2-rppt@kernel.org
Signed-off-by: Mike Rapoport
Cc: Christophe Leroy
Cc: Juergen Gross
Cc: Shahab Vahedi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 drivers/base/arch_numa.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 28222688ace79..a6491673dc1f1 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -337,15 +337,13 @@ void __init numa_free_distance(void)
 static int __init numa_alloc_distance(void)
 {
 	size_t size;
-	u64 phys;
 	int i, j;
 
 	size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
-	phys = memblock_phys_alloc_range(size, PAGE_SIZE, 0, PFN_PHYS(max_pfn));
-	if (WARN_ON(!phys))
+	numa_distance = memblock_alloc(size, PAGE_SIZE);
+	if (WARN_ON(!numa_distance))
 		return -ENOMEM;
 
-	numa_distance = __va(phys);
 	numa_distance_cnt = nr_node_ids;
 
 	/* fill with the default distances */

From 9e65628f01c171ab40a3f350c8e5316f818d8862 Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Thu, 21 Oct 2021 15:08:14 +1100
Subject: [PATCH 0786/1202] xen/x86: free_p2m_page: use memblock_free_ptr() to free a virtual pointer

free_p2m_page() wrongly passes a virtual pointer to memblock_free(),
which treats it as a physical address.

Call memblock_free_ptr() instead, which takes a virtual address, to free
the memory.

Link: https://lkml.kernel.org/r/20210930185031.18648-3-rppt@kernel.org
Signed-off-by: Mike Rapoport
Reviewed-by: Juergen Gross
Cc: Christophe Leroy
Cc: Shahab Vahedi
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 arch/x86/xen/p2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 5e6e236977c75..141bb9dbd2fbe 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -197,7 +197,7 @@ static void * __ref alloc_p2m_page(void)
 static void __ref free_p2m_page(void *p)
 {
 	if (unlikely(!slab_is_available())) {
-		memblock_free((unsigned long)p, PAGE_SIZE);
+		memblock_free_ptr(p, PAGE_SIZE);
 		return;
 	}

From e7b689f1cdc6cbeb9350e87a6ab9cf8397707ccc Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Thu, 21 Oct 2021 15:08:15 +1100
Subject: [PATCH 0787/1202] memblock: drop memblock_free_early_nid() and memblock_free_early()

memblock_free_early_nid() is unused and memblock_free_early() is an
alias for memblock_free().

Replace calls to memblock_free_early() with calls to memblock_free() and
remove memblock_free_early() and memblock_free_early_nid().
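Viewed from the call sites, this series converges on two symmetric
allocate/free pairs. The sketch below shows the intended pairing under
the final naming; it is a usage illustration, not code from any one
patch, and note that memblock_free() taking a virtual pointer only
materializes in patch 0790 later in the series:

#include <linux/memblock.h>
#include <linux/sizes.h>

static void __init memblock_pairing_sketch(void)
{
	phys_addr_t phys;
	void *virt;

	/* Physical flavour: the caller tracks a phys_addr_t. */
	phys = memblock_phys_alloc(SZ_4K, SZ_4K);
	if (phys)
		memblock_phys_free(phys, SZ_4K);

	/* Virtual flavour: a zeroed pointer comes back directly,
	 * with no __va()/__pa() conversion at either end. */
	virt = memblock_alloc(SZ_4K, SZ_4K);
	if (virt)
		memblock_free(virt, SZ_4K);
}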
Link: https://lkml.kernel.org/r/20210930185031.18648-4-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Christophe Leroy Cc: Juergen Gross Cc: Shahab Vahedi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/mips/mm/init.c | 2 +- arch/powerpc/platforms/pseries/svm.c | 3 +-- arch/s390/kernel/smp.c | 2 +- drivers/base/arch_numa.c | 2 +- drivers/s390/char/sclp_early.c | 2 +- include/linux/memblock.h | 12 ------------ kernel/dma/swiotlb.c | 2 +- lib/cpumask.c | 2 +- mm/percpu.c | 8 ++++---- mm/sparse.c | 2 +- 10 files changed, 12 insertions(+), 25 deletions(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 19347dc6bbf88..21a5a7ac00370 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 87f001b4c4e4f..f12229ce73014 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -56,8 +56,7 @@ void __init svm_swiotlb_init(void) return; - memblock_free_early(__pa(vstart), - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free(__pa(vstart), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1a04e5bdf6555..066efd6d9345e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -880,7 +880,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ __smp_rescan_cpus(info, true); - memblock_free_early((unsigned long)info, sizeof(*info)); + memblock_free((unsigned long)info, sizeof(*info)); } /* diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index a6491673dc1f1..ade8934764f65 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(__pa(ptr), size); } #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index f3d5c7f4c13d2..f01d942e1c1df 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -139,7 +139,7 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info) } sclp_fill_core_info(info, sccb); out: - memblock_free_early((unsigned long)sccb, length); + memblock_free((unsigned long)sccb, length); return rc; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 34de69b3b8bad..fc8183be340c6 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -441,18 +441,6 @@ static inline void *memblock_alloc_node(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void memblock_free_early(phys_addr_t base, - phys_addr_t size) -{ - memblock_free(base, size); -} - -static inline void memblock_free_early_nid(phys_addr_t base, - phys_addr_t size, int nid) -{ - memblock_free(base, size); -} - static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) { __memblock_free_late(base, size); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 87c40517e8227..430d2f78d540e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -247,7 +247,7 @@ swiotlb_init(int 
verbose) return; fail_free_mem: - memblock_free_early(__pa(tlb), bytes); + memblock_free(__pa(tlb), bytes); fail: pr_warn("Cannot allocate buffer"); } diff --git a/lib/cpumask.c b/lib/cpumask.c index c3c76b8333846..045779446a18d 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - memblock_free_early(__pa(mask), cpumask_size()); + memblock_free(__pa(mask), cpumask_size()); } #endif diff --git a/mm/percpu.c b/mm/percpu.c index e0a986818903d..f58318cb04c02 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { - memblock_free_early(__pa(ai), ai->__ai_size); + memblock_free(__pa(ai), ai->__ai_size); } /** @@ -3134,7 +3134,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, out_free: pcpu_free_alloc_info(ai); if (areas) - memblock_free_early(__pa(areas), areas_size); + memblock_free(__pa(areas), areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ @@ -3256,7 +3256,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, free_fn(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: - memblock_free_early(__pa(pages), pages_size); + memblock_free(__pa(pages), pages_size); pcpu_free_alloc_info(ai); return rc; } @@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_dfl_fc_free(void *ptr, size_t size) { - memblock_free_early(__pa(ptr), size); + memblock_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/mm/sparse.c b/mm/sparse.c index 120bc8ea5293e..55fea0c2f927e 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata; static inline void __meminit sparse_buffer_free(unsigned long size) { WARN_ON(!sparsemap_buf || size == 0); - memblock_free_early(__pa(sparsemap_buf), size); + memblock_free(__pa(sparsemap_buf), size); } static void __init sparse_buffer_init(unsigned long size, int nid) From 8ca796f273b258a20ab7a200be789deed51e7869 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Oct 2021 15:08:16 +1100 Subject: [PATCH 0788/1202] memblock: stop aliasing __memblock_free_late with memblock_free_late memblock_free_late() is a NOP wrapper for __memblock_free_late(), there is no point to keep this indirection. Drop the wrapper and rename __memblock_free_late() to memblock_free_late(). 
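The indirection being dropped is the familiar header-alias pattern, shown
schematically below with generic names (these are not the actual kernel
declarations):

#ifdef BEFORE_CLEANUP
/* The public name exists only as a NOP wrapper in the header. */
void __api_free_late(unsigned long base, unsigned long size);

static inline void api_free_late(unsigned long base, unsigned long size)
{
	__api_free_late(base, size);
}
#else
/* The implementation carries the public name directly; no call site
 * has to change, because callers already used api_free_late(). */
void api_free_late(unsigned long base, unsigned long size);
#endif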
Link: https://lkml.kernel.org/r/20210930185031.18648-5-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Christophe Leroy Cc: Juergen Gross Cc: Shahab Vahedi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memblock.h | 7 +------ mm/memblock.c | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fc8183be340c6..e25f964fdd601 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -133,7 +133,7 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __memblock_free_late(phys_addr_t base, phys_addr_t size); +void memblock_free_late(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, @@ -441,11 +441,6 @@ static inline void *memblock_alloc_node(phys_addr_t size, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } -static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) -{ - __memblock_free_late(base, size); -} - /* * Set the allocation direction to bottom-up or top-down. */ diff --git a/mm/memblock.c b/mm/memblock.c index b91df5cf54d34..427acbeed4a37 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -366,14 +366,14 @@ void __init memblock_discard(void) addr = __pa(memblock.reserved.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); - __memblock_free_late(addr, size); + memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); - __memblock_free_late(addr, size); + memblock_free_late(addr, size); } memblock_memory = NULL; @@ -1591,7 +1591,7 @@ void * __init memblock_alloc_try_nid( } /** - * __memblock_free_late - free pages directly to buddy allocator + * memblock_free_late - free pages directly to buddy allocator * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * @@ -1599,7 +1599,7 @@ void * __init memblock_alloc_try_nid( * down, but we are still initializing the system. Pages are released directly * to the buddy allocator. */ -void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) +void __init memblock_free_late(phys_addr_t base, phys_addr_t size) { phys_addr_t cursor, end; From 0d52db59796e5ea9302b450a700acc438b822b10 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Oct 2021 15:08:17 +1100 Subject: [PATCH 0789/1202] memblock: rename memblock_free to memblock_phys_free Since memblock_free() operates on a physical range, make its name reflect it and rename it to memblock_phys_free(), so it will be a logical counterpart to memblock_phys_alloc(). 
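At a call site, the rename pays off because a virtual/physical mix-up,
the bug class fixed in patch 0786 above, no longer reads plausibly. A
short sketch assuming the post-rename API, with a hypothetical helper
name:

#include <linux/memblock.h>
#include <linux/mm.h>

/*
 * Hypothetical early-boot helper, not kernel code: freeing a virtually
 * addressed buffer through the physical-range API now forces a visible
 * __pa() conversion at the call site instead of a silent mix-up.
 */
static void __init free_early_table(void *virt, size_t size)
{
	memblock_phys_free(__pa(virt), size);
}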
The callers are updated with the below semantic patch: @@ expression addr; expression size; @@ - memblock_free(addr, size); + memblock_phys_free(addr, size); Link: https://lkml.kernel.org/r/20210930185031.18648-6-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Christophe Leroy Cc: Juergen Gross Cc: Shahab Vahedi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/alpha/kernel/core_irongate.c | 3 ++- arch/arc/mm/init.c | 2 +- arch/arm/mach-hisi/platmcpm.c | 2 +- arch/arm/mm/init.c | 2 +- arch/arm64/mm/mmu.c | 4 ++-- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip30/ip30-setup.c | 6 +++--- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++-- arch/powerpc/kernel/paca.c | 8 ++++---- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- arch/powerpc/platforms/pseries/svm.c | 3 ++- arch/riscv/kernel/setup.c | 5 +++-- arch/s390/kernel/setup.c | 8 ++++---- arch/s390/kernel/smp.c | 4 ++-- arch/s390/kernel/uv.c | 2 +- arch/s390/mm/kasan_init.c | 2 +- arch/sh/boards/mach-ap325rxa/setup.c | 2 +- arch/sh/boards/mach-ecovec24/setup.c | 4 ++-- arch/sh/boards/mach-kfr2r09/setup.c | 2 +- arch/sh/boards/mach-migor/setup.c | 2 +- arch/sh/boards/mach-se/7724/setup.c | 4 ++-- arch/sparc/kernel/smp_64.c | 2 +- arch/um/kernel/mem.c | 2 +- arch/x86/kernel/setup.c | 4 ++-- arch/x86/mm/init.c | 2 +- arch/x86/xen/mmu_pv.c | 6 +++--- arch/x86/xen/setup.c | 6 +++--- drivers/base/arch_numa.c | 2 +- drivers/firmware/efi/memmap.c | 2 +- drivers/of/kexec.c | 3 +-- drivers/of/of_reserved_mem.c | 5 +++-- drivers/s390/char/sclp_early.c | 2 +- drivers/usb/early/xhci-dbc.c | 10 +++++----- drivers/xen/swiotlb-xen.c | 2 +- include/linux/memblock.h | 2 +- init/initramfs.c | 2 +- kernel/dma/swiotlb.c | 2 +- lib/cpumask.c | 2 +- mm/cma.c | 2 +- mm/memblock.c | 8 ++++---- mm/memory_hotplug.c | 2 +- mm/percpu.c | 8 ++++---- mm/sparse.c | 2 +- 45 files changed, 79 insertions(+), 76 deletions(-) diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 72af1e72d8331..ee26dcc494186 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -233,7 +233,8 @@ albacore_init_arch(void) unsigned long size; size = initrd_end - initrd_start; - memblock_free(__pa(initrd_start), PAGE_ALIGN(size)); + memblock_phys_free(__pa(initrd_start), + PAGE_ALIGN(size)); if (!move_initrd(pci_mem)) printk("irongate_init_arch: initrd too big " "(%ldK)\ndisabling initrd\n", diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 699ecf1196414..59408f6a02d4a 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -173,7 +173,7 @@ static void __init highmem_init(void) #ifdef CONFIG_HIGHMEM unsigned long tmp; - memblock_free(high_mem_start, high_mem_sz); + memblock_phys_free(high_mem_start, high_mem_sz); for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) free_highmem_page(pfn_to_page(tmp)); #endif diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 96a484095194d..258586e31333e 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -339,7 +339,7 @@ static int __init hip04_smp_init(void) err_sysctrl: iounmap(relocation); err_reloc: - memblock_free(hip04_boot_method[0], hip04_boot_method[1]); + memblock_phys_free(hip04_boot_method[0], hip04_boot_method[1]); err: return ret; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 6162a070a4104..6d0cb0f7bc54b 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -158,7 +158,7 @@ phys_addr_t __init 
arm_memblock_steal(phys_addr_t size, phys_addr_t align) panic("Failed to steal %pa bytes at %pS\n", &size, (void *)_RET_IP_); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); return phys; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cfd9deb347c38..f68c2d953617b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -738,8 +738,8 @@ void __init paging_init(void) cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); init_mm.pgd = swapper_pg_dir; - memblock_free(__pa_symbol(init_pg_dir), - __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); + memblock_phys_free(__pa_symbol(init_pg_dir), + __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir)); memblock_allow_resize(); } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 21a5a7ac00370..3be1c29084fa8 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c index 44b1607e964dd..75a34684e7045 100644 --- a/arch/mips/sgi-ip30/ip30-setup.c +++ b/arch/mips/sgi-ip30/ip30-setup.c @@ -69,10 +69,10 @@ static void __init ip30_mem_init(void) total_mem += size; if (addr >= IP30_REAL_MEMORY_START) - memblock_free(addr, size); + memblock_phys_free(addr, size); else if ((addr + size) > IP30_REAL_MEMORY_START) - memblock_free(IP30_REAL_MEMORY_START, - size - IP30_MAX_PROM_MEMORY); + memblock_phys_free(IP30_REAL_MEMORY_START, + size - IP30_MAX_PROM_MEMORY); } pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem)); } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 358aee7c2d79a..42839d6bd4860 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -1095,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char cpufeatures_setup_finished(); - memblock_free(__pa(dt_cpu_features), - sizeof(struct dt_cpu_feature)*nr_dt_cpu_features); + memblock_phys_free(__pa(dt_cpu_features), + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features); return 0; } diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 9bd30cac852bf..4208b4044d12e 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -322,8 +322,8 @@ void __init free_unused_pacas(void) new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; if (new_ptrs_size < paca_ptrs_size) - memblock_free(__pa(paca_ptrs) + new_ptrs_size, - paca_ptrs_size - new_ptrs_size); + memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size, + paca_ptrs_size - new_ptrs_size); paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = new_ptrs_size; @@ -331,8 +331,8 @@ void __init free_unused_pacas(void) #ifdef CONFIG_PPC_BOOK3S_64 if (early_radix_enabled()) { /* Ugly fixup, see new_slb_shadow() */ - memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), - sizeof(struct slb_shadow)); + memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), + sizeof(struct slb_shadow)); paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL; } #endif diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b1e43b69a559d..5af8993a8e6db 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -825,7 +825,7 @@ static void __init smp_setup_pacas(void) 
set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); } - memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); + memblock_phys_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); cpu_to_phys_id = NULL; } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eaa79a0996d1b..75bc294ac40d7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -812,7 +812,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } static int pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3dd35c327d1c5..b5a9d343b7205 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2981,7 +2981,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (!phb->hose) { pr_err(" Can't allocate PCI controller for %pOF\n", np); - memblock_free(__pa(phb), sizeof(struct pnv_phb)); + memblock_phys_free(__pa(phb), sizeof(struct pnv_phb)); return; } diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index f12229ce73014..b7c017bb40f7f 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -56,7 +56,8 @@ void __init svm_swiotlb_init(void) return; - memblock_free(__pa(vstart), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_phys_free(__pa(vstart), + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b9620e5f00baf..6ea7c53b82cd8 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -230,13 +230,14 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) - memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res)); + memblock_phys_free(__pa(mem_res), + (res_idx + 1) * sizeof(*mem_res)); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_free(__pa(mem_res), mem_res_sz); + memblock_phys_free(__pa(mem_res), mem_res_sz); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 67e5fff96ee06..7fc836e9e1940 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -693,7 +693,7 @@ static void __init reserve_crashkernel(void) } if (register_memory_notifier(&kdump_mem_nb)) { - memblock_free(crash_base, crash_size); + memblock_phys_free(crash_base, crash_size); return; } @@ -748,7 +748,7 @@ static void __init free_mem_detect_info(void) get_mem_detect_reserved(&start, &size); if (size) - memblock_free(start, size); + memblock_phys_free(start, size); } static const char * __init get_mem_info_source(void) @@ -793,7 +793,7 @@ static void __init check_initrd(void) if (initrd_data.start && initrd_data.size && !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_free(initrd_data.start, initrd_data.size); + memblock_phys_free(initrd_data.start, initrd_data.size); initrd_start = initrd_end = 0; } #endif @@ -890,7 +890,7 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); - 
memblock_free((unsigned long) vmms, PAGE_SIZE); + memblock_phys_free((unsigned long)vmms, PAGE_SIZE); } /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 066efd6d9345e..78a8ea6fd582a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -723,7 +723,7 @@ void __init smp_save_dump_cpus(void) /* Get the CPU registers */ smp_save_cpu_regs(sa, addr, is_boot_cpu, page); } - memblock_free(page, PAGE_SIZE); + memblock_phys_free(page, PAGE_SIZE); diag_amode31_ops.diag308_reset(); pcpu_set_smt(0); } @@ -880,7 +880,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ __smp_rescan_cpus(info, true); - memblock_free((unsigned long)info, sizeof(*info)); + memblock_phys_free((unsigned long)info, sizeof(*info)); } /* diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 5a656c7b7a67a..d57457b16fe5f 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -64,7 +64,7 @@ void __init setup_uv(void) } if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) { - memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + memblock_phys_free(uv_stor_base, uv_info.uv_base_stor_len); goto fail; } diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 3e47351680193..483b9dbe0970a 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -399,5 +399,5 @@ void __init kasan_copy_shadow_mapping(void) void __init kasan_free_early_identity(void) { - memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); + memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos); } diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index bac8a058ebd7c..c77b5f00a66a3 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -560,7 +560,7 @@ static void __init ap325rxa_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index bab91a99124e1..2b22ce7921471 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -1502,7 +1502,7 @@ static void __init ecovec_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU0 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; @@ -1510,7 +1510,7 @@ static void __init ecovec_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU1 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; } diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index eeb5ce341efdd..20f4db778ed6a 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -633,7 +633,7 @@ static void __init kfr2r09_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 6703a2122c0d6..f60061283c482 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -633,7 +633,7 @@ static void __init migor_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, 
size); memblock_remove(phys, size); ceu_dma_membase = phys; diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 8d6541ba01865..8bbf5a6aa4237 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -966,7 +966,7 @@ static void __init ms7724se_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU0 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu0_dma_membase = phys; @@ -974,7 +974,7 @@ static void __init ms7724se_mv_mem_reserve(void) if (!phys) panic("Failed to allocate CEU1 memory\n"); - memblock_free(phys, size); + memblock_phys_free(phys, size); memblock_remove(phys, size); ceu1_dma_membase = phys; } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0224d8f19ed69..2507549538df1 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1567,7 +1567,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 8e636ce029495..d1710ebb44f47 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -47,7 +47,7 @@ void __init mem_init(void) */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); - memblock_free(__pa(brk_end), uml_reserved - brk_end); + memblock_phys_free(__pa(brk_end), uml_reserved - brk_end); uml_reserved = brk_end; /* this will put all low memory onto the freelists */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 40ed44ead0631..49b596db5631e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -322,7 +322,7 @@ static void __init reserve_initrd(void) relocate_initrd(); - memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); + memblock_phys_free(ramdisk_image, ramdisk_end - ramdisk_image); } #else @@ -521,7 +521,7 @@ static void __init reserve_crashkernel(void) } if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { - memblock_free(crash_base, crash_size); + memblock_phys_free(crash_base, crash_size); return; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 23a14d82e7838..1895986842b91 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -618,7 +618,7 @@ static void __init memory_map_top_down(unsigned long map_start, */ addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start, map_end); - memblock_free(addr, PMD_SIZE); + memblock_phys_free(addr, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 3359c23573c50..676d8d292f8a3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1025,7 +1025,7 @@ static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size) for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) make_lowmem_page_readwrite(vaddr); - memblock_free(paddr, size); + memblock_phys_free(paddr, size); } static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin) @@ -1151,7 +1151,7 @@ static void __init xen_pagetable_p2m_free(void) xen_cleanhighmap(addr, addr + size); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - memblock_free(__pa(addr), size); + memblock_phys_free(__pa(addr), size); 
} else { xen_cleanmfnmap(addr); } @@ -1955,7 +1955,7 @@ void __init xen_relocate_p2m(void) pfn_end = p2m_pfn_end; } - memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn)); + memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn)); while (pfn < pfn_end) { if (pfn == p2m_pfn) { pfn = p2m_pfn_end; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8bfc103301077..f387fc7e52500 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -153,7 +153,7 @@ static void __init xen_del_extra_mem(unsigned long start_pfn, break; } } - memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); + memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); } /* @@ -719,7 +719,7 @@ static void __init xen_reserve_xen_mfnlist(void) return; xen_relocate_p2m(); - memblock_free(start, size); + memblock_phys_free(start, size); } /** @@ -885,7 +885,7 @@ char * __init xen_memory_setup(void) xen_phys_memcpy(new_area, start, size); pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n", start, start + size, new_area, new_area + size); - memblock_free(start, size); + memblock_phys_free(start, size); boot_params.hdr.ramdisk_image = new_area; boot_params.ext_ramdisk_image = new_area >> 32; } diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index ade8934764f65..712edef039295 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 2ff1883dc788d..4df55a55da841 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -35,7 +35,7 @@ void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags) if (slab_is_available()) memblock_free_late(phys, size); else - memblock_free(phys, size); + memblock_phys_free(phys, size); } else if (flags & EFI_MEMMAP_SLAB) { struct page *p = pfn_to_page(PHYS_PFN(phys)); unsigned int order = get_order(size); diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 053e241f593c7..b9bd1cff17938 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -171,8 +171,7 @@ int ima_free_kexec_buffer(void) if (ret) return ret; - return memblock_free(addr, size); - + return memblock_phys_free(addr, size); } /** diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 59c1390cdf423..161c71e1e3906 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -45,7 +45,7 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, if (nomap) { err = memblock_mark_nomap(base, size); if (err) - memblock_free(base, size); + memblock_phys_free(base, size); } return err; @@ -282,7 +282,8 @@ void __init fdt_init_reserved_mem(void) if (nomap) memblock_clear_nomap(rmem->base, rmem->size); else - memblock_free(rmem->base, rmem->size); + memblock_phys_free(rmem->base, + rmem->size); } } } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index f01d942e1c1df..c0052655fc4f9 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -139,7 +139,7 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info) } sclp_fill_core_info(info, sccb); out: - memblock_free((unsigned long)sccb, length); + memblock_phys_free((unsigned long)sccb, length); 
return rc; } diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index be4ecbabdd586..933d77ad0a642 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -185,7 +185,7 @@ static void __init xdbc_free_ring(struct xdbc_ring *ring) if (!seg) return; - memblock_free(seg->dma, PAGE_SIZE); + memblock_phys_free(seg->dma, PAGE_SIZE); ring->segment = NULL; } @@ -665,10 +665,10 @@ int __init early_xdbc_setup_hardware(void) xdbc_free_ring(&xdbc.in_ring); if (xdbc.table_dma) - memblock_free(xdbc.table_dma, PAGE_SIZE); + memblock_phys_free(xdbc.table_dma, PAGE_SIZE); if (xdbc.out_dma) - memblock_free(xdbc.out_dma, PAGE_SIZE); + memblock_phys_free(xdbc.out_dma, PAGE_SIZE); xdbc.table_base = NULL; xdbc.out_buf = NULL; @@ -987,8 +987,8 @@ static int __init xdbc_init(void) xdbc_free_ring(&xdbc.evt_ring); xdbc_free_ring(&xdbc.out_ring); xdbc_free_ring(&xdbc.in_ring); - memblock_free(xdbc.table_dma, PAGE_SIZE); - memblock_free(xdbc.out_dma, PAGE_SIZE); + memblock_phys_free(xdbc.table_dma, PAGE_SIZE); + memblock_phys_free(xdbc.out_dma, PAGE_SIZE); writel(0, &xdbc.xdbc_reg->control); early_iounmap(xdbc.xhci_base, xdbc.xhci_length); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index e56a5faac395c..4b671cc0a7ea5 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -241,7 +241,7 @@ void __init xen_swiotlb_init_early(void) */ rc = xen_swiotlb_fixup(start, nslabs); if (rc) { - memblock_free(__pa(start), PAGE_ALIGN(bytes)); + memblock_phys_free(__pa(start), PAGE_ALIGN(bytes)); if (nslabs > 1024 && repeat--) { /* Min is 2MB */ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e25f964fdd601..d32d417095137 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -103,7 +103,7 @@ void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); -int memblock_free(phys_addr_t base, phys_addr_t size); +int memblock_phys_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP int memblock_physmem_add(phys_addr_t base, phys_addr_t size); diff --git a/init/initramfs.c b/init/initramfs.c index a842c05447456..1a971f070dd40 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -607,7 +607,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end) unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); unsigned long aligned_end = ALIGN(end, PAGE_SIZE); - memblock_free(__pa(aligned_start), aligned_end - aligned_start); + memblock_phys_free(__pa(aligned_start), aligned_end - aligned_start); #endif free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 430d2f78d540e..b9fa173e5e56d 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -247,7 +247,7 @@ swiotlb_init(int verbose) return; fail_free_mem: - memblock_free(__pa(tlb), bytes); + memblock_phys_free(__pa(tlb), bytes); fail: pr_warn("Cannot allocate buffer"); } diff --git a/lib/cpumask.c b/lib/cpumask.c index 045779446a18d..a90786b77c1c2 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - memblock_free(__pa(mask), cpumask_size()); + 
memblock_phys_free(__pa(mask), cpumask_size()); } #endif diff --git a/mm/cma.c b/mm/cma.c index 11152c3fb23cc..bc9ca8f3c4871 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -378,7 +378,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, return 0; free_mem: - memblock_free(base, size); + memblock_phys_free(base, size); err: pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); return ret; diff --git a/mm/memblock.c b/mm/memblock.c index 427acbeed4a37..1c88e34254e6a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -806,18 +806,18 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) void __init_memblock memblock_free_ptr(void *ptr, size_t size) { if (ptr) - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } /** - * memblock_free - free boot memory block + * memblock_phys_free - free boot memory block * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ -int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size - 1; @@ -1939,7 +1939,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) * memmap array. */ if (pg < pgend) - memblock_free(pg, pgend - pg); + memblock_phys_free(pg, pgend - pg); } /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fd0be32a281e..feffaa9423fe3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -2204,7 +2204,7 @@ static int __ref try_remove_memory(u64 start, u64 size) arch_remove_memory(start, size, altmap); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { - memblock_free(start, size); + memblock_phys_free(start, size); memblock_remove(start, size); } diff --git a/mm/percpu.c b/mm/percpu.c index f58318cb04c02..d65ddf6f2a353 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { - memblock_free(__pa(ai), ai->__ai_size); + memblock_phys_free(__pa(ai), ai->__ai_size); } /** @@ -3134,7 +3134,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, out_free: pcpu_free_alloc_info(ai); if (areas) - memblock_free(__pa(areas), areas_size); + memblock_phys_free(__pa(areas), areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ @@ -3256,7 +3256,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, free_fn(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: - memblock_free(__pa(pages), pages_size); + memblock_phys_free(__pa(pages), pages_size); pcpu_free_alloc_info(ai); return rc; } @@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_dfl_fc_free(void *ptr, size_t size) { - memblock_free(__pa(ptr), size); + memblock_phys_free(__pa(ptr), size); } void __init setup_per_cpu_areas(void) diff --git a/mm/sparse.c b/mm/sparse.c index 55fea0c2f927e..fc3ab8d3b6bcd 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata; static inline void __meminit sparse_buffer_free(unsigned long size) { WARN_ON(!sparsemap_buf || size == 0); - memblock_free(__pa(sparsemap_buf), size); + memblock_phys_free(__pa(sparsemap_buf), size); } static void __init sparse_buffer_init(unsigned long 
size, int nid) From 13ab40b0e60e661f4ec2e1b6df895b7ed5fdf13a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 21 Oct 2021 15:08:18 +1100 Subject: [PATCH 0790/1202] memblock: use memblock_free for freeing virtual pointers Rename memblock_free_ptr() to memblock_free() and use memblock_free() when freeing a virtual pointer so that memblock_free() will be a counterpart of memblock_alloc() The callers are updated with the below semantic patch and manual addition of (void *) casting to pointers that are represented by unsigned long variables. @@ identifier vaddr; expression size; @@ ( - memblock_phys_free(__pa(vaddr), size); + memblock_free(vaddr, size); | - memblock_free_ptr(vaddr, size); + memblock_free(vaddr, size); ) Link: https://lkml.kernel.org/r/20210930185031.18648-7-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Christophe Leroy Cc: Juergen Gross Cc: Shahab Vahedi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/alpha/kernel/core_irongate.c | 3 +-- arch/mips/mm/init.c | 2 +- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 ++-- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- arch/powerpc/platforms/pseries/svm.c | 3 +-- arch/riscv/kernel/setup.c | 5 ++--- arch/sparc/kernel/smp_64.c | 2 +- arch/um/kernel/mem.c | 2 +- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/mm/kasan_init_64.c | 4 ++-- arch/x86/mm/numa.c | 2 +- arch/x86/mm/numa_emulation.c | 2 +- arch/x86/xen/mmu_pv.c | 2 +- arch/x86/xen/p2m.c | 2 +- drivers/base/arch_numa.c | 4 ++-- drivers/macintosh/smu.c | 2 +- drivers/xen/swiotlb-xen.c | 2 +- include/linux/memblock.h | 2 +- init/initramfs.c | 2 +- init/main.c | 2 +- kernel/dma/swiotlb.c | 2 +- kernel/printk/printk.c | 4 ++-- lib/bootconfig.c | 2 +- lib/cpumask.c | 2 +- mm/memblock.c | 6 +++--- mm/percpu.c | 8 ++++---- mm/sparse.c | 2 +- 29 files changed, 39 insertions(+), 42 deletions(-) diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index ee26dcc494186..6b8ed12936b6f 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -233,8 +233,7 @@ albacore_init_arch(void) unsigned long size; size = initrd_end - initrd_start; - memblock_phys_free(__pa(initrd_start), - PAGE_ALIGN(size)); + memblock_free((void *)initrd_start, PAGE_ALIGN(size)); if (!move_initrd(pci_mem)) printk("irongate_init_arch: initrd too big " "(%ldK)\ndisabling initrd\n", diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 3be1c29084fa8..325e1552cbead 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_phys_free(__pa(ptr), size); + memblock_free(ptr, size); } void __init setup_per_cpu_areas(void) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 42839d6bd4860..ba527fb529931 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -1095,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char cpufeatures_setup_finished(); - memblock_phys_free(__pa(dt_cpu_features), - sizeof(struct dt_cpu_feature) * nr_dt_cpu_features); + memblock_free(dt_cpu_features, + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features); return 0; } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 5af8993a8e6db..6b1338db87795 100644 --- a/arch/powerpc/kernel/setup-common.c 
+++ b/arch/powerpc/kernel/setup-common.c @@ -825,7 +825,7 @@ static void __init smp_setup_pacas(void) set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); } - memblock_phys_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); + memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32)); cpu_to_phys_id = NULL; } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 75bc294ac40d7..1777e992b20bd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -812,7 +812,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_phys_free(__pa(ptr), size); + memblock_free(ptr, size); } static int pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b5a9d343b7205..004cd6a96c8a0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2981,7 +2981,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (!phb->hose) { pr_err(" Can't allocate PCI controller for %pOF\n", np); - memblock_phys_free(__pa(phb), sizeof(struct pnv_phb)); + memblock_free(phb, sizeof(struct pnv_phb)); return; } diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index b7c017bb40f7f..6332365d28917 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -56,8 +56,7 @@ void __init svm_swiotlb_init(void) return; - memblock_phys_free(__pa(vstart), - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); panic("SVM: Cannot allocate SWIOTLB buffer"); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 6ea7c53b82cd8..b42bfdc674823 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -230,14 +230,13 @@ static void __init init_resources(void) /* Clean-up any unused pre-allocated resources */ if (res_idx >= 0) - memblock_phys_free(__pa(mem_res), - (res_idx + 1) * sizeof(*mem_res)); + memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); return; error: /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); - memblock_phys_free(__pa(mem_res), mem_res_sz); + memblock_free(mem_res, mem_res_sz); } diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 2507549538df1..b98a7bbe6728a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1567,7 +1567,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - memblock_phys_free(__pa(ptr), size); + memblock_free(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index d1710ebb44f47..0039771eb01cd 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -47,7 +47,7 @@ void __init mem_init(void) */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); - memblock_phys_free(__pa(brk_end), uml_reserved - brk_end); + memblock_free((void *)brk_end, uml_reserved - brk_end); uml_reserved = brk_end; /* this will put all low memory onto the freelists */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5afd985591939..7b65275544b2c 100644 --- 
a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_free_ptr(ptr, size); + memblock_free(ptr, size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index ef885370719a6..e7b9b464a82f1 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -49,7 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; - memblock_free_ptr(p, PMD_SIZE); + memblock_free(p, PMD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); @@ -85,7 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; - memblock_free_ptr(p, PUD_SIZE); + memblock_free(p, PUD_SIZE); } p = early_alloc(PAGE_SIZE, nid, true); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 1e9b93b088dbf..c6b1213086d63 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -355,7 +355,7 @@ void __init numa_reset_distance(void) /* numa_distance could be 1LU marking allocation failure, test cnt */ if (numa_distance_cnt) - memblock_free_ptr(numa_distance, size); + memblock_free(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; /* enable table creation */ } diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index e801e30089c43..1a02b791d273c 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -517,7 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) } /* free the copied physical distance table */ - memblock_free_ptr(phys_dist, phys_size); + memblock_free(phys_dist, phys_size); return; no_emu: diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 676d8d292f8a3..173de1e29bda9 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1151,7 +1151,7 @@ static void __init xen_pagetable_p2m_free(void) xen_cleanhighmap(addr, addr + size); size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - memblock_phys_free(__pa(addr), size); + memblock_free((void *)addr, size); } else { xen_cleanmfnmap(addr); } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 141bb9dbd2fbe..58db86f7b3846 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -197,7 +197,7 @@ static void * __ref alloc_p2m_page(void) static void __ref free_p2m_page(void *p) { if (unlikely(!slab_is_available())) { - memblock_free_ptr(p, PAGE_SIZE); + memblock_free(p, PAGE_SIZE); return; } diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c index 712edef039295..bc1876915457d 100644 --- a/drivers/base/arch_numa.c +++ b/drivers/base/arch_numa.c @@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_fc_free(void *ptr, size_t size) { - memblock_phys_free(__pa(ptr), size); + memblock_free(ptr, size); } #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK @@ -326,7 +326,7 @@ void __init numa_free_distance(void) size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); - memblock_free_ptr(numa_distance, size); + memblock_free(numa_distance, size); numa_distance_cnt = 0; numa_distance = NULL; } diff --git a/drivers/macintosh/smu.c 
b/drivers/macintosh/smu.c index fe63d5ee201b8..f621521112363 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -570,7 +570,7 @@ int __init smu_init (void) fail_db_node: of_node_put(smu->db_node); fail_bootmem: - memblock_free_ptr(smu, sizeof(struct smu_device)); + memblock_free(smu, sizeof(struct smu_device)); smu = NULL; fail_np: of_node_put(np); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4b671cc0a7ea5..f083194e26344 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -241,7 +241,7 @@ void __init xen_swiotlb_init_early(void) */ rc = xen_swiotlb_fixup(start, nslabs); if (rc) { - memblock_phys_free(__pa(start), PAGE_ALIGN(bytes)); + memblock_free(start, PAGE_ALIGN(bytes)); if (nslabs > 1024 && repeat--) { /* Min is 2MB */ nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE)); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d32d417095137..484650681beeb 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -118,7 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); void memblock_free_all(void); -void memblock_free_ptr(void *ptr, size_t size); +void memblock_free(void *ptr, size_t size); void reset_node_managed_pages(pg_data_t *pgdat); void reset_all_zones_managed_pages(void); diff --git a/init/initramfs.c b/init/initramfs.c index 1a971f070dd40..2f3d96dc3db6d 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -607,7 +607,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end) unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); unsigned long aligned_end = ALIGN(end, PAGE_SIZE); - memblock_phys_free(__pa(aligned_start), aligned_end - aligned_start); + memblock_free((void *)aligned_start, aligned_end - aligned_start); #endif free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, diff --git a/init/main.c b/init/main.c index 767ee26721760..3bbf707171ce8 100644 --- a/init/main.c +++ b/init/main.c @@ -925,7 +925,7 @@ static void __init print_unknown_bootoptions(void) end += sprintf(end, " %s", *p); pr_notice("Unknown command line parameters:%s\n", unknown_options); - memblock_free_ptr(unknown_options, len); + memblock_free(unknown_options, len); } asmlinkage __visible void __init __no_sanitize_address start_kernel(void) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index b9fa173e5e56d..02656d7ccbfd1 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -247,7 +247,7 @@ swiotlb_init(int verbose) return; fail_free_mem: - memblock_phys_free(__pa(tlb), bytes); + memblock_free(tlb, bytes); fail: pr_warn("Cannot allocate buffer"); } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a8d0a58deebc7..2cae1bfa6be7b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early) return; err_free_descs: - memblock_free_ptr(new_descs, new_descs_size); + memblock_free(new_descs, new_descs_size); err_free_log_buf: - memblock_free_ptr(new_log_buf, new_log_buf_len); + memblock_free(new_log_buf, new_log_buf_len); } static bool __read_mostly ignore_loglevel; diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 5ae248b293738..547558d80e64c 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -792,7 +792,7 @@ void __init xbc_destroy_all(void) xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + 
memblock_free(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; brace_index = 0; } diff --git a/lib/cpumask.c b/lib/cpumask.c index a90786b77c1c2..a971a82d2f436 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - memblock_phys_free(__pa(mask), cpumask_size()); + memblock_free(mask, cpumask_size()); } #endif diff --git a/mm/memblock.c b/mm/memblock.c index 1c88e34254e6a..ed28cb95b8830 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -472,7 +472,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, kfree(old_array); else if (old_array != memblock_memory_init_regions && old_array != memblock_reserved_init_regions) - memblock_free_ptr(old_array, old_alloc_size); + memblock_free(old_array, old_alloc_size); /* * Reserve the new array if that comes from the memblock. Otherwise, we @@ -796,14 +796,14 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) } /** - * memblock_free_ptr - free boot memory allocation + * memblock_free - free boot memory allocation * @ptr: starting address of the boot memory allocation * @size: size of the boot memory block in bytes * * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ -void __init_memblock memblock_free_ptr(void *ptr, size_t size) +void __init_memblock memblock_free(void *ptr, size_t size) { if (ptr) memblock_phys_free(__pa(ptr), size); diff --git a/mm/percpu.c b/mm/percpu.c index d65ddf6f2a353..f5b2c2ea5a548 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, */ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) { - memblock_phys_free(__pa(ai), ai->__ai_size); + memblock_free(ai, ai->__ai_size); } /** @@ -3134,7 +3134,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, out_free: pcpu_free_alloc_info(ai); if (areas) - memblock_phys_free(__pa(areas), areas_size); + memblock_free(areas, areas_size); return rc; } #endif /* BUILD_EMBED_FIRST_CHUNK */ @@ -3256,7 +3256,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, free_fn(page_address(pages[j]), PAGE_SIZE); rc = -ENOMEM; out_free_ar: - memblock_phys_free(__pa(pages), pages_size); + memblock_free(pages, pages_size); pcpu_free_alloc_info(ai); return rc; } @@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, static void __init pcpu_dfl_fc_free(void *ptr, size_t size) { - memblock_phys_free(__pa(ptr), size); + memblock_free(ptr, size); } void __init setup_per_cpu_areas(void) diff --git a/mm/sparse.c b/mm/sparse.c index fc3ab8d3b6bcd..e5c84b0cf0c93 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata; static inline void __meminit sparse_buffer_free(unsigned long size) { WARN_ON(!sparsemap_buf || size == 0); - memblock_phys_free(__pa(sparsemap_buf), size); + memblock_free(sparsemap_buf, size); } static void __init sparse_buffer_init(unsigned long size, int nid) From 79c41156be7856a7f9dfdfdeceb6b5d006ba7b53 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 21 Oct 2021 15:08:19 +1100 Subject: [PATCH 0791/1202] fixup for "memblock: use memblock_free for freeing virtual pointers" Link: https://lkml.kernel.org/r/20211018192940.3d1d532f@canb.auug.org.au Signed-off-by: Stephen Rothwell Cc: Mike Rapoport Signed-off-by: Andrew 
Morton Signed-off-by: Stephen Rothwell --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 3bbf707171ce8..f0001af8ebb90 100644 --- a/init/main.c +++ b/init/main.c @@ -382,7 +382,7 @@ static char * __init xbc_make_cmdline(const char *key) ret = xbc_snprint_cmdline(new_cmdline, len + 1, root); if (ret < 0 || ret > len) { pr_err("Failed to print extra kernel cmdline.\n"); - memblock_free_ptr(new_cmdline, len + 1); + memblock_free(new_cmdline, len + 1); return NULL; } From 9067751b4f9e023565e6395fd8e6a247df74887f Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Thu, 21 Oct 2021 15:08:20 +1100 Subject: [PATCH 0792/1202] mm: mark the OOM reaper thread as freezable The OOM reaper alters the user address space, which might theoretically alter the snapshot if reaping is allowed to happen after the freezer quiescent state. To this end, the reaper kthread uses wait_event_freezable() while waiting for any work so that it cannot run while the system freezes. However, the current implementation doesn't respect the freezer because all kernel threads are created with the PF_NOFREEZE flag, so they are automatically excluded from freezing operations. This means that the OOM reaper can race with system snapshotting if it has work to do while the system is being frozen. Fix this by adding a set_freezable() call which will clear the PF_NOFREEZE flag and thus make the OOM reaper visible to the freezer. Please note that the OOM reaper altering the snapshot this way is mostly a theoretical concern and has not been observed in practice. Link: https://lkml.kernel.org/r/20210921165758.6154-1-sultan@kerneltoast.com Link: https://lkml.kernel.org/r/20210918233920.9174-1-sultan@kerneltoast.com Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Sultan Alsawaf Acked-by: Michal Hocko Cc: David Rientjes Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/oom_kill.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 831340e7ad8b4..46a742b57735e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -641,6 +641,8 @@ static void oom_reap_task(struct task_struct *tsk) static int oom_reaper(void *unused) { + set_freezable(); + while (true) { struct task_struct *tsk = NULL; From 7f4a1e9f763f1bffb65538f37176562e48e75913 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 21 Oct 2021 15:08:21 +1100 Subject: [PATCH 0793/1202] oom_kill: oom_score_adj broken for processes with small memory usage If you have a process with less than 1000 totalpages, the calculation: adj = (long)p->signal->oom_score_adj; ... adj *= totalpages / 1000; will always result in adj being zero no matter what oom_score_adj is, which could result in the wrong process being picked for killing. Fix by adding 1000 to totalpages before dividing. I ran across this trying to diagnose another problem where I set up a cgroup with a small amount of memory and couldn't get a test program to work right. I'm not sure this is quite right; to keep closer to the current behavior you could do: if (totalpages >= 1000) adj *= totalpages / 1000; but that would map 0-1999 to the same value. This at least shows the issue. I can provide a test program that shows the issue, but I think it's pretty obvious from the code.
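As an illustration only, a minimal standalone sketch of the rounding difference (the values are made up and this is plain userspace C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical values: a small cgroup and a modest adjustment. */
		long totalpages = 900;
		long oom_score_adj = 500;

		/* Old normalization: 900 / 1000 == 0, so the adjustment is lost. */
		long old_adj = oom_score_adj * (totalpages / 1000);

		/* New normalization: (900 + 1000) / 1000 == 1, adjustment kept. */
		long new_adj = oom_score_adj * ((totalpages + 1000) / 1000);

		printf("old adj: %ld, new adj: %ld\n", old_adj, new_adj);
		return 0;
	}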
Link: https://lkml.kernel.org/r/20210701125430.836308-1-minyard@acm.org Signed-off-by: Corey Minyard Cc: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/oom_kill.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 46a742b57735e..ea2525a76a4a3 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -234,8 +234,11 @@ long oom_badness(struct task_struct *p, unsigned long totalpages) mm_pgtables_bytes(p->mm) / PAGE_SIZE; task_unlock(p); - /* Normalize to oom_score_adj units */ - adj *= totalpages / 1000; + /* + * Normalize to oom_score_adj units. You should never + * multiply by zero here, or oom_score_adj will not work. + */ + adj *= (totalpages + 1000) / 1000; points += adj; return points; From 5cf82e5486daee938979cba1f1a461c1c6f0eee3 Mon Sep 17 00:00:00 2001 From: Zhenguo Yao Date: Thu, 21 Oct 2021 15:08:22 +1100 Subject: [PATCH 0794/1202] hugetlbfs: extend the definition of hugepages parameter to support node allocation We can specify the number of hugepages to allocate at boot, but the hugepages are currently balanced across all nodes. In some scenarios, we only need hugepages in one node. For example: DPDK needs hugepages which are in the same node as the NIC. If DPDK needs four hugepages of 1G size in node1 and the system has 16 NUMA nodes, we must reserve 64 hugepages on the kernel cmdline. But only four hugepages are used; the others have to be freed after boot. If the system memory is low (for example: 64G), that becomes an impossible task. So extend the hugepages parameter to support specifying hugepages on a specific node. For example, adding the following parameter: hugepagesz=1G hugepages=0:1,1:3 will allocate 1 hugepage in node0 and 3 hugepages in node1. Link: https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@gmail.com Signed-off-by: Zhenguo Yao Reviewed-by: Mike Kravetz Cc: Zhenguo Yao Cc: Dan Carpenter Cc: Nathan Chancellor Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jonathan Corbet Cc: Mike Rapoport Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/kernel-parameters.txt | 8 +- Documentation/admin-guide/mm/hugetlbpage.rst | 12 +- arch/powerpc/mm/hugetlbpage.c | 9 +- include/linux/hugetlb.h | 6 +- mm/hugetlb.c | 153 +++++++++++++++--- 5 files changed, 155 insertions(+), 33 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e7f7904edf20f..02eeda2a11adc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1601,9 +1601,11 @@ the number of pages of hugepagesz to be allocated. If this is the first HugeTLB parameter on the command line, it specifies the number of pages to allocate for - the default huge page size. See also - Documentation/admin-guide/mm/hugetlbpage.rst. - Format: + the default huge page size. If using node format, the + number of pages to allocate per-node can be specified. + See also Documentation/admin-guide/mm/hugetlbpage.rst. + Format: or (node format) + :[,:] hugepagesz= [HW] The size of the HugeTLB pages.
This is used in diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst index bb90de3885d1b..0166f9de34281 100644 --- a/Documentation/admin-guide/mm/hugetlbpage.rst +++ b/Documentation/admin-guide/mm/hugetlbpage.rst @@ -128,7 +128,9 @@ hugepages implicitly specifies the number of huge pages of default size to allocate. If the number of huge pages of default size is implicitly specified, it can not be overwritten by a hugepagesz,hugepages - parameter pair for the default size. + parameter pair for the default size. This parameter also has a + node format. The node format specifies the number of huge pages + to allocate on specific nodes. For example, on an architecture with 2M default huge page size:: @@ -138,6 +140,14 @@ hugepages indicating that the hugepages=512 parameter is ignored. If a hugepages parameter is preceded by an invalid hugepagesz parameter, it will be ignored. + + Node format example:: + + hugepagesz=2M hugepages=0:1,1:2 + + It will allocate 1 2M hugepage on node0 and 2 2M hugepages on node1. + If the node number is invalid, the parameter will be ignored. + default_hugepagesz Specify the default huge page size. This parameter can only be specified once on the command line. default_hugepagesz can diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9a75ba078e1b3..82d8b368ca6d4 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -229,17 +229,22 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } + +bool __init hugetlb_node_alloc_supported(void) +{ + return false; +} #endif -int __init alloc_bootmem_huge_page(struct hstate *h) +int __init alloc_bootmem_huge_page(struct hstate *h, int nid) { #ifdef CONFIG_PPC_BOOK3S_64 if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) return pseries_alloc_bootmem_huge_page(h); #endif - return __alloc_bootmem_huge_page(h); + return __alloc_bootmem_huge_page(h, nid); } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c0a20781b28ef..44c2ab0dfa591 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -615,6 +615,7 @@ struct hstate { unsigned long nr_overcommit_huge_pages; struct list_head hugepage_activelist; struct list_head hugepage_freelists[MAX_NUMNODES]; + unsigned int max_huge_pages_node[MAX_NUMNODES]; unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; @@ -647,8 +648,9 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct page *page); /* arch callback */ -int __init __alloc_bootmem_huge_page(struct hstate *h); -int __init alloc_bootmem_huge_page(struct hstate *h); +int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); +int __init alloc_bootmem_huge_page(struct hstate *h, int nid); +bool __init hugetlb_node_alloc_supported(void); void __init hugetlb_add_hstate(unsigned order); bool __init arch_hugetlb_valid_size(unsigned long size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fabb061338396..d14e3d4303773 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -77,6 +77,7 @@ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; static bool __initdata parsed_valid_hugepagesz = true; static bool __initdata parsed_default_hugepagesz; +static unsigned int default_hugepages_in_node[MAX_NUMNODES] 
__initdata; /* * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages, @@ -2966,33 +2967,39 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, return ERR_PTR(-ENOSPC); } -int alloc_bootmem_huge_page(struct hstate *h) +int alloc_bootmem_huge_page(struct hstate *h, int nid) __attribute__ ((weak, alias("__alloc_bootmem_huge_page"))); -int __alloc_bootmem_huge_page(struct hstate *h) +int __alloc_bootmem_huge_page(struct hstate *h, int nid) { - struct huge_bootmem_page *m; + struct huge_bootmem_page *m = NULL; /* initialize for clang */ int nr_nodes, node; + if (nid >= nr_online_nodes) + return 0; + /* do node specific alloc */ + if (nid != NUMA_NO_NODE) { + m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), + 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); + if (!m) + return 0; + goto found; + } + /* allocate from next node when distributing huge pages */ for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { - void *addr; - - addr = memblock_alloc_try_nid_raw( + m = memblock_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, node); - if (addr) { - /* - * Use the beginning of the huge page to store the - * huge_bootmem_page struct (until gather_bootmem - * puts them into the mem_map). - */ - m = addr; - goto found; - } + /* + * Use the beginning of the huge page to store the + * huge_bootmem_page struct (until gather_bootmem + * puts them into the mem_map). + */ + if (!m) + return 0; + goto found; } - return 0; found: - BUG_ON(!IS_ALIGNED(virt_to_phys(m), huge_page_size(h))); /* Put them into a private list first because mem_map is not up yet */ INIT_LIST_HEAD(&m->list); list_add(&m->list, &huge_boot_pages); @@ -3032,12 +3039,61 @@ static void __init gather_bootmem_prealloc(void) cond_resched(); } } +static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid) +{ + unsigned long i; + char buf[32]; + + for (i = 0; i < h->max_huge_pages_node[nid]; ++i) { + if (hstate_is_gigantic(h)) { + if (!alloc_bootmem_huge_page(h, nid)) + break; + } else { + struct page *page; + gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE; + + page = alloc_fresh_huge_page(h, gfp_mask, nid, + &node_states[N_MEMORY], NULL); + if (!page) + break; + put_page(page); /* free it into the hugepage allocator */ + } + cond_resched(); + } + if (i == h->max_huge_pages_node[nid]) + return; + + string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); + pr_warn("HugeTLB: allocating %u of page size %s failed node%d. Only allocated %lu hugepages.\n", + h->max_huge_pages_node[nid], buf, nid, i); + h->max_huge_pages -= (h->max_huge_pages_node[nid] - i); + h->max_huge_pages_node[nid] = i; +} static void __init hugetlb_hstate_alloc_pages(struct hstate *h) { unsigned long i; nodemask_t *node_alloc_noretry; + bool node_specific_alloc = false; + /* skip gigantic hugepages allocation if hugetlb_cma enabled */ + if (hstate_is_gigantic(h) && hugetlb_cma_size) { + pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); + return; + } + + /* do node specific alloc */ + for (i = 0; i < nr_online_nodes; i++) { + if (h->max_huge_pages_node[i] > 0) { + hugetlb_hstate_alloc_pages_onenode(h, i); + node_specific_alloc = true; + } + } + + if (node_specific_alloc) + return; + + /* below will do all node balanced alloc */ if (!hstate_is_gigantic(h)) { /* * Bit mask controlling how hard we retry per-node allocations. 
@@ -3058,11 +3114,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { - if (hugetlb_cma_size) { - pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); - goto free; - } - if (!alloc_bootmem_huge_page(h)) + if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE)) break; } else if (!alloc_pool_huge_page(h, &node_states[N_MEMORY], @@ -3078,7 +3130,6 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) h->max_huge_pages, buf, i); h->max_huge_pages = i; } -free: kfree(node_alloc_noretry); } @@ -3993,6 +4044,10 @@ static int __init hugetlb_init(void) } default_hstate.max_huge_pages = default_hstate_max_huge_pages; + + for (i = 0; i < nr_online_nodes; i++) + default_hstate.max_huge_pages_node[i] = + default_hugepages_in_node[i]; } } @@ -4053,6 +4108,10 @@ void __init hugetlb_add_hstate(unsigned int order) parsed_hstate = h; } +bool __init __weak hugetlb_node_alloc_supported(void) +{ + return true; +} /* * hugepages command line processing * hugepages normally follows a valid hugepagsz or default_hugepagsz @@ -4064,6 +4123,10 @@ static int __init hugepages_setup(char *s) { unsigned long *mhp; static unsigned long *last_mhp; + int node = NUMA_NO_NODE; + int count; + unsigned long tmp; + char *p = s; if (!parsed_valid_hugepagesz) { pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s); @@ -4087,8 +4150,40 @@ static int __init hugepages_setup(char *s) return 0; } - if (sscanf(s, "%lu", mhp) <= 0) - *mhp = 0; + while (*p) { + count = 0; + if (sscanf(p, "%lu%n", &tmp, &count) != 1) + goto invalid; + /* Parameter is node format */ + if (p[count] == ':') { + if (!hugetlb_node_alloc_supported()) { + pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n"); + return 0; + } + node = tmp; + p += count + 1; + if (node < 0 || node >= nr_online_nodes) + goto invalid; + /* Parse hugepages */ + if (sscanf(p, "%lu%n", &tmp, &count) != 1) + goto invalid; + if (!hugetlb_max_hstate) + default_hugepages_in_node[node] = tmp; + else + parsed_hstate->max_huge_pages_node[node] = tmp; + *mhp += tmp; + /* Go to parse next node*/ + if (p[count] == ',') + p += count + 1; + else + break; + } else { + if (p != s) + goto invalid; + *mhp = tmp; + break; + } + } /* * Global state is always initialized later in hugetlb_init. 
@@ -4101,6 +4196,10 @@ static int __init hugepages_setup(char *s) last_mhp = mhp; return 1; + +invalid: + pr_warn("HugeTLB: Invalid hugepages parameter %s\n", p); + return 0; } __setup("hugepages=", hugepages_setup); @@ -4162,6 +4261,7 @@ __setup("hugepagesz=", hugepagesz_setup); static int __init default_hugepagesz_setup(char *s) { unsigned long size; + int i; parsed_valid_hugepagesz = false; if (parsed_default_hugepagesz) { @@ -4190,6 +4290,9 @@ static int __init default_hugepagesz_setup(char *s) */ if (default_hstate_max_huge_pages) { default_hstate.max_huge_pages = default_hstate_max_huge_pages; + for (i = 0; i < nr_online_nodes; i++) + default_hstate.max_huge_pages_node[i] = + default_hugepages_in_node[i]; if (hstate_is_gigantic(&default_hstate)) hugetlb_hstate_alloc_pages(&default_hstate); default_hstate_max_huge_pages = 0; From 92a4c6d1d66fb1f4c6e6c39ca67b9eecf0b28ff3 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 21 Oct 2021 15:08:22 +1100 Subject: [PATCH 0795/1202] mm/migrate: de-duplicate migrate_reason strings In order to remove the need to manually keep three different files in synch, provide a common definition of the mapping between enum migrate_reason, and the associated strings for each enum item. 1. Use the tracing system's mapping of enums to strings, by redefining and reusing the MIGRATE_REASON and supporting macros, and using that to populate the string array in mm/debug.c. 2. Move enum migrate_reason to migrate_mode.h. This is not strictly necessary for this patch, but migrate mode and migrate reason go together, so this will slightly clarify things. Link: https://lkml.kernel.org/r/20210922041755.141817-2-jhubbard@nvidia.com Signed-off-by: John Hubbard Reviewed-by: Weizhao Ouyang Cc: "Huang, Ying" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/migrate.h | 19 +------------------ include/linux/migrate_mode.h | 13 +++++++++++++ mm/debug.c | 20 +++++++++++--------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c8077e9366919..3d154fe03c96e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -19,24 +19,7 @@ struct migration_target_control; */ #define MIGRATEPAGE_SUCCESS 0 -/* - * Keep sync with: - * - macro MIGRATE_REASON in include/trace/events/migrate.h - * - migrate_reason_names[MR_TYPES] in mm/debug.c - */ -enum migrate_reason { - MR_COMPACTION, - MR_MEMORY_FAILURE, - MR_MEMORY_HOTPLUG, - MR_SYSCALL, /* also applies to cpusets */ - MR_MEMPOLICY_MBIND, - MR_NUMA_MISPLACED, - MR_CONTIG_RANGE, - MR_LONGTERM_PIN, - MR_DEMOTION, - MR_TYPES -}; - +/* Defined in mm/debug.c: */ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index 883c992490334..f37cc03f9369e 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -19,4 +19,17 @@ enum migrate_mode { MIGRATE_SYNC_NO_COPY, }; +enum migrate_reason { + MR_COMPACTION, + MR_MEMORY_FAILURE, + MR_MEMORY_HOTPLUG, + MR_SYSCALL, /* also applies to cpusets */ + MR_MEMPOLICY_MBIND, + MR_NUMA_MISPLACED, + MR_CONTIG_RANGE, + MR_LONGTERM_PIN, + MR_DEMOTION, + MR_TYPES +}; + #endif /* MIGRATE_MODE_H_INCLUDED */ diff --git a/mm/debug.c b/mm/debug.c index fae0f81ad8313..4333b6784a20c 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -16,17 +16,19 @@ #include #include "internal.h" +#include + +/* + * Define EM() and EMe() so that MIGRATE_REASON from trace/events/migrate.h can + * be used to populate 
migrate_reason_names[]. */ +#undef EM +#undef EMe +#define EM(a, b) b, +#define EMe(a, b) b const char *migrate_reason_names[MR_TYPES] = { - "compaction", - "memory_failure", - "memory_hotplug", - "syscall_or_cpuset", - "mempolicy_mbind", - "numa_misplaced", - "contig_range", - "longterm_pin", - "demotion", + MIGRATE_REASON }; const struct trace_print_flags pageflag_names[] = { From a2feb0e6f8a3cc4224234d14e2ed302cc022f205 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 21 Oct 2021 15:08:23 +1100 Subject: [PATCH 0796/1202] mm: migrate: make demotion knob depend on migration Memory demotion needs to call migrate_pages() to do the job. It is controlled by a knob; however, the knob doesn't depend on CONFIG_MIGRATION. The knob can be turned on even though MIGRATION is disabled. This will not cause any crash since migrate_pages() would just return -ENOSYS, but it is definitely not optimal to go through the demotion path and then retry regular swap every time. And it doesn't make much sense to have the knob visible to users when !MIGRATION. Move the related code from mempolicy.[h|c] to migrate.[h|c]. Link: https://lkml.kernel.org/r/20211015005559.246709-1-shy828301@gmail.com Signed-off-by: Yang Shi Acked-by: "Huang, Ying" Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/mempolicy.h | 4 --- include/linux/migrate.h | 4 +++ mm/mempolicy.c | 61 --------------------------------------- mm/migrate.c | 61 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index f9edbbddd3549..d70c3a84ead70 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -184,8 +184,6 @@ extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); -extern bool numa_demotion_enabled; - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); @@ -301,8 +299,6 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp) return NULL; } -#define numa_demotion_enabled false - static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3d154fe03c96e..2d8130e05dc09 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); + +extern bool numa_demotion_enabled; #else static inline void putback_movable_pages(struct list_head *l) {} @@ -65,6 +67,8 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } + +#define numa_demotion_enabled false #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_COMPACTION diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6e3199a7c679d..1e12a521e37e3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -3051,64 +3051,3 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) p += scnprintf(p, buffer + maxlen - p, ":%*pbl", nodemask_pr_args(&nodes)); } - -bool numa_demotion_enabled = false; - -#ifdef CONFIG_SYSFS -static ssize_t numa_demotion_enabled_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return
sysfs_emit(buf, "%s\n", - numa_demotion_enabled? "true" : "false"); -} - -static ssize_t numa_demotion_enabled_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) - numa_demotion_enabled = true; - else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) - numa_demotion_enabled = false; - else - return -EINVAL; - - return count; -} - -static struct kobj_attribute numa_demotion_enabled_attr = - __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, - numa_demotion_enabled_store); - -static struct attribute *numa_attrs[] = { - &numa_demotion_enabled_attr.attr, - NULL, -}; - -static const struct attribute_group numa_attr_group = { - .attrs = numa_attrs, -}; - -static int __init numa_init_sysfs(void) -{ - int err; - struct kobject *numa_kobj; - - numa_kobj = kobject_create_and_add("numa", mm_kobj); - if (!numa_kobj) { - pr_err("failed to create numa kobject\n"); - return -ENOMEM; - } - err = sysfs_create_group(numa_kobj, &numa_attr_group); - if (err) { - pr_err("failed to register numa group\n"); - goto delete_obj; - } - return 0; - -delete_obj: - kobject_put(numa_kobj); - return err; -} -subsys_initcall(numa_init_sysfs); -#endif diff --git a/mm/migrate.c b/mm/migrate.c index 1852d787e6ab6..51e310a945168 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -3306,3 +3306,64 @@ static int __init migrate_on_reclaim_init(void) } late_initcall(migrate_on_reclaim_init); #endif /* CONFIG_HOTPLUG_CPU */ + +bool numa_demotion_enabled = false; + +#ifdef CONFIG_SYSFS +static ssize_t numa_demotion_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", + numa_demotion_enabled ? "true" : "false"); +} + +static ssize_t numa_demotion_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1)) + numa_demotion_enabled = true; + else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1)) + numa_demotion_enabled = false; + else + return -EINVAL; + + return count; +} + +static struct kobj_attribute numa_demotion_enabled_attr = + __ATTR(demotion_enabled, 0644, numa_demotion_enabled_show, + numa_demotion_enabled_store); + +static struct attribute *numa_attrs[] = { + &numa_demotion_enabled_attr.attr, + NULL, +}; + +static const struct attribute_group numa_attr_group = { + .attrs = numa_attrs, +}; + +static int __init numa_init_sysfs(void) +{ + int err; + struct kobject *numa_kobj; + + numa_kobj = kobject_create_and_add("numa", mm_kobj); + if (!numa_kobj) { + pr_err("failed to create numa kobject\n"); + return -ENOMEM; + } + err = sysfs_create_group(numa_kobj, &numa_attr_group); + if (err) { + pr_err("failed to register numa group\n"); + goto delete_obj; + } + return 0; + +delete_obj: + kobject_put(numa_kobj); + return err; +} +subsys_initcall(numa_init_sysfs); +#endif From 2b5ee5d3aa8713c1b535ccda84652e3a9073e969 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Thu, 21 Oct 2021 15:08:24 +1100 Subject: [PATCH 0797/1202] selftests/vm/transhuge-stress: fix ram size thinko When executing transhuge-stress with an argument to specify the virtual memory size for testing, the ram size is reported as 0, e.g. 
transhuge-stress 384 thp-mmap: allocate 192 transhuge pages, using 384 MiB virtual memory and 0 MiB of ram thp-mmap: 0.184 s/loop, 0.957 ms/page, 2090.265 MiB/s 192 succeed, 0 failed This appears to be due to a thinko in commit 0085d61fe05e ("selftests/vm/transhuge-stress: stress test for memory compaction"), where, at a guess, the intent was to base "xyz MiB of ram" on `ram` size. Here are results after using `ram` size: thp-mmap: allocate 192 transhuge pages, using 384 MiB virtual memory and 14 MiB of ram Link: https://lkml.kernel.org/r/20210825135843.29052-1-george_davis@mentor.com Fixes: 0085d61fe05e ("selftests/vm/transhuge-stress: stress test for memory compaction") Signed-off-by: George G. Davis Cc: Konstantin Khlebnikov Cc: Eugeniu Rosca Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/transhuge-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c index fd7f1b4a96f94..5e4c036f6ad38 100644 --- a/tools/testing/selftests/vm/transhuge-stress.c +++ b/tools/testing/selftests/vm/transhuge-stress.c @@ -79,7 +79,7 @@ int main(int argc, char **argv) warnx("allocate %zd transhuge pages, using %zd MiB virtual memory" " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20, - len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); + ram >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1)); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) From 4fb0616dee76cfe1e2785d574c063299ba42230b Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Thu, 21 Oct 2021 15:08:25 +1100 Subject: [PATCH 0798/1202] mm/readahead.c: fix incorrect comments for get_init_ra_size In fact, the values returned by get_init_ra_size() are not as intuitive as the old comment suggests. This patch makes the comment reflect the actual behavior. Link: https://lkml.kernel.org/r/20211019104812.135602-1-linf@wangsu.com Signed-off-by: Lin Feng Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/readahead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/readahead.c b/mm/readahead.c index 41b75d76d36e1..88a84f254ed6f 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -309,7 +309,7 @@ void force_page_cache_ra(struct readahead_control *ractl, * Set the initial window size, round to next power of 2 and square * for small size, x 4 for medium, and x 2 for large * for 128k (32 page) max ra - * 1-8 page = 32k initial, > 8 page = 128k initial + * 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial */ static unsigned long get_init_ra_size(unsigned long size, unsigned long max) { From 01192ddb63349364498bd69b9e60c00b473f56cf Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:08:26 +1100 Subject: [PATCH 0799/1202] mm: nommu: kill arch_get_unmapped_area() With nommu, arch_get_unmapped_area() is never called, so just kill it.
Link: https://lkml.kernel.org/r/20210910061906.36299-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/nommu.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 02d2427b8f9e8..8943dc0e21329 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1639,12 +1639,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); -unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - return -ENOMEM; -} - vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); From fde5179689c283e84b8f8b008b84ca3f37b451f4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 21 Oct 2021 15:08:27 +1100 Subject: [PATCH 0800/1202] selftest/vm: fix ksm selftest to run with different NUMA topologies Platforms can have non-contiguous NUMA nodes like below #numactl -H available: 2 nodes (0,8) ..... node distances: node 0 8 0: 10 40 8: 40 10 #numactl -H available: 1 nodes (1) .... node distances: node 1 1: 10 Hence update the test to not assume the presence of Node 0 and 1 and also use numa_num_configured_nodes() instead of numa_max_node for finding whether to skip the test. Link: https://lkml.kernel.org/r/20210914141414.350759-1-aneesh.kumar@linux.ibm.com Fixes: 82e717ad3501 ("selftests: vm: add KSM merging across nodes test") Signed-off-by: Aneesh Kumar K.V Reviewed-by: Pasha Tatashin Cc: Zhansaya Bagdauletkyzy Cc: Pavel Tatashin Cc: Tyler Hicks Cc: Hugh Dickins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/ksm_tests.c | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index b61dcdb44c5be..0e142d25b0316 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -354,12 +354,34 @@ static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int return KSFT_FAIL; } +static int get_next_mem_node(int node) +{ + + long node_size; + int mem_node = 0; + int i, max_node = numa_max_node(); + + for (i = node + 1; i <= max_node + node; i++) { + mem_node = i % (max_node + 1); + node_size = numa_node_size(mem_node, NULL); + if (node_size > 0) + break; + } + return mem_node; +} + +static int get_first_mem_node(void) +{ + return get_next_mem_node(numa_max_node()); +} + static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes, size_t page_size) { void *numa1_map_ptr, *numa2_map_ptr; struct timespec start_time; int page_count = 2; + int first_node; if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { perror("clock_gettime"); @@ -370,7 +392,7 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a perror("NUMA support not enabled"); return KSFT_SKIP; } - if (numa_max_node() < 1) { + if (numa_num_configured_nodes() <= 1) { printf("At least 2 NUMA nodes must be available\n"); return KSFT_SKIP; } @@ -378,8 +400,9 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a return KSFT_FAIL; /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */ - numa1_map_ptr = numa_alloc_onnode(page_size, 0); - numa2_map_ptr = numa_alloc_onnode(page_size, 1); + first_node = get_first_mem_node(); + numa1_map_ptr = numa_alloc_onnode(page_size, first_node); + numa2_map_ptr = 
numa_alloc_onnode(page_size, get_next_mem_node(first_node)); if (!numa1_map_ptr || !numa2_map_ptr) { perror("numa_alloc_onnode"); return KSFT_FAIL; From 87d8ff62d16936cdc49254fabf9a136a01f3847b Mon Sep 17 00:00:00 2001 From: Pedro Demarchi Gomes Date: Thu, 21 Oct 2021 15:08:27 +1100 Subject: [PATCH 0801/1202] selftests: vm: add KSM huge pages merging time test Add test case of KSM merging time using mostly huge pages Link: https://lkml.kernel.org/r/20211013044045.360251-1-pedrodemargomes@gmail.com Signed-off-by: Pedro Demarchi Gomes Cc: Zhansaya Bagdauletkyzy Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/ksm_tests.c | 125 ++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 0e142d25b0316..1436e1a9a3d38 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -5,6 +5,10 @@ #include #include #include +#include +#include +#include +#include #include "../kselftest.h" #include "../../../../include/vdso/time64.h" @@ -18,6 +22,15 @@ #define KSM_MERGE_ACROSS_NODES_DEFAULT true #define MB (1ul << 20) +#define PAGE_SHIFT 12 +#define HPAGE_SHIFT 21 + +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define HPAGE_SIZE (1 << HPAGE_SHIFT) + +#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0) +#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1)) + struct ksm_sysfs { unsigned long max_page_sharing; unsigned long merge_across_nodes; @@ -34,6 +47,7 @@ enum ksm_test_name { CHECK_KSM_ZERO_PAGE_MERGE, CHECK_KSM_NUMA_MERGE, KSM_MERGE_TIME, + KSM_MERGE_TIME_HUGE_PAGES, KSM_COW_TIME }; @@ -99,6 +113,9 @@ static void print_help(void) " -U (page unmerging)\n" " -P evaluate merging time and speed.\n" " For this test, the size of duplicated memory area (in MiB)\n" + " must be provided using -s option\n" + " -H evaluate merging time and speed of area allocated mostly with huge pages\n" + " For this test, the size of duplicated memory area (in MiB)\n" " must be provided using -s option\n" " -C evaluate the time required to break COW of merged pages.\n\n"); @@ -439,6 +456,101 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a return KSFT_FAIL; } +int64_t allocate_transhuge(void *ptr, int pagemap_fd) +{ + uint64_t ent[2]; + + /* drop pmd */ + if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | + MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr) + errx(2, "mmap transhuge"); + + if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + /* allocate transparent huge page */ + *(volatile void **)ptr = ptr; + + if (pread(pagemap_fd, ent, sizeof(ent), + (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent)) + err(2, "read pagemap"); + + if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) && + PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) && + !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1))) + return PAGEMAP_PFN(ent[0]); + + return -1; +} + +static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size) +{ + void *map_ptr, *map_ptr_orig; + struct timespec start_time, end_time; + unsigned long scan_time_ns; + int pagemap_fd, n_normal_pages, n_huge_pages; + + map_size *= MB; + size_t len = map_size; + + len -= len % HPAGE_SIZE; + map_ptr_orig = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0); + map_ptr = map_ptr_orig + HPAGE_SIZE - 
(uintptr_t)map_ptr_orig % HPAGE_SIZE; + + if (map_ptr_orig == MAP_FAILED) + err(2, "initial mmap"); + + if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + err(2, "MADV_HUGEPAGE"); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + err(2, "open pagemap"); + + n_normal_pages = 0; + n_huge_pages = 0; + for (void *p = map_ptr; p < map_ptr + len; p += HPAGE_SIZE) { + if (allocate_transhuge(p, pagemap_fd) < 0) + n_normal_pages++; + else + n_huge_pages++; + } + printf("Number of normal pages: %d\n", n_normal_pages); + printf("Number of huge pages: %d\n", n_huge_pages); + + memset(map_ptr, '*', len); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) { + perror("clock_gettime"); + goto err_out; + } + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout)) + goto err_out; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) { + perror("clock_gettime"); + goto err_out; + } + + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC + + (end_time.tv_nsec - start_time.tv_nsec); + + printf("Total size: %lu MiB\n", map_size / MB); + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC, + scan_time_ns % NSEC_PER_SEC); + printf("Average speed: %.3f MiB/s\n", (map_size / MB) / + ((double)scan_time_ns / NSEC_PER_SEC)); + + munmap(map_ptr_orig, len + HPAGE_SIZE); + return KSFT_PASS; + +err_out: + printf("Not OK\n"); + munmap(map_ptr_orig, len + HPAGE_SIZE); + return KSFT_FAIL; +} + static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size) { void *map_ptr; @@ -564,7 +676,7 @@ int main(int argc, char *argv[]) bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT; long size_MB = 0; - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) { + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) { switch (opt) { case 'a': prot = str_to_prot(optarg); @@ -618,6 +730,9 @@ int main(int argc, char *argv[]) case 'P': test_name = KSM_MERGE_TIME; break; + case 'H': + test_name = KSM_MERGE_TIME_HUGE_PAGES; + break; case 'C': test_name = KSM_COW_TIME; break; @@ -670,6 +785,14 @@ int main(int argc, char *argv[]) ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, size_MB); break; + case KSM_MERGE_TIME_HUGE_PAGES: + if (size_MB == 0) { + printf("Option '-s' is required.\n"); + return KSFT_FAIL; + } + ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, + ksm_scan_limit_sec, size_MB); + break; case KSM_COW_TIME: ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec, page_size); From 7cdf668700676b0e7712636b866d7b67d825bb1b Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 21 Oct 2021 15:08:28 +1100 Subject: [PATCH 0802/1202] mm/vmstat: annotate data race for zone->free_area[order].nr_free KCSAN reports a data-race on v5.10 which also exists on mainline: ================================================================== BUG: KCSAN: data-race in extfrag_for_order+0x33/0x2d0 race at unknown origin, with read to 0xffff9ee9bfffab48 of 8 bytes by task 34 on cpu 1: extfrag_for_order+0x33/0x2d0 kcompactd+0x5f0/0xce0 kthread+0x1f9/0x220 ret_from_fork+0x22/0x30 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 34 Comm: kcompactd0 Not tainted 5.10.0+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 ================================================================== Access to zone->free_area[order].nr_free in extfrag_for_order()/frag_show_print() is lockless. 
That's intentional and the stats are a rough estimate anyway. Annotate them with data_race(). Link: https://lkml.kernel.org/r/20210908015606.3999871-1-liushixin2@huawei.com Signed-off-by: Liu Shixin Cc: "Paul E . McKenney" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 0f4643e5324d3..24a85576f6560 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1071,7 +1071,7 @@ static void fill_contig_page_info(struct zone *zone, unsigned long blocks; /* Count number of free blocks */ - blocks = zone->free_area[order].nr_free; + blocks = data_race(zone->free_area[order].nr_free); info->free_blocks_total += blocks; /* Count free base pages */ @@ -1446,7 +1446,7 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (order = 0; order < MAX_ORDER; ++order) - seq_printf(m, "%6lu ", zone->free_area[order].nr_free); + seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free)); seq_putc(m, '\n'); } From c7a886357abae1a57252e7add327006c1cc3d83d Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 21 Oct 2021 15:08:29 +1100 Subject: [PATCH 0803/1202] mm-vmstat-annotate-data-race-for-zone-free_areanr_free-fix add comments Link: https://lkml.kernel.org/r/20210918084655.2696522-1-liushixin2@huawei.com Signed-off-by: Liu Shixin Cc: "Paul E . McKenney" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 24a85576f6560..0fde7470368df 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1070,7 +1070,12 @@ static void fill_contig_page_info(struct zone *zone, for (order = 0; order < MAX_ORDER; order++) { unsigned long blocks; - /* Count number of free blocks */ + /* + * Count number of free blocks. + * + * Access to nr_free is lockless as nr_free is used only for + * diagnostic purposes. Use data_race to avoid KCSAN warning. + */ blocks = data_race(zone->free_area[order].nr_free); info->free_blocks_total += blocks; @@ -1446,6 +1451,10 @@ static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); for (order = 0; order < MAX_ORDER; ++order) + /* + * Access to nr_free is lockless as nr_free is used only for + * printing purposes. Use data_race to avoid KCSAN warning. + */ seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free)); seq_putc(m, '\n'); } From baa9ee766c1fc6e958dc9995a0a28c69fd3f433e Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Thu, 21 Oct 2021 15:08:30 +1100 Subject: [PATCH 0804/1202] mm: vmstat.c: make extfrag_index show more pretty fragmentation_index may return -1000, and the corresponding formatted value shown by seq_printf carries a negative sign, but the other, positive formatted values don't carry a positive sign, so the output becomes unaligned.
before: Node 0, zone DMA -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 Node 0, zone DMA32 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 Node 0, zone Normal -1.000 -1.000 -1.000 -1.000 0.931 0.966 0.983 0.992 0.996 0.998 0.999 after this patch: Node 0, zone DMA -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 Node 0, zone DMA32 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 -1.000 Node 0, zone Normal -1.000 -1.000 -1.000 -1.000 0.931 0.966 0.983 0.992 0.996 0.998 0.999 Link: https://lkml.kernel.org/r/20211019103241.134797-1-linf@wangsu.com Signed-off-by: Lin Feng Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 0fde7470368df..d701c335628c8 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -2191,7 +2191,7 @@ static void extfrag_show_print(struct seq_file *m, for (order = 0; order < MAX_ORDER; ++order) { fill_contig_page_info(zone, order, &info); index = __fragmentation_index(order, &info); - seq_printf(m, "%d.%03d ", index / 1000, index % 1000); + seq_printf(m, "%2d.%03d ", index / 1000, index % 1000); } seq_putc(m, '\n'); From 2df362b780aaab073cf232c580a6f6586d52abd2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:31 +1100 Subject: [PATCH 0805/1202] selftests/vm: make MADV_POPULATE_(READ|WRITE) use in-tree headers The madv_populate selftest currently builds with a warning when the locally installed headers (via the distribution) don't include MADV_POPULATE_READ and MADV_POPULATE_WRITE. The warning is correct, because the test cannot locate the necessary header. The reason is that the in-tree installed headers (usr/include) have a "linux" instead of a "sys" subdirectory. Including "linux/mman.h" instead of "sys/mman.h" doesn't work (e.g., mmap() and madvise() are not defined that way). The only thing that seems to work is including "linux/mman.h" in addition to "sys/mman.h". We can get rid of our availability check and simplify. Link: https://lkml.kernel.org/r/20211015165758.41374-1-david@redhat.com Signed-off-by: David Hildenbrand Reported-by: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/vm/madv_populate.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c index b959e4ebdad45..3ee0e82756002 100644 --- a/tools/testing/selftests/vm/madv_populate.c +++ b/tools/testing/selftests/vm/madv_populate.c @@ -14,12 +14,11 @@ #include #include #include +#include #include #include "../kselftest.h" -#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) - /* * For now, we're using 2 MiB of private anonymous memory for all tests.
*/ @@ -328,15 +327,3 @@ int main(int argc, char **argv) err, ksft_test_num()); return ksft_exit_pass(); } - -#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ - -#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition" - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n"); -} - -#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */ From 3838050b7f6ee173756e088bf8d98e3d21b95420 Mon Sep 17 00:00:00 2001 From: Tang Yizhou Date: Thu, 21 Oct 2021 15:08:31 +1100 Subject: [PATCH 0806/1202] mm/memory_hotplug: add static qualifier for online_policy_to_str() online_policy_to_str is only used in memory_hotplug.c and should be defined as static. Link: https://lkml.kernel.org/r/20210913024534.26161-1-tangyizhou@huawei.com Signed-off-by: Tang Yizhou Reviewed-by: Muchun Song Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index feffaa9423fe3..afaae370b8cd0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -57,7 +57,7 @@ enum { ONLINE_POLICY_AUTO_MOVABLE, }; -const char *online_policy_to_str[] = { +static const char * const online_policy_to_str[] = { [ONLINE_POLICY_CONTIG_ZONES] = "contig-zones", [ONLINE_POLICY_AUTO_MOVABLE] = "auto-movable", }; From 05bf9d75795a78d14ae42f41965c28e1a054f410 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:32 +1100 Subject: [PATCH 0807/1202] memory-hotplug.rst: fix two instances of "movablecore" that should be "movable_node" Patch series "memory-hotplug.rst: document the "auto-movable" online policy". Now that the memory-hotplug.rst overhaul is upstream, add proper documentation for the "auto-movable" online policy, documenting all new toggles and options. Alongside, two fixes for the original overhaul. This patch (of 3): We really want to refer to the "movable_node" kernel command line parameter here. Link: https://lkml.kernel.org/r/20210930144117.23641-2-david@redhat.com Fixes: ac3332c44767 ("memory-hotplug.rst: complete admin-guide overhaul") Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Jonathan Corbet Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/memory-hotplug.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 03dfbc9252529..27d748cb6ee0b 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -166,7 +166,7 @@ Or alternatively:: % echo 1 > /sys/devices/system/memory/memoryXXX/online The kernel will select the target zone automatically, usually defaulting to -``ZONE_NORMAL`` unless ``movablecore=1`` has been specified on the kernel +``ZONE_NORMAL`` unless ``movable_node`` has been specified on the kernel command line or if the memory block would intersect the ZONE_MOVABLE already. One can explicitly request to associate an offline memory block with @@ -393,7 +393,7 @@ command line parameters are relevant: ======================== ======================================================= ``memhp_default_state`` configure auto-onlining by essentially setting ``/sys/devices/system/memory/auto_online_blocks``.
-``movablecore`` configure automatic zone selection of the kernel. When +``movable_node`` configure automatic zone selection in the kernel. When set, the kernel will default to ZONE_MOVABLE, unless other zones can be kept contiguous. ======================== ======================================================= From 829a834498a69727849413c41def3da57ba0a2ce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:33 +1100 Subject: [PATCH 0808/1202] memory-hotplug.rst: fix wrong /sys/module/memory_hotplug/parameters/ path We accidentally added a superfluous "s". Link: https://lkml.kernel.org/r/20210930144117.23641-3-david@redhat.com Fixes: ac3332c44767 ("memory-hotplug.rst: complete admin-guide overhaul") Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Jonathan Corbet Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/memory-hotplug.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 27d748cb6ee0b..ee00b70deddef 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -410,7 +410,7 @@ them with ``memory_hotplug.`` such as:: and they can be observed (and some even modified at runtime) via:: - /sys/modules/memory_hotplug/parameters/ + /sys/module/memory_hotplug/parameters/ The following module parameters are currently defined: From e2529fd3dc55a4ad4e065fa8f1a42050b5f420d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:34 +1100 Subject: [PATCH 0809/1202] memory-hotplug.rst: document the "auto-movable" online policy In commit e83a437faa62 ("mm/memory_hotplug: introduce "auto-movable" online policy") we introduced a new memory online policy to automatically select a zone for memory blocks to be onlined. We added a way to set the active online policy and tunables for the auto-movable online policy. In follow-up commits we tweaked the "auto-movable" policy to also consider memory device details when selecting zones for memory blocks to be onlined. Let's document the new toggles and how the two online policies we have work. Link: https://lkml.kernel.org/r/20210930144117.23641-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Jonathan Corbet Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- .../admin-guide/mm/memory-hotplug.rst | 128 +++++++++++++--- 1 file changed, 108 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index ee00b70deddef..c20a2c0031cf3 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -165,9 +165,8 @@ Or alternatively:: % echo 1 > /sys/devices/system/memory/memoryXXX/online -The kernel will select the target zone automatically, usually defaulting to -``ZONE_NORMAL`` unless ``movable_node`` has been specified on the kernel -command line or if the memory block would intersect the ZONE_MOVABLE already. +The kernel will select the target zone automatically, depending on the +configured ``online_policy``.
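The same onlining flow can be driven from a program instead of a shell. Below is a minimal userspace sketch, assuming only the sysfs layout documented here; the memory32 block id is an arbitrary example and the program must run as root:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Block id 32 is an arbitrary example; see the sysfs paths above. */
	const char *path = "/sys/devices/system/memory/memory32/online";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* Writing "1" lets the kernel pick the zone according to the
	 * configured online_policy; "online_movable" or "online_kernel"
	 * would force a specific zone instead. */
	if (fputs("1", f) == EOF || fclose(f) != 0) {
		perror("online");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}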
One can explicitly request to associate an offline memory block with ZONE_MOVABLE by:: @@ -198,6 +197,9 @@ Auto-onlining can be enabled by writing ``online``, ``online_kernel`` or % echo online > /sys/devices/system/memory/auto_online_blocks +Similarly to manual onlining, with ``online`` the kernel will select the +target zone automatically, depending on the configured ``online_policy``. + Modifying the auto-online behavior will only affect all subsequently added memory blocks only. @@ -393,9 +395,11 @@ command line parameters are relevant: ======================== ======================================================= ``memhp_default_state`` configure auto-onlining by essentially setting ``/sys/devices/system/memory/auto_online_blocks``. -``movable_node`` configure automatic zone selection in the kernel. When - set, the kernel will default to ZONE_MOVABLE, unless - other zones can be kept contiguous. +``movable_node`` configure automatic zone selection in the kernel when + using the ``contig-zones`` online policy. When + set, the kernel will default to ZONE_MOVABLE when + onlining a memory block, unless other zones can be kept + contiguous. ======================== ======================================================= Module Parameters @@ -414,20 +418,104 @@ and they can be observed (and some even modified at runtime) via:: The following module parameters are currently defined: -======================== ======================================================= -``memmap_on_memory`` read-write: Allocate memory for the memmap from the - added memory block itself. Even if enabled, actual - support depends on various other system properties and - should only be regarded as a hint whether the behavior - would be desired. - - While allocating the memmap from the memory block - itself makes memory hotplug less likely to fail and - keeps the memmap on the same NUMA node in any case, it - can fragment physical memory in a way that huge pages - in bigger granularity cannot be formed on hotplugged - memory. -======================== ======================================================= +================================ =============================================== +``memmap_on_memory`` read-write: Allocate memory for the memmap from + the added memory block itself. Even if enabled, + actual support depends on various other system + properties and should only be regarded as a + hint whether the behavior would be desired. + + While allocating the memmap from the memory + block itself makes memory hotplug less likely + to fail and keeps the memmap on the same NUMA + node in any case, it can fragment physical + memory in a way that huge pages in bigger + granularity cannot be formed on hotplugged + memory. +``online_policy`` read-write: Set the basic policy used for + automatic zone selection when onlining memory + blocks without specifying a target zone. + ``contig-zones`` has been the kernel default + before this parameter was added. After an + online policy was configured and memory was + online, the policy should not be changed + anymore. + + When set to ``contig-zones``, the kernel will + try keeping zones contiguous. If a memory block + intersects multiple zones or no zone, the + behavior depends on the ``movable_node`` kernel + command line parameter: default to ZONE_MOVABLE + if set, default to the applicable kernel zone + (usually ZONE_NORMAL) if not set. 
+ + When set to ``auto-movable``, the kernel will + try onlining memory blocks to ZONE_MOVABLE if + possible according to the configuration and + memory device details. With this policy, one + can avoid zone imbalances when eventually + hotplugging a lot of memory later and still + wanting to be able to hotunplug as much as + possible reliably, very desirable in + virtualized environments. As one example, a + hotplugged DIMM will be onlined either + completely to ZONE_MOVABLE or completely to + ZONE_NORMAL, not a mixture. + As another example, as many memory blocks + belonging to a virtio-mem device will be + onlined to ZONE_MOVABLE as possible, + special-casing units of memory blocks that can + only get hotunplugged together. *This policy + does not protect from setups that are + problematic with ZONE_MOVABLE and does not + change the zone of memory blocks dynamically + after they were onlined.* +``auto_movable_ratio`` read-write: Set the maximum MOVABLE:KERNEL + memory ratio in % for the ``auto-movable`` + online policy. Whether the ratio applies only + for the system across all NUMA nodes or also + per NUMA nodes depends on the + ``auto_movable_numa_aware`` configuration. + + All accounting is based on present memory pages + in the zones combined with accounting per + memory device. Memory dedicated to the CMA + allocator is accounted as MOVABLE, although + residing on one of the kernel zones. The + possible ratio depends on the actual workload. + The kernel default is "301" %, for example, + allowing for hotplugging 24 GiB to a 8 GiB VM + and automatically onlining all hotplugged + memory to ZONE_MOVABLE in many setups. The + additional 1% deals with some pages being not + present, for example, because of some firmware + allocations. + + Note that ZONE_NORMAL memory provided by one + memory device does not allow for more + ZONE_MOVABLE memory for a different memory + device. As one example, onlining memory of a + hotplugged DIMM to ZONE_NORMAL will not allow + for another hotplugged DIMM to get onlined to + ZONE_MOVABLE automatically. In contrast, memory + hotplugged by a virtio-mem device that got + onlined to ZONE_NORMAL will allow for more + ZONE_MOVABLE memory within *the same* + virtio-mem device. +``auto_movable_numa_aware`` read-write: Configure whether the + ``auto_movable_ratio`` in the ``auto-movable`` + online policy also applies per NUMA + node in addition to the whole system across all + NUMA nodes. The kernel default is "Y". + + Disabling NUMA awareness can be helpful when + dealing with NUMA nodes that should be + completely hotunpluggable, onlining the memory + completely to ZONE_MOVABLE automatically if + possible. + + Parameter availability depends on CONFIG_NUMA. +================================ =============================================== ZONE_MOVABLE ============ From b15a18266d1dc07d9fec47368d0109782e1d39b0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:35 +1100 Subject: [PATCH 0810/1202] memory-hotplug.rst: document the "auto-movable" online policy - "memory-hotplug.rst: document the "auto-movable" online policy" -- Add a pointer to generic command line parameter documentation -- Explain relationship of "auto-movable" and "movable_node" command line parameter, also indicating in which setups "auto-movable" doesn't really make sense. 
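To inspect these policies and tunables on a running system, a small userspace sketch such as the following can dump them; it assumes only the /sys/module/memory_hotplug/parameters/ layout documented in this series and is not part of the patch:

#include <stdio.h>

/* Print one memory_hotplug module parameter, if present. */
static void show_param(const char *name)
{
	char path[128], buf[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/module/memory_hotplug/parameters/%s", name);
	f = fopen(path, "r");
	if (!f) {
		printf("%s: <unavailable>\n", name);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", name, buf); /* buf keeps its newline */
	fclose(f);
}

int main(void)
{
	show_param("online_policy");           /* "contig-zones" or "auto-movable" */
	show_param("auto_movable_ratio");      /* e.g. 301 (percent) */
	show_param("auto_movable_numa_aware"); /* Y/N; depends on CONFIG_NUMA */
	return 0;
}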
Link: https://lkml.kernel.org/r/20211011082058.6076-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Jonathan Corbet Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/memory-hotplug.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index c20a2c0031cf3..0f56ecd8ac054 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -402,6 +402,9 @@ command line parameters are relevant: contiguous. ======================== ======================================================= +See Documentation/admin-guide/kernel-parameters.txt for a more generic +description of these command line parameters. + Module Parameters ------------------ @@ -457,7 +460,17 @@ The following module parameters are currently defined: hotplugging a lot of memory later and still wanting to be able to hotunplug as much as possible reliably, very desirable in - virtualized environments. As one example, a + virtualized environments. This policy ignores + the ``movable_node`` kernel command line + parameter and isn't really applicable in + environments that require it (e.g., bare metal + with hotunpluggable nodes) where hotplugged + memory might be exposed via the + firmware-provided memory map early during boot + to the system instead of getting detected, + added and onlined later during boot (such as + done by virtio-mem or by some hypervisors + implementing emulated DIMMs). As one example, a hotplugged DIMM will be onlined either completely to ZONE_MOVABLE or completely to ZONE_NORMAL, not a mixture. From 64c784445fd9531fe9e0168c90ff94fe66f4851a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:36 +1100 Subject: [PATCH 0811/1202] mm/memory_hotplug: remove CONFIG_X86_64_ACPI_NUMA dependency from CONFIG_MEMORY_HOTPLUG Patch series "mm/memory_hotplug: Kconfig and 32 bit cleanups". Some cleanups around CONFIG_MEMORY_HOTPLUG, including removing 32 bit leftovers of memory hotplug support. This patch (of 6): SPARSEMEM is the only possible memory model for x86-64, FLATMEM is not possible: config ARCH_FLATMEM_ENABLE def_bool y depends on X86_32 && !NUMA And X86_64_ACPI_NUMA (obviously) only supports x86-64: config X86_64_ACPI_NUMA def_bool y depends on X86_64 && NUMA && ACPI && PCI Let's just remove the CONFIG_X86_64_ACPI_NUMA dependency, as it does no longer make sense. Link: https://lkml.kernel.org/r/20210929143600.49379-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Jonathan Corbet Cc: Alex Shi Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Shuah Khan Cc: Michal Hocko Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index d16ba9249bc53..b7fb3f0b485e8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -123,7 +123,7 @@ config ARCH_ENABLE_MEMORY_HOTPLUG config MEMORY_HOTPLUG bool "Allow for memory hot-add" select MEMORY_ISOLATION - depends on SPARSEMEM || X86_64_ACPI_NUMA + depends on SPARSEMEM depends on ARCH_ENABLE_MEMORY_HOTPLUG depends on 64BIT || BROKEN select NUMA_KEEP_MEMINFO if NUMA From dfead01977b7faaea65aa959e2f1353b0c9e38b3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:37 +1100 Subject: [PATCH 0812/1202] mm/memory_hotplug: remove CONFIG_MEMORY_HOTPLUG_SPARSE CONFIG_MEMORY_HOTPLUG depends on CONFIG_SPARSEMEM, so there is no need for CONFIG_MEMORY_HOTPLUG_SPARSE anymore; adjust all instances to use CONFIG_MEMORY_HOTPLUG and remove CONFIG_MEMORY_HOTPLUG_SPARSE. Link: https://lkml.kernel.org/r/20210929143600.49379-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Shuah Khan [kselftest] Acked-by: Greg Kroah-Hartman Acked-by: Oscar Salvador Cc: Alex Shi Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jonathan Corbet Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/platforms/powernv/setup.c | 4 ++-- arch/powerpc/platforms/pseries/setup.c | 2 +- drivers/base/Makefile | 2 +- drivers/base/node.c | 9 ++++--- drivers/virtio/Kconfig | 2 +- include/linux/memory.h | 24 ++++++++----------- include/linux/node.h | 4 ++-- lib/Kconfig.debug | 2 +- mm/Kconfig | 4 ---- mm/memory_hotplug.c | 2 -- tools/testing/selftests/memory-hotplug/config | 1 - 13 files changed, 24 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 764f2732a8218..d8a2ca0070823 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -32,7 +32,7 @@ struct machdep_calls { void (*iommu_save)(void); void (*iommu_restore)(void); #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG unsigned long (*memory_block_size)(void); #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1777e992b20bd..6052f5d5ded34 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -912,7 +912,7 @@ void __init setup_per_cpu_areas(void) } #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG unsigned long memory_block_size_bytes(void) { if (ppc_md.memory_block_size) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index a8db3f1530639..ad56a54ac9c57 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -440,7 +440,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) } #endif /* CONFIG_KEXEC_CORE */ -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG static unsigned long pnv_memory_block_size(void) { /* @@ -553,7 +553,7 @@ define_machine(powernv) { #ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = 
pnv_kexec_cpu_down, #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG .memory_block_size = pnv_memory_block_size, #endif }; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index f79126f16258a..d29f6f1f7f37f 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1089,7 +1089,7 @@ define_machine(pseries) { .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG .memory_block_size = pseries_memory_block_size, #endif }; diff --git a/drivers/base/Makefile b/drivers/base/Makefile index ef8e44a7d2881..02f7f1358e865 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -13,7 +13,7 @@ obj-y += power/ obj-$(CONFIG_ISA_BUS_API) += isa.o obj-y += firmware_loader/ obj-$(CONFIG_NUMA) += node.o -obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o +obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o endif diff --git a/drivers/base/node.c b/drivers/base/node.c index c56d34f8158f7..b5a4ba18f9f90 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -629,7 +629,7 @@ static void node_device_release(struct device *dev) { struct node *node = to_node(dev); -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) /* * We schedule the work only when a memory section is * onlined/offlined on this node. When we come here, @@ -782,7 +782,7 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifdef CONFIG_MEMORY_HOTPLUG static int __ref get_nid_for_pfn(unsigned long pfn) { #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT @@ -958,10 +958,9 @@ static int node_memory_callback(struct notifier_block *self, return NOTIFY_OK; } #endif /* CONFIG_HUGETLBFS */ -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +#endif /* CONFIG_MEMORY_HOTPLUG */ -#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ - !defined(CONFIG_HUGETLBFS) +#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS) static inline int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index ce1b3f6ec325d..3654def9915c5 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -98,7 +98,7 @@ config VIRTIO_MEM default m depends on X86_64 depends on VIRTIO - depends on MEMORY_HOTPLUG_SPARSE + depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on CONTIG_ALLOC help diff --git a/include/linux/memory.h b/include/linux/memory.h index 053a530c7bdd3..0328ec039c38f 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -110,7 +110,7 @@ struct mem_section; #define SLAB_CALLBACK_PRI 1 #define IPC_CALLBACK_PRI 10 -#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE +#ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) { return; @@ -126,7 +126,14 @@ static inline int memory_notify(unsigned long val, void *v) { return 0; } -#else +static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) +{ + return 0; +} +/* These aren't inline functions due to a GCC bug. 
*/ +#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) +#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) +#else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size, @@ -148,9 +155,6 @@ struct memory_group *memory_group_find_by_id(int mgid); typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *); int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, struct memory_group *excluded, void *arg); -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ - -#ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) ({ \ static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri };\ @@ -158,15 +162,7 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, }) #define register_hotmemory_notifier(nb) register_memory_notifier(nb) #define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) -#else -static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) -{ - return 0; -} -/* These aren't inline functions due to a GCC bug. */ -#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) -#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) -#endif +#endif /* CONFIG_MEMORY_HOTPLUG */ /* * Kernel text modification mutex, used for code patching. Users of this lock diff --git a/include/linux/node.h b/include/linux/node.h index 8e5a29897936c..bb21fd631b162 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,7 +85,7 @@ struct node { struct device dev; struct list_head access_list; -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) struct work_struct node_work; #endif #ifdef CONFIG_HMEM_REPORTING @@ -98,7 +98,7 @@ struct memory_block; extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); -#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) +#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, enum meminit_context context); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2a9b6dcdac4ff..669fee1d26b83 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -877,7 +877,7 @@ config DEBUG_MEMORY_INIT config MEMORY_NOTIFIER_ERROR_INJECT tristate "Memory hotplug notifier error injection module" - depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION + depends on MEMORY_HOTPLUG && NOTIFIER_ERROR_INJECTION help This option provides the ability to inject artificial errors to memory hotplug notifier chain callbacks. 
It is controlled through diff --git a/mm/Kconfig b/mm/Kconfig index b7fb3f0b485e8..ea8762cd8e1e5 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -128,10 +128,6 @@ config MEMORY_HOTPLUG depends on 64BIT || BROKEN select NUMA_KEEP_MEMINFO if NUMA -config MEMORY_HOTPLUG_SPARSE - def_bool y - depends on SPARSEMEM && MEMORY_HOTPLUG - config MEMORY_HOTPLUG_DEFAULT_ONLINE bool "Online the newly added memory blocks by default" depends on MEMORY_HOTPLUG diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index afaae370b8cd0..fc07ce7b58428 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -220,7 +220,6 @@ static void release_memory_resource(struct resource *res) kfree(res); } -#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, const char *reason) { @@ -1163,7 +1162,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, mem_hotplug_done(); return ret; } -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ static void reset_node_present_pages(pg_data_t *pgdat) { diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config index a7e8cd5bb265d..1eef042a31e1a 100644 --- a/tools/testing/selftests/memory-hotplug/config +++ b/tools/testing/selftests/memory-hotplug/config @@ -1,5 +1,4 @@ CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_NOTIFIER_ERROR_INJECTION=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_MEMORY_HOTREMOVE=y From 1f874070d24e247259a81bb4f64cf3ee9e178880 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:37 +1100 Subject: [PATCH 0813/1202] mm/memory_hotplug: restrict CONFIG_MEMORY_HOTPLUG to 64 bit 32 bit support is broken in various ways: for example, we can online memory that should actually go to ZONE_HIGHMEM to ZONE_MOVABLE or in some cases even to one of the other kernel zones. We marked it BROKEN in commit b59d02ed0869 ("mm/memory_hotplug: disable the functionality for 32b") almost one year ago. According to that commit it might be broken at least since 2017. Further, there is hardly a sane use case nowadays. Let's just depend completely on 64bit, dropping the "BROKEN" dependency to make clear that we are not going to support it again. Next, we'll remove some HIGHMEM leftovers from memory hotplug code to clean up. Link: https://lkml.kernel.org/r/20210929143600.49379-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Alex Shi Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jonathan Corbet Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Shuah Khan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index ea8762cd8e1e5..88273dd5c6d6b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -125,7 +125,7 @@ config MEMORY_HOTPLUG select MEMORY_ISOLATION depends on SPARSEMEM depends on ARCH_ENABLE_MEMORY_HOTPLUG - depends on 64BIT || BROKEN + depends on 64BIT select NUMA_KEEP_MEMINFO if NUMA config MEMORY_HOTPLUG_DEFAULT_ONLINE From 0872b9d2c097b1d60af5116aec050040cb9cf9d8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:38 +1100 Subject: [PATCH 0814/1202] mm/memory_hotplug: remove HIGHMEM leftovers We don't support CONFIG_MEMORY_HOTPLUG on 32 bit and consequently not HIGHMEM. Let's remove any leftover code -- including the unused "status_change_nid_high" field part of the memory notifier. Link: https://lkml.kernel.org/r/20210929143600.49379-5-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Alex Shi Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jonathan Corbet Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Shuah Khan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/core-api/memory-hotplug.rst | 3 -- .../zh_CN/core-api/memory-hotplug.rst | 4 --- include/linux/memory.h | 1 - mm/memory_hotplug.c | 36 ++----------------- 4 files changed, 2 insertions(+), 42 deletions(-) diff --git a/Documentation/core-api/memory-hotplug.rst b/Documentation/core-api/memory-hotplug.rst index de7467e480672..682259ee633ac 100644 --- a/Documentation/core-api/memory-hotplug.rst +++ b/Documentation/core-api/memory-hotplug.rst @@ -57,7 +57,6 @@ The third argument (arg) passes a pointer of struct memory_notify:: unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; - int status_change_nid_high; int status_change_nid; } @@ -65,8 +64,6 @@ The third argument (arg) passes a pointer of struct memory_notify:: - nr_pages is # of pages of online/offline memory. - status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask is (will be) set/clear, if this is -1, then nodemask status is not changed. -- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask - is (will be) set/clear, if this is -1, then nodemask status is not changed. - status_change_nid is set node id when N_MEMORY of nodemask is (will be) set/clear. It means a new(memoryless) node gets new memory by online and a node loses all memory. If this is -1, then nodemask status is not changed. 
diff --git a/Documentation/translations/zh_CN/core-api/memory-hotplug.rst b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst index 161f4d2c18cc7..9a204eb196f2d 100644 --- a/Documentation/translations/zh_CN/core-api/memory-hotplug.rst +++ b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst @@ -63,7 +63,6 @@ memory_notify结构体的指针:: unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; - int status_change_nid_high; int status_change_nid; } @@ -74,9 +73,6 @@ memory_notify结构体的指针:: - status_change_nid_normal是当nodemask的N_NORMAL_MEMORY被设置/清除时设置节 点id,如果是-1,则nodemask状态不改变。 -- status_change_nid_high是当nodemask的N_HIGH_MEMORY被设置/清除时设置的节点 - id,如果这个值为-1,那么nodemask状态不会改变。 - - status_change_nid是当nodemask的N_MEMORY被(将)设置/清除时设置的节点id。这 意味着一个新的(没上线的)节点通过联机获得新的内存,而一个节点失去了所有的内 存。如果这个值为-1,那么nodemask的状态就不会改变。 diff --git a/include/linux/memory.h b/include/linux/memory.h index 0328ec039c38f..88eb587b51438 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -96,7 +96,6 @@ struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; - int status_change_nid_high; int status_change_nid; }; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc07ce7b58428..0aa7ca3dfbc9f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -585,10 +584,6 @@ void generic_online_page(struct page *page, unsigned int order) debug_pagealloc_map_pages(page, 1 << order); __free_pages_core(page, order); totalram_pages_add(1UL << order); -#ifdef CONFIG_HIGHMEM - if (PageHighMem(page)) - totalhigh_pages_add(1UL << order); -#endif } EXPORT_SYMBOL_GPL(generic_online_page); @@ -625,16 +620,11 @@ static void node_states_check_changes_online(unsigned long nr_pages, arg->status_change_nid = NUMA_NO_NODE; arg->status_change_nid_normal = NUMA_NO_NODE; - arg->status_change_nid_high = NUMA_NO_NODE; if (!node_state(nid, N_MEMORY)) arg->status_change_nid = nid; if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY)) arg->status_change_nid_normal = nid; -#ifdef CONFIG_HIGHMEM - if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY)) - arg->status_change_nid_high = nid; -#endif } static void node_states_set_node(int node, struct memory_notify *arg) @@ -642,9 +632,6 @@ static void node_states_set_node(int node, struct memory_notify *arg) if (arg->status_change_nid_normal >= 0) node_set_state(node, N_NORMAL_MEMORY); - if (arg->status_change_nid_high >= 0) - node_set_state(node, N_HIGH_MEMORY); - if (arg->status_change_nid >= 0) node_set_state(node, N_MEMORY); } @@ -1801,7 +1788,6 @@ static void node_states_check_changes_offline(unsigned long nr_pages, arg->status_change_nid = NUMA_NO_NODE; arg->status_change_nid_normal = NUMA_NO_NODE; - arg->status_change_nid_high = NUMA_NO_NODE; /* * Check whether node_states[N_NORMAL_MEMORY] will be changed. @@ -1816,24 +1802,9 @@ static void node_states_check_changes_offline(unsigned long nr_pages, if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages) arg->status_change_nid_normal = zone_to_nid(zone); -#ifdef CONFIG_HIGHMEM - /* - * node_states[N_HIGH_MEMORY] contains nodes which - * have normal memory or high memory. - * Here we add the present_pages belonging to ZONE_HIGHMEM. - * If the zone is within the range of [0..ZONE_HIGHMEM), and - * we determine that the zones in that range become empty, - * we need to clear the node for N_HIGH_MEMORY. 
- */ - present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages; - if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages) - arg->status_change_nid_high = zone_to_nid(zone); -#endif - /* - * We have accounted the pages from [0..ZONE_NORMAL), and - * in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM - * as well. + * We have accounted the pages from [0..ZONE_NORMAL); ZONE_HIGHMEM + * does not apply as we don't support 32bit. * Here we count the possible pages from ZONE_MOVABLE. * If after having accounted all the pages, we see that the nr_pages * to be offlined is over or equal to the accounted pages, @@ -1851,9 +1822,6 @@ static void node_states_clear_node(int node, struct memory_notify *arg) if (arg->status_change_nid_normal >= 0) node_clear_state(node, N_NORMAL_MEMORY); - if (arg->status_change_nid_high >= 0) - node_clear_state(node, N_HIGH_MEMORY); - if (arg->status_change_nid >= 0) node_clear_state(node, N_MEMORY); } From 7bda8eb42cd0853670a09add7a14f8833d9b140a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:39 +1100 Subject: [PATCH 0815/1202] mm/memory_hotplug: remove stale function declarations These functions no longer exist. Link: https://lkml.kernel.org/r/20210929143600.49379-6-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Alex Shi Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jonathan Corbet Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Shuah Khan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memory_hotplug.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e5a867c950b27..be48e003a5183 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -98,9 +98,6 @@ static inline void zone_seqlock_init(struct zone *zone) { seqlock_init(&zone->span_seqlock); } -extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages); -extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages); -extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); extern void adjust_present_page_count(struct page *page, struct memory_group *group, long nr_pages); From 23b704d2960de672ca0c9d0390c29c795c99d706 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:40 +1100 Subject: [PATCH 0816/1202] x86: remove memory hotplug support on X86_32 CONFIG_MEMORY_HOTPLUG was marked BROKEN over one year and we just restricted it to 64 bit. Let's remove the unused x86 32bit implementation and simplify the Kconfig. Link: https://lkml.kernel.org/r/20210929143600.49379-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Alex Shi Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Jonathan Corbet Cc: Michael Ellerman Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Shuah Khan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/Kconfig | 6 +++--- arch/x86/mm/init_32.c | 31 ------------------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9830e7e1060f..b2fb68da6697a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -62,7 +62,7 @@ config X86 select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION - select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) + select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if (PGTABLE_LEVELS > 2) && (X86_64 || X86_PAE) select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE @@ -1614,7 +1614,7 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_MEMORY_PROBE bool "Enable sysfs memory/probe interface" - depends on X86_64 && MEMORY_HOTPLUG + depends on MEMORY_HOTPLUG help This option enables a sysfs memory/probe interface for testing. See Documentation/admin-guide/mm/memory-hotplug.rst for more information. @@ -2394,7 +2394,7 @@ endmenu config ARCH_HAS_ADD_PAGES def_bool y - depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG + depends on ARCH_ENABLE_MEMORY_HOTPLUG config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE def_bool y diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bd90b8fe81e45..5cd7ea6d645cf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -779,37 +779,6 @@ void __init mem_init(void) test_wp_bit(); } -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_params *params) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - int ret; - - /* - * The page tables were already mapped at boot so if the caller - * requests a different mapping type then we must change all the - * pages with __set_memory_prot(). - */ - if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { - ret = __set_memory_prot(start, nr_pages, params->pgprot); - if (ret) - return ret; - } - - return __add_pages(nid, start_pfn, nr_pages, params); -} - -void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - __remove_pages(start_pfn, nr_pages, altmap); -} -#endif - int kernel_set_to_readonly __read_mostly; static void mark_nxdata_nx(void) From 94930cf34695a3b643f5c418e91aa9fb711bb4bb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:41 +1100 Subject: [PATCH 0817/1202] mm/memory_hotplug: handle memblock_add_node() failures in add_memory_resource() Patch series "mm/memory_hotplug: full support for add_memory_driver_managed() with CONFIG_ARCH_KEEP_MEMBLOCK", v2. Architectures that require CONFIG_ARCH_KEEP_MEMBLOCK=y, such as arm64, don't cleanly support add_memory_driver_managed() yet. Most prominently, kexec_file can still end up placing kexec images on such driver-managed memory, resulting in undesired behavior, for example, having kexec images located on memory not part of the firmware-provided memory map. Teaching kexec to not place images on driver-managed memory is especially relevant for virtio-mem. Details can be found in commit 7b7b27214bba ("mm/memory_hotplug: introduce add_memory_driver_managed()"). Extend memblock with a new flag and set it from memory hotplug code when applicable. 
This is required to fully support virtio-mem on arm64, making also kexec_file behave like on x86-64. This patch (of 2): If memblock_add_node() fails, we're most probably running out of memory. While this is unlikely to happen, it can happen and having memory added without a memblock can be problematic for architectures that use memblock to detect valid memory. Let's fail in a nice way instead of silently ignoring the error. Link: https://lkml.kernel.org/r/20211004093605.5830-1-david@redhat.com Link: https://lkml.kernel.org/r/20211004093605.5830-2-david@redhat.com Signed-off-by: David Hildenbrand Cc: Mike Rapoport Cc: Michal Hocko Cc: Oscar Salvador Cc: Jianyong Wu Cc: "Aneesh Kumar K . V" Cc: Vineet Gupta Cc: Geert Uytterhoeven Cc: Huacai Chen Cc: Jiaxun Yang Cc: Thomas Bogendoerfer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Eric Biederman Cc: Arnd Bergmann Cc: Shahab Vahedi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory_hotplug.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0aa7ca3dfbc9f..9d254e88221ec 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1369,8 +1369,11 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) mem_hotplug_begin(); - if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) - memblock_add_node(start, size, nid); + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { + ret = memblock_add_node(start, size, nid); + if (ret) + goto error_mem_hotplug_end; + } ret = __try_online_node(nid, false); if (ret < 0) @@ -1443,6 +1446,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) rollback_node_hotadd(nid); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); +error_mem_hotplug_end: mem_hotplug_done(); return ret; } From 61eb1c0e71a579f9c68f38b6831ba1bea10bf316 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:41 +1100 Subject: [PATCH 0818/1202] memblock: improve MEMBLOCK_HOTPLUG documentation The description of MEMBLOCK_HOTPLUG is currently short and consequently misleading: we're actually dealing with a memory region that might get hotunplugged later (i.e., the platform+firmware supports it), yet it is indicated in the firmware-provided memory map as system ram that will just get used by the system for any purpose when not taking special care. The firmware marked this memory region as a hot(un)plugged (e.g., hotplugged before reboot), implying that it might get hotunplugged again later. Whether we consider this information depends on the "movable_node" kernel commandline parameter: only with "movable_node" set, we'll try keeping this memory hotunpluggable, for example, by not serving early allocations from this memory region and by letting the buddy manage it using the ZONE_MOVABLE. Let's make this clearer by extending the documentation. Note: kexec *has to* indicate this memory to the second kernel. With "movable_node" set, we don't want to place kexec-images on this memory. Without "movable_node" set, we don't care and can place kexec-images on this memory. In both cases, after successful memory hotunplug, kexec has to be re-armed to update the memory map for the second kernel and to place the kexec-images somewhere else. Link: https://lkml.kernel.org/r/20211004093605.5830-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: "Aneesh Kumar K . 
V" Cc: Arnd Bergmann Cc: Christian Borntraeger Cc: Eric Biederman Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Huacai Chen Cc: Jianyong Wu Cc: Jiaxun Yang Cc: Michal Hocko Cc: Oscar Salvador Cc: Shahab Vahedi Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memblock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 484650681beeb..1d2b3e902a845 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -28,7 +28,11 @@ extern unsigned long long max_possible_pfn; /** * enum memblock_flags - definition of memory region attributes * @MEMBLOCK_NONE: no special request - * @MEMBLOCK_HOTPLUG: hotpluggable region + * @MEMBLOCK_HOTPLUG: memory region indicated in the firmware-provided memory + * map during early boot as hot(un)pluggable system RAM (e.g., memory range + * that might get hotunplugged later). With "movable_node" set on the kernel + * commandline, try keeping this memory region hotunpluggable. Does not apply + * to memblocks added ("hotplugged") after early boot. * @MEMBLOCK_MIRROR: mirrored region * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as * reserved in the memory map; refer to memblock_mark_nomap() description From 86975981a1b97a507e7f2c19418c5a2d147485c9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:42 +1100 Subject: [PATCH 0819/1202] memblock: allow to specify flags with memblock_add_node() We want to specify flags when hotplugging memory. Let's prepare to pass flags to memblock_add_node() by adjusting all existing users. Note that when hotplugging memory the system is already up and running and we might have concurrent memblock users: for example, while we're hotplugging memory, kexec_file code might search for suitable memory regions to place kexec images. It's important to add the memory directly to memblock via a single call with the right flags, instead of adding the memory first and apply flags later: otherwise, concurrent memblock users might temporarily stumble over memblocks with wrong flags, which will be important in a follow-up patch that introduces a new flag to properly handle add_memory_driver_managed(). Link: https://lkml.kernel.org/r/20211004093605.5830-4-david@redhat.com Acked-by: Geert Uytterhoeven Acked-by: Heiko Carstens Signed-off-by: David Hildenbrand Acked-by: Shahab Vahedi [arch/arc] Reviewed-by: Mike Rapoport Cc: "Aneesh Kumar K . 
V" Cc: Arnd Bergmann Cc: Christian Borntraeger Cc: Eric Biederman Cc: Huacai Chen Cc: Jianyong Wu Cc: Jiaxun Yang Cc: Michal Hocko Cc: Oscar Salvador Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arc/mm/init.c | 4 ++-- arch/ia64/mm/contig.c | 2 +- arch/ia64/mm/init.c | 2 +- arch/m68k/mm/mcfmmu.c | 3 ++- arch/m68k/mm/motorola.c | 6 ++++-- arch/mips/loongson64/init.c | 4 +++- arch/mips/sgi-ip27/ip27-memory.c | 3 ++- arch/s390/kernel/setup.c | 3 ++- include/linux/memblock.h | 3 ++- include/linux/mm.h | 2 +- mm/memblock.c | 9 +++++---- mm/memory_hotplug.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 59408f6a02d4a..ce4e939a7f077 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -59,13 +59,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) low_mem_sz = size; in_use = 1; - memblock_add_node(base, size, 0); + memblock_add_node(base, size, 0, MEMBLOCK_NONE); } else { #ifdef CONFIG_HIGHMEM high_mem_start = base; high_mem_sz = size; in_use = 1; - memblock_add_node(base, size, 1); + memblock_add_node(base, size, 1, MEMBLOCK_NONE); memblock_reserve(base, size); #endif } diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 42e025cfbd088..24901d8093015 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -153,7 +153,7 @@ find_memory (void) efi_memmap_walk(find_max_min_low_pfn, NULL); max_pfn = max_low_pfn; - memblock_add_node(0, PFN_PHYS(max_low_pfn), 0); + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE); find_initrd(); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 5c6da8d83c1ad..5d165607bf354 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -378,7 +378,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid) #endif if (start < end) - memblock_add_node(__pa(start), end - start, nid); + memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE); return 0; } diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index eac9dde651934..6f1f251252944 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -174,7 +174,8 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0, + MEMBLOCK_NONE); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 9f3f77785aa78..2b05bb2bac00d 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -410,7 +410,8 @@ void __init paging_init(void) min_addr = m68k_memory[0].addr; max_addr = min_addr + m68k_memory[0].size; - memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0, + MEMBLOCK_NONE); for (i = 1; i < m68k_num_memory;) { if (m68k_memory[i].addr < min_addr) { printk("Ignoring memory chunk at 0x%lx:0x%lx before the first chunk\n", @@ -421,7 +422,8 @@ void __init paging_init(void) (m68k_num_memory - i) * sizeof(struct m68k_mem_info)); continue; } - memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i); + memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i, + MEMBLOCK_NONE); addr = m68k_memory[i].addr + m68k_memory[i].size; if (addr > max_addr) max_addr = addr; diff --git a/arch/mips/loongson64/init.c 
b/arch/mips/loongson64/init.c index 76e0a9636a0ed..4ac5ba80bbf62 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -77,7 +77,9 @@ void __init szmem(unsigned int node) (u32)node_id, mem_type, mem_start, mem_size); pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(node_psize), node); + memblock_add_node(PFN_PHYS(start_pfn), + PFN_PHYS(node_psize), node, + MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 6173684b5aaa0..adc2faeecf7c0 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -341,7 +341,8 @@ static void __init szmem(void) continue; } memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)), - PFN_PHYS(slot_psize), node); + PFN_PHYS(slot_psize), node, + MEMBLOCK_NONE); } } } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7fc836e9e1940..8a378d4262398 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -593,7 +593,8 @@ static void __init setup_resources(void) * part of the System RAM resource. */ if (crashk_res.end) { - memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0); + memblock_add_node(crashk_res.start, resource_size(&crashk_res), + 0, MEMBLOCK_NONE); memblock_reserve(crashk_res.start, resource_size(&crashk_res)); insert_resource(&iomem_resource, &crashk_res); } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1d2b3e902a845..8b6560dc7a9ed 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -104,7 +104,8 @@ static inline void memblock_discard(void) {} #endif void memblock_allow_resize(void); -int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); +int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, + enum memblock_flags flags); int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_phys_free(phys_addr_t base, phys_addr_t size); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7e37c726b9db1..15058d9cec99b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2425,7 +2425,7 @@ static inline unsigned long get_num_physpages(void) * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn, * max_highmem_pfn}; * for_each_valid_physical_page_range() - * memblock_add_node(base, size, nid) + * memblock_add_node(base, size, nid, MEMBLOCK_NONE) * free_area_init(max_zone_pfns); */ void free_area_init(unsigned long *max_zone_pfn); diff --git a/mm/memblock.c b/mm/memblock.c index ed28cb95b8830..7b81178059016 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -655,6 +655,7 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * @base: base address of the new region * @size: size of the new region * @nid: nid of the new region + * @flags: flags of the new region * * Add new memblock region [@base, @base + @size) to the "memory" * type. See memblock_add_range() description for mode details @@ -663,14 +664,14 @@ static int __init_memblock memblock_add_range(struct memblock_type *type, * 0 on success, -errno on failure. 
*/ int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, - int nid) + int nid, enum memblock_flags flags) { phys_addr_t end = base + size - 1; - memblock_dbg("%s: [%pa-%pa] nid=%d %pS\n", __func__, - &base, &end, nid, (void *)_RET_IP_); + memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__, + &base, &end, nid, flags, (void *)_RET_IP_); - return memblock_add_range(&memblock.memory, base, size, nid, 0); + return memblock_add_range(&memblock.memory, base, size, nid, flags); } /** diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9d254e88221ec..cedeaa4d6f0ee 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1370,7 +1370,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) mem_hotplug_begin(); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { - ret = memblock_add_node(start, size, nid); + ret = memblock_add_node(start, size, nid, MEMBLOCK_NONE); if (ret) goto error_mem_hotplug_end; } From c262f02f0b9a2ecc0c9008f0faec7e15083b354a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:43 +1100 Subject: [PATCH 0820/1202] memblock: add MEMBLOCK_DRIVER_MANAGED to mimic IORESOURCE_SYSRAM_DRIVER_MANAGED Let's add a flag that corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED, indicating that we're dealing with a memory region that is never indicated in the firmware-provided memory map, but always detected and added by a driver. Similar to MEMBLOCK_HOTPLUG, most infrastructure has to treat such memory regions like ordinary MEMBLOCK_NONE memory regions -- for example, when selecting memory regions to add to the vmcore for dumping in the crashkernel via for_each_mem_range(). However, especially kexec_file is not supposed to select such memblocks via for_each_free_mem_range() / for_each_free_mem_range_reverse() to place kexec images, similar to how we handle IORESOURCE_SYSRAM_DRIVER_MANAGED without CONFIG_ARCH_KEEP_MEMBLOCK. We'll make sure that memory hotplug code sets the flag where applicable (IORESOURCE_SYSRAM_DRIVER_MANAGED) next. This prepares architectures that need CONFIG_ARCH_KEEP_MEMBLOCK, such as arm64, for virtio-mem support. Note that kexec *must not* indicate this memory to the second kernel and *must not* place kexec-images on this memory. Let's add a comment to kexec_walk_memblock(), documenting how we handle MEMBLOCK_DRIVER_MANAGED now just like using IORESOURCE_SYSRAM_DRIVER_MANAGED in locate_mem_hole_callback() for kexec_walk_resources(). Also note that MEMBLOCK_HOTPLUG cannot be reused due to different semantics: MEMBLOCK_HOTPLUG: memory is indicated as "System RAM" in the firmware-provided memory map and added to the system early during boot; kexec *has to* indicate this memory to the second kernel and can place kexec-images on this memory. After memory hotunplug, kexec has to be re-armed. We mostly ignore this flag when "movable_node" is not set on the kernel command line, because then we're told to not care about hotunpluggability of such memory regions. MEMBLOCK_DRIVER_MANAGED: memory is not indicated as "System RAM" in the firmware-provided memory map; this memory is always detected and added to the system by a driver; memory might not actually be physically hotunpluggable. kexec *must not* indicate this memory to the second kernel and *must not* place kexec-images on this memory. Link: https://lkml.kernel.org/r/20211004093605.5830-5-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: "Aneesh Kumar K . 
V" Cc: Arnd Bergmann Cc: Christian Borntraeger Cc: Eric Biederman Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Huacai Chen Cc: Jianyong Wu Cc: Jiaxun Yang Cc: Michal Hocko Cc: Oscar Salvador Cc: Shahab Vahedi Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/memblock.h | 16 ++++++++++++++-- kernel/kexec_file.c | 5 +++++ mm/memblock.c | 4 ++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8b6560dc7a9ed..7df557b16c1e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -37,12 +37,17 @@ extern unsigned long long max_possible_pfn; * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as * reserved in the memory map; refer to memblock_mark_nomap() description * for further details + * @MEMBLOCK_DRIVER_MANAGED: memory region that is always detected and added + * via a driver, and never indicated in the firmware-provided memory map as + * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the + * kernel resource tree. */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ MEMBLOCK_HOTPLUG = 0x1, /* hotpluggable region */ MEMBLOCK_MIRROR = 0x2, /* mirrored region */ MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ + MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ }; /** @@ -213,7 +218,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_HOTPLUG, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED, \ + p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -224,7 +230,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_HOTPLUG, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED,\ + p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas @@ -254,6 +261,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m) return m->flags & MEMBLOCK_NOMAP; } +static inline bool memblock_is_driver_managed(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_DRIVER_MANAGED; +} + int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 33400ff051a84..8347fc158d2b9 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -556,6 +556,11 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf, if (kbuf->image->type == KEXEC_TYPE_CRASH) return func(&crashk_res, kbuf); + /* + * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See + * IORESOURCE_SYSRAM_DRIVER_MANAGED handling in + * locate_mem_hole_callback(). 
+ */ if (kbuf->top_down) { for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE, &mstart, &mend, NULL) { diff --git a/mm/memblock.c b/mm/memblock.c index 7b81178059016..aa87ff5ae2a46 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -984,6 +984,10 @@ static bool should_skip_region(struct memblock_type *type, if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m)) return true; + /* skip driver-managed memory unless we were asked for it explicitly */ + if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m)) + return true; + return false; } From dcac77d3c7cd99ead2f0e689d13f40f1181da288 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:08:44 +1100 Subject: [PATCH 0821/1202] mm/memory_hotplug: indicate MEMBLOCK_DRIVER_MANAGED with IORESOURCE_SYSRAM_DRIVER_MANAGED Let's communicate driver-managed regions to memblock, to properly teach kexec_file with CONFIG_ARCH_KEEP_MEMBLOCK to not place images on these memory regions. Link: https://lkml.kernel.org/r/20211004093605.5830-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: "Aneesh Kumar K . V" Cc: Arnd Bergmann Cc: Christian Borntraeger Cc: Eric Biederman Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Huacai Chen Cc: Jianyong Wu Cc: Jiaxun Yang Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: Shahab Vahedi Cc: Thomas Bogendoerfer Cc: Vasily Gorbik Cc: Vineet Gupta Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory_hotplug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cedeaa4d6f0ee..852041f6be418 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1342,6 +1342,7 @@ bool mhp_supports_memmap_on_memory(unsigned long size) int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; + enum memblock_flags memblock_flags = MEMBLOCK_NONE; struct vmem_altmap mhp_altmap = {}; struct memory_group *group = NULL; u64 start, size; @@ -1370,7 +1371,9 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) mem_hotplug_begin(); if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { - ret = memblock_add_node(start, size, nid, MEMBLOCK_NONE); + if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED) + memblock_flags = MEMBLOCK_DRIVER_MANAGED; + ret = memblock_add_node(start, size, nid, memblock_flags); if (ret) goto error_mem_hotplug_end; } From 85f539e4a4f029054695c80b59a093e2b0b74971 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:08:45 +1100 Subject: [PATCH 0822/1202] mm/memory_hotplug: make HWPoisoned dirty swapcache pages unmovable HWPoisoned dirty swapcache pages are kept for killing owner processes. We should not offline these pages or do_swap_page() would access the offline pages and lead to bad ending. 
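Condensed into a standalone predicate, the new bail-out amounts to the following (a simplified sketch with an illustrative helper name; in the patch below the test is open-coded in scan_movable_pages()):

	/*
	 * A HWPoisoned page still sitting dirty in the swap cache is kept
	 * around so the owning processes can be killed when they touch it;
	 * it therefore must never be migrated or offlined.
	 */
	static bool hwpoison_pins_page(struct page *page)
	{
		return PageHWPoison(page) && PageSwapCache(page);
	}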
Link: https://lkml.kernel.org/r/20210821094246.10149-4-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Chris Goldsworthy Cc: Michal Hocko Cc: Minchan Kim Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memory_hotplug.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 852041f6be418..3de7933e53022 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1656,6 +1656,12 @@ static int scan_movable_pages(unsigned long start, unsigned long end, */ if (PageOffline(page) && page_count(page)) return -EBUSY; + /* + * HWPoisoned dirty swapcache pages are definitely unmovable + * because they are kept for killing owner processes. + */ + if (PageHWPoison(page) && PageSwapCache(page)) + return -EBUSY; if (!PageHuge(page)) continue; From 0077a52c5c878596d3e8c302ee30af58ab8c8d68 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 21 Oct 2021 15:08:46 +1100 Subject: [PATCH 0823/1202] mm/rmap.c: avoid double faults migrating device private pages During migration, special page table entries are installed for each page being migrated. These entries store the pfn and associated permissions of ptes mapping the page being migrated. Device-private pages use special swap pte entries to distinguish read-only vs. writeable pages, which the migration code checks when creating migration entries. Normally this follows a fast path in migrate_vma_collect_pmd() which correctly copies the permissions of device-private pages over to migration entries when migrating pages back to the CPU. However, the slow path falls back to using try_to_migrate(), which unconditionally creates read-only migration entries for device-private pages. This leads to unnecessary double faults on the CPU as the new pages are always mapped read-only even when they could be mapped writeable. Fix this by correctly copying device-private permissions in try_to_migrate_one(). Link: https://lkml.kernel.org/r/20211018045247.3128058-1-apopple@nvidia.com Signed-off-by: Alistair Popple Reported-by: Ralph Campbell Reviewed-by: John Hubbard Cc: Jerome Glisse Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/rmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index 6aebd17472512..d65a74e140f95 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1807,6 +1807,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma, update_hiwater_rss(mm); if (is_zone_device_page(page)) { + unsigned long pfn = page_to_pfn(page); swp_entry_t entry; pte_t swp_pte; @@ -1815,8 +1816,11 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma, * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ - entry = make_readable_migration_entry( - page_to_pfn(page)); + entry = pte_to_swp_entry(pteval); + if (is_writable_device_private_entry(entry)) + entry = make_writable_migration_entry(pfn); + else + entry = make_readable_migration_entry(pfn); swp_pte = swp_entry_to_pte(entry); /* From 664955d510ac0f3ab28d0d6ccbde17cb9899d357 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 21 Oct 2021 15:08:47 +1100 Subject: [PATCH 0824/1202] mm/rmap: convert from atomic_t to refcount_t on anon_vma->refcount refcount_t type and corresponding API can protect refcounters from accidental underflow and overflow and further use-after-free situations.
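To make the protection concrete, here is a minimal sketch of the usage pattern (illustrative struct and helper names, not the patched code) showing what refcount_t adds over a raw atomic_t:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct obj {
		refcount_t ref;
	};

	static void obj_put(struct obj *o)
	{
		/* Saturates and WARNs on underflow instead of wrapping,
		 * so an extra put cannot silently cause a double free. */
		if (refcount_dec_and_test(&o->ref))
			kfree(o);
	}

	static bool obj_get_if_live(struct obj *o)
	{
		/* Fails once the count has already hit zero, preventing
		 * resurrection of an object that is being freed. */
		return refcount_inc_not_zero(&o->ref);
	}

This mirrors the anon_vma conversion below, where atomic_inc_not_zero()/atomic_dec_and_test() become their refcount_* equivalents.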
Link: https://lkml.kernel.org/r/1626665029-49104-1-git-send-email-xiyuyang19@fudan.edu.cn Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Cc: Alistair Popple Cc: Yang Shi Cc: Shakeel Butt Cc: Hugh Dickins Cc: Xiyu Yang Cc: Miaohe Lin Cc: Cc: Xin Tan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/rmap.h | 8 +++++--- mm/rmap.c | 14 +++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c976cc6de2574..38151efe1a593 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -12,6 +12,8 @@ #include #include +#include + /* * The anon_vma heads a list of private "related" vmas, to scan if * an anonymous page pointing to this anon_vma needs to be unmapped: @@ -36,7 +38,7 @@ struct anon_vma { * the reference is responsible for clearing up the * anon_vma if they are the last user on release */ - atomic_t refcount; + refcount_t refcount; /* * Count of child anon_vmas and VMAs which points to this anon_vma. @@ -100,14 +102,14 @@ enum ttu_flags { #ifdef CONFIG_MMU static inline void get_anon_vma(struct anon_vma *anon_vma) { - atomic_inc(&anon_vma->refcount); + refcount_inc(&anon_vma->refcount); } void __put_anon_vma(struct anon_vma *anon_vma); static inline void put_anon_vma(struct anon_vma *anon_vma) { - if (atomic_dec_and_test(&anon_vma->refcount)) + if (refcount_dec_and_test(&anon_vma->refcount)) __put_anon_vma(anon_vma); } diff --git a/mm/rmap.c b/mm/rmap.c index d65a74e140f95..1584706237425 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -89,7 +89,7 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { - atomic_set(&anon_vma->refcount, 1); + refcount_set(&anon_vma->refcount, 1); anon_vma->degree = 1; /* Reference for first vma */ anon_vma->parent = anon_vma; /* @@ -104,7 +104,7 @@ static inline struct anon_vma *anon_vma_alloc(void) static inline void anon_vma_free(struct anon_vma *anon_vma) { - VM_BUG_ON(atomic_read(&anon_vma->refcount)); + VM_BUG_ON(refcount_read(&anon_vma->refcount)); /* * Synchronize against page_lock_anon_vma_read() such that @@ -446,7 +446,7 @@ static void anon_vma_ctor(void *data) struct anon_vma *anon_vma = data; init_rwsem(&anon_vma->rwsem); - atomic_set(&anon_vma->refcount, 0); + refcount_set(&anon_vma->refcount, 0); anon_vma->rb_root = RB_ROOT_CACHED; } @@ -496,7 +496,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - if (!atomic_inc_not_zero(&anon_vma->refcount)) { + if (!refcount_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } @@ -555,7 +555,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) } /* trylock failed, we got to sleep */ - if (!atomic_inc_not_zero(&anon_vma->refcount)) { + if (!refcount_inc_not_zero(&anon_vma->refcount)) { anon_vma = NULL; goto out; } @@ -570,7 +570,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) rcu_read_unlock(); anon_vma_lock_read(anon_vma); - if (atomic_dec_and_test(&anon_vma->refcount)) { + if (refcount_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock * and bail -- can't simply use put_anon_vma() because @@ -2228,7 +2228,7 @@ void __put_anon_vma(struct anon_vma *anon_vma) struct anon_vma *root = anon_vma->root; anon_vma_free(anon_vma); - if (root != anon_vma && atomic_dec_and_test(&root->refcount)) + if (root != anon_vma && refcount_dec_and_test(&root->refcount)) anon_vma_free(root); } 
From d0baae2abb1d90b34959d8088c3a0ad64dd871b8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:08:47 +1100 Subject: [PATCH 0825/1202] mm: disable zsmalloc on PREEMPT_RT For efficiency reasons, zsmalloc is using a slim `handle'. The value is the address of a memory allocation of 4 or 8 bytes depending on the size of the long data type. The lowest bit in that allocated memory is used as a bit spin lock. The usage of the bit spin lock is problematic because, with the bit spin lock held, zsmalloc acquires a rwlock_t and a spinlock_t, which are both sleeping locks on PREEMPT_RT and therefore must not be acquired with preemption disabled. There is a patch which extends the handle on PREEMPT_RT so that a full spinlock_t fits (even with lockdep enabled) and then eliminates the bit spin lock. I'm not sure how sensible zsmalloc on PREEMPT_RT is given that it is used to store compressed user memory. Disable ZSMALLOC on PREEMPT_RT. If there is need for it, we can try to get it to work. Link: https://lkml.kernel.org/r/20210923170121.1860133-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 88273dd5c6d6b..84387164a741f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -636,6 +636,7 @@ config ZSWAP_ZPOOL_DEFAULT_Z3FOLD config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC bool "zsmalloc" + depends on !PREEMPT_RT select ZSMALLOC help Use the zsmalloc allocator as the default allocator. @@ -686,7 +687,7 @@ config Z3FOLD config ZSMALLOC tristate "Memory allocator for compressed pages" - depends on MMU + depends on MMU && !PREEMPT_RT help zsmalloc is a slab-based memory allocator designed to store compressed RAM pages. zsmalloc uses virtual memory mapping From bce1a23254205ab9c9749ee216980339d49ccf8c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:08:48 +1100 Subject: [PATCH 0826/1202] mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and zs_unregister_migration() There is one possible race window between zs_pool_dec_isolated() and zs_unregister_migration() because wait_for_isolated_drain() checks the isolated count without holding class->lock, and there is no ordering inside zs_pool_dec_isolated(). Thus the below race window could be possible:

  zs_pool_dec_isolated                      zs_unregister_migration
    check pool->destroying != 0
                                              pool->destroying = true;
                                              smp_mb();
                                              wait_for_isolated_drain()
                                                wait for pool->isolated_pages == 0
    atomic_long_dec(&pool->isolated_pages);
    atomic_long_read(&pool->isolated_pages) == 0

Since we observe pool->destroying as false before the atomic_long_dec() of pool->isolated_pages, the wakeup of pool->migration_wait is missed. Fix this by ensuring that the check of pool->destroying happens after the atomic_long_dec(&pool->isolated_pages).
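Condensed, the resulting pairing looks like this (the waiter side is paraphrased from the race diagram above; names as in mm/zsmalloc.c):

	/* zs_pool_dec_isolated(), the waker side: */
	atomic_long_dec(&pool->isolated_pages);
	smp_mb__after_atomic();	/* order the dec before reading ->destroying */
	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
		wake_up_all(&pool->migration_wait);

	/* zs_unregister_migration(), the waiter side: */
	pool->destroying = true;
	smp_mb();		/* pairs with smp_mb__after_atomic() above */
	wait_for_isolated_drain(pool);	/* waits for isolated_pages == 0 */

With the barriers in place, either the waker sees pool->destroying == true and issues the wakeup, or the waiter's read of isolated_pages observes the decrement.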
Link: https://lkml.kernel.org/r/20210708115027.7557-1-linmiaohe@huawei.com Fixes: 701d678599d0 ("mm/zsmalloc.c: fix race condition in zs_destroy_pool") Signed-off-by: Miaohe Lin Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Henry Burns Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/zsmalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 68e8831068f4b..b897ce3b399a1 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1830,10 +1830,11 @@ static inline void zs_pool_dec_isolated(struct zs_pool *pool) VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); atomic_long_dec(&pool->isolated_pages); /* - * There's no possibility of racing, since wait_for_isolated_drain() - * checks the isolated count under &class->lock after enqueuing - * on migration_wait. + * Checking pool->destroying must happen after atomic_long_dec() + * for pool->isolated_pages above. Paired with the smp_mb() in + * zs_unregister_migration(). */ + smp_mb__after_atomic(); if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) wake_up_all(&pool->migration_wait); } From 9fdf2a4dbaf436fe785c087f0333930d8b235aba Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 21 Oct 2021 15:08:49 +1100 Subject: [PATCH 0827/1202] mm/zsmalloc.c: combine two atomic ops in zs_pool_dec_isolated() atomic_long_dec_and_test() is equivalent to atomic_long_dec() and atomic_long_read() == 0. Use it to make code more succinct. Link: https://lkml.kernel.org/r/20210624123930.1769093-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/zsmalloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b897ce3b399a1..3e713ff6261ee 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1828,14 +1828,13 @@ static void putback_zspage_deferred(struct zs_pool *pool, static inline void zs_pool_dec_isolated(struct zs_pool *pool) { VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); - atomic_long_dec(&pool->isolated_pages); /* * Checking pool->destroying must happen after atomic_long_dec() * for pool->isolated_pages above. Paired with the smp_mb() in * zs_unregister_migration(). */ smp_mb__after_atomic(); - if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) + if (atomic_long_dec_and_test(&pool->isolated_pages) && pool->destroying) wake_up_all(&pool->migration_wait); } From e1c30c38f5a7d655c5856e692e2d824a79315058 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 21 Oct 2021 15:08:50 +1100 Subject: [PATCH 0828/1202] mm/highmem: Remove deprecated kmap_atomic kmap_atomic() is being deprecated in favor of kmap_local_page(). Replace the uses of kmap_atomic() within the highmem code. On profiling clear_huge_page() using ftrace an improvement of 62% was observed on the below setup. Setup:- Below data has been collected on Qualcomm's SM7250 SoC THP enabled (kernel v4.19.113) with only CPU-0(Cortex-A55) and CPU-7(Cortex-A76) switched on and set to max frequency, also DDR set to perf governor. FTRACE Data:- Base data:- Number of iterations: 48 Mean of allocation time: 349.5 us std deviation: 74.5 us v4 data:- Number of iterations: 48 Mean of allocation time: 131 us std deviation: 32.7 us The following simple userspace experiment to allocate 100MB(BUF_SZ) of pages and writing to it gave us a good insight, we observed an improvement of 42% in allocation and writing timings. 
-------------------------------------------------------------
Test code snippet
-------------------------------------------------------------

	clock_start();
	buf = malloc(BUF_SZ); /* Allocate 100 MB of memory */
	for (i = 0; i < BUF_SZ_PAGES; i++) {
		*((int *)(buf + (i*PAGE_SIZE))) = 1;
	}
	clock_end();

-------------------------------------------------------------
Malloc test timings for 100MB anon allocation:-
Base data:- Number of iterations: 100 Mean of allocation time: 31831 us std deviation: 4286 us
v4 data:- Number of iterations: 100 Mean of allocation time: 18193 us std deviation: 4915 us
Link: https://lkml.kernel.org/r/20210204073255.20769-2-prathu.baronia@oneplus.com Signed-off-by: Ira Weiny Signed-off-by: Prathu Baronia Cc: Ira Weiny Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/highmem.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b4c49f9cc379e..31ebee36f26ce 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -143,9 +143,9 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size) #ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { - void *addr = kmap_atomic(page); + void *addr = kmap_local_page(page); clear_user_page(addr, vaddr, page); - kunmap_atomic(addr); + kunmap_local(addr); } #endif @@ -177,9 +177,9 @@ alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, static inline void clear_highpage(struct page *page) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); clear_page(kaddr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE @@ -202,7 +202,7 @@ static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) { - void *kaddr = kmap_atomic(page); + void *kaddr = kmap_local_page(page); unsigned int i; BUG_ON(end1 > page_size(page) || end2 > page_size(page)); @@ -213,7 +213,7 @@ static inline void zero_user_segments(struct page *page, if (end2 > start2) memset(kaddr + start2, 0, end2 - start2); - kunmap_atomic(kaddr); + kunmap_local(kaddr); for (i = 0; i < compound_nr(page); i++) flush_dcache_page(page + i); } @@ -238,11 +238,11 @@ static inline void copy_user_highpage(struct page *to, struct page *from, { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_user_page(vto, vfrom, vaddr, to); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif @@ -253,11 +253,11 @@ static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); copy_page(vto, vfrom); - kunmap_atomic(vto); - kunmap_atomic(vfrom); + kunmap_local(vto); + kunmap_local(vfrom); } #endif From 03465eaab10111be2754a834a367ad3d0c15abe5 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Thu, 21 Oct 2021 15:08:50 +1100 Subject: [PATCH 0830/1202] zram_drv: allow reclaim on bio_alloc read_from_bdev_async() is not called in atomic context, so GFP_NOIO can be used rather than GFP_ATOMIC. With GFP_NOIO, reclaimable pages can be reclaimed to satisfy the allocation, avoiding both allocation failure and the resulting page fault failure.
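For context, the allocation-context rule being applied is roughly the following (illustrative comment; the actual change is the one-liner in the diff below):

	/*
	 * GFP_ATOMIC: may not sleep and cannot enter reclaim, so it fails
	 * quickly under memory pressure.
	 * GFP_NOIO: may sleep and reclaim pages, but never starts new I/O
	 * from reclaim, avoiding recursion back into the block layer.
	 */
	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio)
		return -ENOMEM;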
Link: https://lkml.kernel.org/r/20210908005241.28062-1-jaewon31.kim@samsung.com Signed-off-by: Jaewon Kim Reported-by: Yong-Taek Lee Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fcaf2750f68f7..081e77d595d78 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -587,7 +587,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, { struct bio *bio; - bio = bio_alloc(GFP_ATOMIC, 1); + bio = bio_alloc(GFP_NOIO, 1); if (!bio) return -ENOMEM; From cf07c9196b95cd50e2b5020db2b9de40f85c081f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Oct 2021 15:08:51 +1100 Subject: [PATCH 0830/1202] zram: off by one in read_block_state() snprintf() returns the number of bytes it would have printed if there were space. But it does not count the NUL terminator. So that means that if "count == copied" then this has already overflowed by one character. This bug likely isn't super harmful in real life. Link: https://lkml.kernel.org/r/20210916130404.GA25094@kili Fixes: c0265342bff4 ("zram: introduce zram memory tracking") Signed-off-by: Dan Carpenter Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 081e77d595d78..f61910c65f0f5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -910,7 +910,7 @@ static ssize_t read_block_state(struct file *file, char __user *buf, zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); - if (count < copied) { + if (count <= copied) { zram_slot_unlock(zram, index); break; } From cf7079ba91f77137527fe7866eb872334de37fef Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Thu, 21 Oct 2021 15:08:51 +1100 Subject: [PATCH 0831/1202] zram: introduce an aged idle interface This change introduces an aged idle interface to the existing idle sysfs file for zram. When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file now also accepts an integer argument. This integer is the age (in seconds) of pages to mark as idle. The idle file still supports 'all' as it always has. This new approach allows for much more control over which pages get marked as idle. Link: https://lkml.kernel.org/r/20210923130115.1344361-1-bgeffon@google.com Signed-off-by: Brian Geffon Acked-by: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Jonathan Corbet Cc: Suleiman Souhlal Cc: Jesse Barnes Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/blockdev/zram.rst | 8 +++ drivers/block/zram/zram_drv.c | 60 +++++++++++++++------ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 700329d25f579..3e11926a4df95 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -328,6 +328,14 @@ as idle:: From now on, any pages on zram are idle pages. The idle mark will be removed until someone requests access of the block. IOW, unless there is access request, those pages are still idle pages. 
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be +marked as idle based on how long (in seconds) it's been since they were +last accessed:: + + echo 86400 > /sys/block/zramX/idle + +In this example all pages which haven't been accessed in more than 86400 +seconds (one day) will be marked idle. Admin can request writeback of those idle pages at right timing via:: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f61910c65f0f5..0334602562546 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } -static ssize_t idle_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +/* + * Mark all pages which are older than or equal to cutoff as IDLE. + * Callers should hold the zram init lock in read mode + **/ +static void mark_idle(struct zram *zram, ktime_t cutoff) { - struct zram *zram = dev_to_zram(dev); + int is_idle = 1; unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; int index; - if (!sysfs_streq(buf, "all")) - return -EINVAL; - - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); - return -EINVAL; - } - for (index = 0; index < nr_pages; index++) { /* * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev, */ zram_slot_lock(zram, index); if (zram_allocated(zram, index) && - !zram_test_flag(zram, index, ZRAM_UNDER_WB)) - zram_set_flag(zram, index, ZRAM_IDLE); + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time)); +#endif + if (is_idle) + zram_set_flag(zram, index, ZRAM_IDLE); + } zram_slot_unlock(zram, index); } +} - up_read(&zram->init_lock); +static ssize_t idle_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + ktime_t cutoff_time = 0; + ssize_t rv = -EINVAL; - return len; + if (!sysfs_streq(buf, "all")) { +#ifdef CONFIG_ZRAM_MEMORY_TRACKING + u64 age_sec; + /* If it did not parse as 'all' try to treat it as an integer */ + if (!kstrtoull(buf, 0, &age_sec)) + cutoff_time = ktime_sub(ktime_get_boottime(), + ns_to_ktime(age_sec * NSEC_PER_SEC)); + else +#endif + goto out; + } + + down_read(&zram->init_lock); + if (!init_done(zram)) + goto out_unlock; + + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */ + mark_idle(zram, cutoff_time); + rv = len; + +out_unlock: + up_read(&zram->init_lock); +out: + return rv; } #ifdef CONFIG_ZRAM_WRITEBACK From 24b14c255dede66f89aa3c6e3c9456ed4179fec3 Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Thu, 21 Oct 2021 15:08:52 +1100 Subject: [PATCH 0832/1202] zram-introduce-an-aged-idle-interface-v5 use IS_ENABLED and cleanup comment Link: https://lkml.kernel.org/r/20210924161128.1508015-1-bgeffon@google.com Signed-off-by: Brian Geffon Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/block/zram/zram_drv.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0334602562546..298d54d30abe9 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -294,7 +294,7 @@ static ssize_t mem_used_max_store(struct device *dev, /* * Mark all pages which are older than or equal to cutoff as IDLE. 
* Callers should hold the zram init lock in read mode - **/ + */ static void mark_idle(struct zram *zram, ktime_t cutoff) { int is_idle = 1; @@ -310,7 +310,7 @@ static void mark_idle(struct zram *zram, ktime_t cutoff) if (zram_allocated(zram, index) && !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { #ifdef CONFIG_ZRAM_MEMORY_TRACKING - is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time)); + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time)); #endif if (is_idle) zram_set_flag(zram, index, ZRAM_IDLE); @@ -327,14 +327,15 @@ static ssize_t idle_store(struct device *dev, ssize_t rv = -EINVAL; if (!sysfs_streq(buf, "all")) { -#ifdef CONFIG_ZRAM_MEMORY_TRACKING + /* + * If it did not parse as 'all' try to treat it as an integer when + * we have memory tracking enabled. + */ u64 age_sec; - /* If it did not parse as 'all' try to treat it as an integer */ - if (!kstrtoull(buf, 0, &age_sec)) + if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC)); else -#endif goto out; } From a99d7dbce407812276e2543586f29fce0ee483ba Mon Sep 17 00:00:00 2001 From: Brian Geffon Date: Thu, 21 Oct 2021 15:08:53 +1100 Subject: [PATCH 0833/1202] zram: Introduce an aged idle interface Sergey's cleanup suggestions Link: https://lkml.kernel.org/r/20210929143056.13067-1-bgeffon@google.com Signed-off-by: Brian Geffon Acked-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 298d54d30abe9..0a9309a2ef547 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -310,7 +310,7 @@ static void mark_idle(struct zram *zram, ktime_t cutoff) if (zram_allocated(zram, index) && !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { #ifdef CONFIG_ZRAM_MEMORY_TRACKING - is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time)); + is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time); #endif if (is_idle) zram_set_flag(zram, index, ZRAM_IDLE); @@ -332,6 +332,7 @@ static ssize_t idle_store(struct device *dev, * we have memory tracking enabled. */ u64 age_sec; + if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC)); @@ -343,7 +344,7 @@ static ssize_t idle_store(struct device *dev, if (!init_done(zram)) goto out_unlock; - /* A age_sec of 0 marks everything as idle, this is the "all" behavior */ + /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */ mark_idle(zram, cutoff_time); rv = len; From 69c08f49a8b8b8a913f461c2912b522039172dc9 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 21 Oct 2021 15:08:54 +1100 Subject: [PATCH 0834/1202] mm: remove HARDENED_USERCOPY_FALLBACK This has served its purpose and is no longer used. All usercopy violations appear to have been handled by now, any remaining instances (or new bugs) will cause copies to be rejected. This isn't a direct revert of commit 2d891fbc3bb6 ("usercopy: Allow strict enforcement of whitelists"); since usercopy_fallback is effectively 0, the fallback handling is removed too. This also removes the usercopy_fallback module parameter on slab_common. 
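With the fallback removed, any slab region handed to copy_to_user()/copy_from_user() must be whitelisted when the cache is created; a sketch using the existing kmem_cache_create_usercopy() API (the struct and cache names are illustrative):

	#include <linux/slab.h>

	struct session {
		u32 id;
		char user_blob[64];	/* the only field user space may see */
		void *owner;		/* must never be exposed to user space */
	};

	static struct kmem_cache *session_cache;

	static int __init session_cache_init(void)
	{
		/* Whitelist user_blob only: a usercopy touching any other
		 * byte of the object is now rejected, not merely warned. */
		session_cache = kmem_cache_create_usercopy("session",
				sizeof(struct session), 0, SLAB_HWCACHE_ALIGN,
				offsetof(struct session, user_blob),
				sizeof_field(struct session, user_blob),
				NULL);
		return session_cache ? 0 : -ENOMEM;
	}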
Link: https://github.com/KSPP/linux/issues/153 Link: https://lkml.kernel.org/r/20210921061149.1091163-1-steve@sk2.org Signed-off-by: Stephen Kitt Suggested-by: Kees Cook Acked-by: Kees Cook Reviewed-by: Joel Stanley [defconfig change] Acked-by: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: James Morris Cc: "Serge E . Hallyn" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/configs/skiroot_defconfig | 1 - include/linux/slab.h | 2 -- mm/slab.c | 13 ------------- mm/slab_common.c | 8 -------- mm/slub.c | 14 -------------- security/Kconfig | 14 -------------- 6 files changed, 52 deletions(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index b806a5d3a695d..c3ba614c973d6 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -275,7 +275,6 @@ CONFIG_NLS_UTF8=y CONFIG_ENCRYPTED_KEYS=y CONFIG_SECURITY=y CONFIG_HARDENED_USERCOPY=y -# CONFIG_HARDENED_USERCOPY_FALLBACK is not set CONFIG_HARDENED_USERCOPY_PAGESPAN=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_LOCKDOWN_LSM=y diff --git a/include/linux/slab.h b/include/linux/slab.h index 837cb16232ef4..181045148b065 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -142,8 +142,6 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); -extern bool usercopy_fallback; - struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, unsigned int align, slab_flags_t flags, void (*ctor)(void *)); diff --git a/mm/slab.c b/mm/slab.c index 64ec17a3bc2ba..da132a9ae6f8b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4204,19 +4204,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, n <= cachep->useroffset - offset + cachep->usersize) return; - /* - * If the copy is still within the allocated object, produce - * a warning instead of rejecting the copy. This is intended - * to be a temporary method to find any missing usercopy - * whitelists. 
- */ - if (usercopy_fallback && - offset <= cachep->object_size && - n <= cachep->object_size - offset) { - usercopy_warn("SLAB object", cachep->name, to_user, offset, n); - return; - } - usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slab_common.c b/mm/slab_common.c index ec2bb0beed757..e5d080a930093 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -37,14 +37,6 @@ LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; -#ifdef CONFIG_HARDENED_USERCOPY -bool usercopy_fallback __ro_after_init = - IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); -module_param(usercopy_fallback, bool, 0400); -MODULE_PARM_DESC(usercopy_fallback, - "WARN instead of reject usercopy whitelist violations"); -#endif - static LIST_HEAD(slab_caches_to_rcu_destroy); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); static DECLARE_WORK(slab_caches_to_rcu_destroy_work, diff --git a/mm/slub.c b/mm/slub.c index e9a51dcf8bf9a..432145d7b4ec5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4489,7 +4489,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, { struct kmem_cache *s; unsigned int offset; - size_t object_size; bool is_kfence = is_kfence_address(ptr); ptr = kasan_reset_tag(ptr); @@ -4522,19 +4521,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, n <= s->useroffset - offset + s->usersize) return; - /* - * If the copy is still within the allocated object, produce - * a warning instead of rejecting the copy. This is intended - * to be a temporary method to find any missing usercopy - * whitelists. - */ - object_size = slab_ksize(s); - if (usercopy_fallback && - offset <= object_size && n <= object_size - offset) { - usercopy_warn("SLUB object", s->name, to_user, offset, n); - return; - } - usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/security/Kconfig b/security/Kconfig index 0ced7fd33e4d0..d9698900c9b73 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -163,20 +163,6 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. -config HARDENED_USERCOPY_FALLBACK - bool "Allow usercopy whitelist violations to fallback to object size" - depends on HARDENED_USERCOPY - default y - help - This is a temporary option that allows missing usercopy whitelists - to be discovered via a WARN() to the kernel log, instead of - rejecting the copy, falling back to non-whitelisted hardened - usercopy that checks the slab allocation size instead of the - whitelist size. This option will be removed once it seems like - all missing usercopy whitelists have been identified and fixed. - Booting with "slab_common.usercopy_fallback=Y/N" can change - this setting. - config HARDENED_USERCOPY_PAGESPAN bool "Refuse to copy allocations that span multiple pages" depends on HARDENED_USERCOPY From 7abd7ea904c3e9911fe6296fbf30e25863cc1e5a Mon Sep 17 00:00:00 2001 From: Mianhan Liu Date: Thu, 21 Oct 2021 15:08:54 +1100 Subject: [PATCH 0835/1202] include/linux/mm.h: move nr_free_buffer_pages from swap.h to mm.h nr_free_buffer_pages could be exposed through mm.h instead of swap.h. The advantage of this change is that it can reduce the obsolete includes. For example, net/ipv4/tcp.c wouldn't need swap.h any more since it has already included mm.h. 
Similarly, after checking all the other files, it turns out that tcp.c, udp.c, meter.c, ... follow the same rule, so these files can have swap.h removed too. Moreover, after preprocessing all the files that use nr_free_buffer_pages, it turns out that those files have already included mm.h. Thus, we can move nr_free_buffer_pages from swap.h to mm.h safely. This change will not affect the compilation of other files. Link: https://lkml.kernel.org/r/20210912133640.1624-1-liumh1@shanghaitech.edu.cn Signed-off-by: Mianhan Liu Cc: Jakub Kicinski Cc: Ulf Hansson Cc: "David S. Miller" Cc: Simon Horman Cc: Pravin B Shelar Cc: Vlad Yasevich Cc: Marcelo Ricardo Leitner Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/mmc/core/mmc_test.c | 1 - include/linux/mm.h | 2 ++ include/linux/swap.h | 1 - net/ipv4/tcp.c | 1 - net/ipv4/udp.c | 1 - net/netfilter/ipvs/ip_vs_ctl.c | 1 - net/openvswitch/meter.c | 1 - net/sctp/protocol.c | 1 - 8 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 63524551a13a1..e6a2fd2c6d5c9 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -10,7 +10,6 @@ #include #include -#include /* For nr_free_buffer_pages() */ #include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 15058d9cec99b..b1720aa637276 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -875,6 +875,8 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); void copy_huge_page(struct page *dst, struct page *src); +unsigned long nr_free_buffer_pages(void); + /* * Compound pages have a destructor function. Provide a * prototype for that function and accessor functions. diff --git a/include/linux/swap.h b/include/linux/swap.h index ba52f3a3478e3..53f759a599966 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -335,7 +335,6 @@ void workingset_update_node(struct xa_node *node); /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; -extern unsigned long nr_free_buffer_pages(void); /* Definition of global_zone_page_state not available yet */ #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e8b48df73c852..0109279dfbd05 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -260,7 +260,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8536b2a7210b2..933ed26eefa23 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -78,7 +78,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a060..034e537bb55c9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 896b8f5bc8853..04a060ac7fdf6 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ec0f52567c16f..35928fefae332 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include From f9f805d2567fafe3c3324db663294acb399b5400 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:55 +1100 Subject: [PATCH 0836/1202]
stacktrace: move filter_irq_stacks() to kernel/stacktrace.c filter_irq_stacks() has little to do with the stackdepot implementation, except that it is usually used by users (such as KASAN) of stackdepot to reduce the stack trace. However, filter_irq_stacks() itself is not useful without a stack trace as obtained by stack_trace_save() and friends. Therefore, move filter_irq_stacks() to kernel/stacktrace.c, so that new users of filter_irq_stacks() do not have to start depending on STACKDEPOT only for filter_irq_stacks(). Link: https://lkml.kernel.org/r/20210923104803.2620285-1-elver@google.com Signed-off-by: Marco Elver Acked-by: Dmitry Vyukov Cc: Alexander Potapenko Cc: Jann Horn Cc: Aleksandr Nogikh Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/stackdepot.h | 2 -- include/linux/stacktrace.h | 1 + kernel/stacktrace.c | 30 ++++++++++++++++++++++++++++++ lib/stackdepot.c | 24 ------------------------ 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index b2f7e7c6ba54d..d29860966bc91 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -25,8 +25,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); - #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); #else diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 9edecb494e9e2..bef158815e83d 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -21,6 +21,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task, unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store, unsigned int size, unsigned int skipnr); unsigned int stack_trace_save_user(unsigned long *store, unsigned int size); +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries); /* Internal interfaces. Do not use in generic code */ #ifdef CONFIG_ARCH_STACKWALK diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index 9f8117c7cfdde..9c625257023d2 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include #include +#include /** * stack_trace_print - Print the entries in the stack trace @@ -373,3 +374,32 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size) #endif /* CONFIG_USER_STACKTRACE_SUPPORT */ #endif /* !CONFIG_ARCH_STACKWALK */ + +static inline bool in_irqentry_text(unsigned long ptr) +{ + return (ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end) || + (ptr >= (unsigned long)&__softirqentry_text_start && + ptr < (unsigned long)&__softirqentry_text_end); +} + +/** + * filter_irq_stacks - Find first IRQ stack entry in trace + * @entries: Pointer to stack trace array + * @nr_entries: Number of entries in the storage array + * + * Return: Number of trace entries until IRQ stack starts. + */ +unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries) +{ + unsigned int i; + + for (i = 0; i < nr_entries; i++) { + if (in_irqentry_text(entries[i])) { + /* Include the irqentry function into the stack. 
*/ + return i + 1; + } + } + return nr_entries; +} +EXPORT_SYMBOL_GPL(filter_irq_stacks); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index bda58597e3756..09485dc5bd128 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -20,7 +20,6 @@ */ #include -#include #include #include #include @@ -371,26 +370,3 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, return __stack_depot_save(entries, nr_entries, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); - -static inline int in_irqentry_text(unsigned long ptr) -{ - return (ptr >= (unsigned long)&__irqentry_text_start && - ptr < (unsigned long)&__irqentry_text_end) || - (ptr >= (unsigned long)&__softirqentry_text_start && - ptr < (unsigned long)&__softirqentry_text_end); -} - -unsigned int filter_irq_stacks(unsigned long *entries, - unsigned int nr_entries) -{ - unsigned int i; - - for (i = 0; i < nr_entries; i++) { - if (in_irqentry_text(entries[i])) { - /* Include the irqentry function into the stack. */ - return i + 1; - } - } - return nr_entries; -} -EXPORT_SYMBOL_GPL(filter_irq_stacks); From 414eb5be01a29c4fd7014d793ca19b6b24f3a6e2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:56 +1100 Subject: [PATCH 0837/1202] kfence: count unexpectedly skipped allocations Maintain a counter to count allocations that are skipped due to being incompatible (oversized, incompatible gfp flags) or no capacity. This is to compute the fraction of allocations that could not be serviced by KFENCE, which we expect to be rare. Link: https://lkml.kernel.org/r/20210923104803.2620285-2-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Dmitry Vyukov Acked-by: Alexander Potapenko Cc: Aleksandr Nogikh Cc: Jann Horn Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 7a97db8bc8e75..249d75b7e5ee2 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -112,6 +112,8 @@ enum kfence_counter_id { KFENCE_COUNTER_FREES, KFENCE_COUNTER_ZOMBIES, KFENCE_COUNTER_BUGS, + KFENCE_COUNTER_SKIP_INCOMPAT, + KFENCE_COUNTER_SKIP_CAPACITY, KFENCE_COUNTER_COUNT, }; static atomic_long_t counters[KFENCE_COUNTER_COUNT]; @@ -121,6 +123,8 @@ static const char *const counter_names[] = { [KFENCE_COUNTER_FREES] = "total frees", [KFENCE_COUNTER_ZOMBIES] = "zombie allocations", [KFENCE_COUNTER_BUGS] = "total bugs", + [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)", + [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)", }; static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); @@ -271,8 +275,10 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g list_del_init(&meta->list); } raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); - if (!meta) + if (!meta) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]); return NULL; + } if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) { /* @@ -740,8 +746,10 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * Perform size check before switching kfence_allocation_gate, so that * we don't disable KFENCE without making an allocation. */ - if (size > PAGE_SIZE) + if (size > PAGE_SIZE) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; + } /* * Skip allocations from non-default zones, including DMA. 
We cannot @@ -749,8 +757,10 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) * properties (e.g. reside in DMAable memory). */ if ((flags & GFP_ZONEMASK) || - (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]); return NULL; + } /* * allocation_gate only needs to become non-zero, so it doesn't make From 69496c9bdf1b98a3ab354594a8c95381d804ef43 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:56 +1100 Subject: [PATCH 0838/1202] kfence: move saving stack trace of allocations into __kfence_alloc() Move the saving of the stack trace of allocations into __kfence_alloc(), so that the stack entries array can be used outside of kfence_guarded_alloc() and we avoid potentially unwinding the stack multiple times. Link: https://lkml.kernel.org/r/20210923104803.2620285-3-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Dmitry Vyukov Acked-by: Alexander Potapenko Cc: Aleksandr Nogikh Cc: Jann Horn Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 249d75b7e5ee2..db01814f8ff05 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -187,19 +187,26 @@ static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *m * Update the object's metadata state, including updating the alloc/free stacks * depending on the state transition. */ -static noinline void metadata_update_state(struct kfence_metadata *meta, - enum kfence_object_state next) +static noinline void +metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next, + unsigned long *stack_entries, size_t num_stack_entries) { struct kfence_track *track = next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track; lockdep_assert_held(&meta->lock); - /* - * Skip over 1 (this) functions; noinline ensures we do not accidentally - * skip over the caller by never inlining. - */ - track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); + if (stack_entries) { + memcpy(track->stack_entries, stack_entries, + num_stack_entries * sizeof(stack_entries[0])); + } else { + /* + * Skip over 1 (this) functions; noinline ensures we do not + * accidentally skip over the caller by never inlining. + */ + num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); + } + track->num_stack_entries = num_stack_entries; track->pid = task_pid_nr(current); track->cpu = raw_smp_processor_id(); track->ts_nsec = local_clock(); /* Same source as printk timestamps. */ @@ -261,7 +268,8 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, } } -static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp) +static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp, + unsigned long *stack_entries, size_t num_stack_entries) { struct kfence_metadata *meta = NULL; unsigned long flags; @@ -320,7 +328,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g addr = (void *)meta->addr; /* Update remaining metadata. */ - metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED); + metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries); /* Pairs with READ_ONCE() in kfence_shutdown_cache(). 
*/ WRITE_ONCE(meta->cache, cache); meta->size = size; @@ -400,7 +408,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z memzero_explicit(addr, meta->size); /* Mark the object as freed. */ - metadata_update_state(meta, KFENCE_OBJECT_FREED); + metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0); raw_spin_unlock_irqrestore(&meta->lock, flags); @@ -742,6 +750,9 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { + unsigned long stack_entries[KFENCE_STACK_DEPTH]; + size_t num_stack_entries; + /* * Perform size check before switching kfence_allocation_gate, so that * we don't disable KFENCE without making an allocation. @@ -786,7 +797,9 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - return kfence_guarded_alloc(s, size, flags); + num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0); + + return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries); } size_t kfence_ksize(const void *addr) From 4dce22fdbd9db1aeb103bd55ae0b8c188a0b97f3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:57 +1100 Subject: [PATCH 0839/1202] kfence: limit currently covered allocations when pool nearly full One of KFENCE's main design principles is that with increasing uptime, allocation coverage increases sufficiently to detect previously undetected bugs. We have observed that frequent long-lived allocations of the same source (e.g. pagecache) tend to permanently fill up the KFENCE pool with increasing system uptime, thus breaking the above requirement. The workaround thus far had been increasing the sample interval and/or increasing the KFENCE pool size, but is no reliable solution. To ensure diverse coverage of allocations, limit currently covered allocations of the same source once pool utilization reaches 75% (configurable via `kfence.skip_covered_thresh`) or above. The effect is retaining reasonable allocation coverage when the pool is close to full. A side-effect is that this also limits frequent long-lived allocations of the same source filling up the pool permanently. Uniqueness of an allocation for coverage purposes is based on its (partial) allocation stack trace (the source). A Counting Bloom filter is used to check if an allocation is covered; if the allocation is currently covered, the allocation is skipped by KFENCE. Testing was done using: (a) a synthetic workload that performs frequent long-lived allocations (default config values; sample_interval=1; num_objects=63), and (b) normal desktop workloads on an otherwise idle machine where the problem was first reported after a few days of uptime (default config values). In both test cases the sampled allocation rate no longer drops to zero at any point. In the case of (b) we observe (after 2 days uptime) 15% unique allocations in the pool, 77% pool utilization, with 20% "skipped allocations (covered)". 
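Condensed from the implementation below, the Counting Bloom filter amounts to two small helpers (a sketch with simplified constants and shortened names, not the exact patch):

	#include <linux/atomic.h>

	#define COVERED_ORDER	8
	#define COVERED_SIZE	(1 << COVERED_ORDER)
	#define COVERED_MASK	(COVERED_SIZE - 1)
	#define COVERED_HNUM	2	/* hash functions per entry */

	static atomic_t covered[COVERED_SIZE];

	/* Derive the next probe position from the previous hash (an LCG
	 * step, with the same constants as ALLOC_COVERED_HNEXT() below). */
	static u32 covered_next(u32 h)
	{
		return 1664525 * h + 1013904223;
	}

	/* val is +1 when an allocation enters the pool, -1 when freed. */
	static void covered_add(u32 hash, int val)
	{
		int i;

		for (i = 0; i < COVERED_HNUM; i++) {
			atomic_add(val, &covered[hash & COVERED_MASK]);
			hash = covered_next(hash);
		}
	}

	/* Non-zero counts at every probe position mean "probably covered";
	 * false positives are possible and tolerated by design. */
	static bool covered_contains(u32 hash)
	{
		int i;

		for (i = 0; i < COVERED_HNUM; i++) {
			if (!atomic_read(&covered[hash & COVERED_MASK]))
				return false;
			hash = covered_next(hash);
		}
		return true;
	}

An allocation is then skipped only when pool usage is at or above the skip_covered_thresh percentage and covered_contains() reports its stack hash as present.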
Link: https://lkml.kernel.org/r/20210923104803.2620285-4-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Dmitry Vyukov Acked-by: Alexander Potapenko Cc: Aleksandr Nogikh Cc: Jann Horn Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 103 ++++++++++++++++++++++++++++++++++++++++++++- mm/kfence/kfence.h | 2 + 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index db01814f8ff05..58a0f6f1acc59 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -11,11 +11,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -82,6 +84,10 @@ static const struct kernel_param_ops sample_interval_param_ops = { }; module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600); +/* Pool usage% threshold when currently covered allocations are skipped. */ +static unsigned long kfence_skip_covered_thresh __read_mostly = 75; +module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644); + /* The pool of pages used for guard pages and objects. */ char *__kfence_pool __ro_after_init; EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ @@ -105,6 +111,25 @@ DEFINE_STATIC_KEY_FALSE(kfence_allocation_key); /* Gates the allocation, ensuring only one succeeds in a given period. */ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); +/* + * A Counting Bloom filter of allocation coverage: limits currently covered + * allocations of the same source filling up the pool. + * + * Assuming a range of 15%-85% unique allocations in the pool at any point in + * time, the below parameters provide a probablity of 0.02-0.33 for false + * positive hits respectively: + * + * P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)) ^ HNUM + */ +#define ALLOC_COVERED_HNUM 2 +#define ALLOC_COVERED_SIZE (1 << (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)) +#define ALLOC_COVERED_HNEXT(h) (1664525 * (h) + 1013904223) +#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1) +static atomic_t alloc_covered[ALLOC_COVERED_SIZE]; + +/* Stack depth used to determine uniqueness of an allocation. */ +#define UNIQUE_ALLOC_STACK_DEPTH 8UL + /* Statistics counters for debugfs. */ enum kfence_counter_id { KFENCE_COUNTER_ALLOCATED, @@ -114,6 +139,7 @@ enum kfence_counter_id { KFENCE_COUNTER_BUGS, KFENCE_COUNTER_SKIP_INCOMPAT, KFENCE_COUNTER_SKIP_CAPACITY, + KFENCE_COUNTER_SKIP_COVERED, KFENCE_COUNTER_COUNT, }; static atomic_long_t counters[KFENCE_COUNTER_COUNT]; @@ -125,11 +151,60 @@ static const char *const counter_names[] = { [KFENCE_COUNTER_BUGS] = "total bugs", [KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)", [KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)", + [KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)", }; static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); /* === Internals ============================================================ */ +static inline bool should_skip_covered(void) +{ + unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100; + + return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh; +} + +static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries) +{ + /* Some randomness across reboots / different machines. 
*/ + u32 seed = (u32)((unsigned long)__kfence_pool >> (BITS_PER_LONG - 32)); + + num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); + num_entries = filter_irq_stacks(stack_entries, num_entries); + return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), seed); +} + +/* + * Adds (or subtracts) count @val for allocation stack trace hash + * @alloc_stack_hash from Counting Bloom filter. + */ +static void alloc_covered_add(u32 alloc_stack_hash, int val) +{ + int i; + + for (i = 0; i < ALLOC_COVERED_HNUM; i++) { + atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]); + alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); + } +} + +/* + * Returns true if the allocation stack trace hash @alloc_stack_hash is + * currently contained (non-zero count) in Counting Bloom filter. + */ +static bool alloc_covered_contains(u32 alloc_stack_hash) +{ + int i; + + for (i = 0; i < ALLOC_COVERED_HNUM; i++) { + if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK])) + return false; + alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash); + } + + return true; +} + static bool kfence_protect(unsigned long addr) { return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true)); @@ -269,7 +344,8 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, } static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp, - unsigned long *stack_entries, size_t num_stack_entries) + unsigned long *stack_entries, size_t num_stack_entries, + u32 alloc_stack_hash) { struct kfence_metadata *meta = NULL; unsigned long flags; @@ -332,6 +408,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */ WRITE_ONCE(meta->cache, cache); meta->size = size; + meta->alloc_stack_hash = alloc_stack_hash; + for_each_canary(meta, set_canary_byte); /* Set required struct page fields. */ @@ -344,6 +422,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g raw_spin_unlock_irqrestore(&meta->lock, flags); + alloc_covered_add(alloc_stack_hash, 1); + /* Memory initialization. */ /* @@ -412,6 +492,8 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z raw_spin_unlock_irqrestore(&meta->lock, flags); + alloc_covered_add(meta->alloc_stack_hash, -1); + /* Protect to detect use-after-frees. */ kfence_protect((unsigned long)addr); @@ -752,6 +834,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { unsigned long stack_entries[KFENCE_STACK_DEPTH]; size_t num_stack_entries; + u32 alloc_stack_hash; /* * Perform size check before switching kfence_allocation_gate, so that @@ -799,7 +882,23 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0); - return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries); + /* + * Do expensive check for coverage of allocation in slow-path after + * allocation_gate has already become non-zero, even though it might + * mean not making any allocation within a given sample interval. + * + * This ensures reasonable allocation coverage when the pool is almost + * full, including avoiding long-lived allocations of the same source + * filling up the pool (e.g. pagecache allocations). 
+ */ + alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries); + if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) { + atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]); + return NULL; + } + + return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries, + alloc_stack_hash); } size_t kfence_ksize(const void *addr) diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h index c1f23c61e5f91..2a2d5de9d3791 100644 --- a/mm/kfence/kfence.h +++ b/mm/kfence/kfence.h @@ -87,6 +87,8 @@ struct kfence_metadata { /* Allocation and free stack information. */ struct kfence_track alloc_track; struct kfence_track free_track; + /* For updating alloc_covered on frees. */ + u32 alloc_stack_hash; }; extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; From f43827e7825e8ba18804740579145f0baec48e9f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:58 +1100 Subject: [PATCH 0840/1202] kfence-limit-currently-covered-allocations-when-pool-nearly-full-fix * Simplify and just use hash_32(). * Use more random stack_hash_seed. Link: https://lkml.kernel.org/r/YU3MRGaCaJiYht5g@elver.google.com Signed-off-by: Marco Elver Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Aleksandr Nogikh Cc: Jann Horn Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 58a0f6f1acc59..545999d04af46 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -122,14 +123,21 @@ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); * P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)) ^ HNUM */ #define ALLOC_COVERED_HNUM 2 -#define ALLOC_COVERED_SIZE (1 << (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)) -#define ALLOC_COVERED_HNEXT(h) (1664525 * (h) + 1013904223) +#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2) +#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER) +#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER) #define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1) static atomic_t alloc_covered[ALLOC_COVERED_SIZE]; /* Stack depth used to determine uniqueness of an allocation. */ #define UNIQUE_ALLOC_STACK_DEPTH 8UL +/* + * Randomness for stack hashes, making the same collisions across reboots and + * different machines less likely. + */ +static u32 stack_hash_seed __ro_after_init; + /* Statistics counters for debugfs. */ enum kfence_counter_id { KFENCE_COUNTER_ALLOCATED, @@ -166,12 +174,9 @@ static inline bool should_skip_covered(void) static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries) { - /* Some randomness across reboots / different machines. 
*/ - u32 seed = (u32)((unsigned long)__kfence_pool >> (BITS_PER_LONG - 32)); - num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); num_entries = filter_irq_stacks(stack_entries, num_entries); - return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), seed); + return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed); } /* @@ -759,6 +764,7 @@ void __init kfence_init(void) if (!kfence_sample_interval) return; + stack_hash_seed = (u32)random_get_entropy(); if (!kfence_init_pool()) { pr_err("%s failed\n", __func__); return; From 2396a97dbe05734486e020a1363ee342b79f2b4b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:58 +1100 Subject: [PATCH 0841/1202] fixup! kfence: limit currently covered allocations when pool nearly full Fix 32 bit. size_t is UL on 64-bit only; just cast it to size_t mm/kfence/core.c: In function `get_alloc_stack_hash': ./include/linux/minmax.h:20:28: warning: comparison of distinct pointer types lacks a cast 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) | ^~ ./include/linux/minmax.h:26:4: note: in expansion of macro `__typecheck' 26 | (__typecheck(x, y) && __no_side_effects(x, y)) | ^~~~~~~~~~~ ./include/linux/minmax.h:36:24: note: in expansion of macro `__safe_cmp' 36 | __builtin_choose_expr(__safe_cmp(x, y), \ | ^~~~~~~~~~ ./include/linux/minmax.h:45:19: note: in expansion of macro `__careful_cmp' 45 | #define min(x, y) __careful_cmp(x, y, <) | ^~~~~~~~~~~~~ mm/kfence/core.c:177:16: note: in expansion of macro `min' 177 | num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH); | ^~~ Link: https://lkml.kernel.org/r/YVQ0fE4Yil2EX8FI@elver.google.com Signed-off-by: Marco Elver Reported-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 545999d04af46..b61ef93d9f982 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -130,7 +130,7 @@ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); static atomic_t alloc_covered[ALLOC_COVERED_SIZE]; /* Stack depth used to determine uniqueness of an allocation. */ -#define UNIQUE_ALLOC_STACK_DEPTH 8UL +#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8) /* * Randomness for stack hashes, making the same collisions across reboots and From 035f2104ad0c1facbf859532e42700f8babe7b69 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:08:59 +1100 Subject: [PATCH 0842/1202] kfence: add note to documentation about skipping covered allocations Add a note briefly mentioning the new policy about "skipping currently covered allocations if pool close to full." Since this has a notable impact on KFENCE's bug-detection ability on systems with large uptimes, it is worth pointing out the feature. 
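For readers unfamiliar with the data structure, below is a minimal user-space sketch of the counting Bloom filter the preceding patches build on. It is illustrative only: the names, table size, and plain-int counters are assumptions for the example; the kernel version uses atomic_t slots, and the hash-chaining constants here are the LCG from the original patch (a follow-up above switches it to hash_32()). The expected false positive rate follows the standard Bloom bound P = (1 - e^(-HNUM * n / SIZE))^HNUM.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define HNUM 2			/* number of hash functions */
	#define SIZE (1 << 8)		/* table slots, power of two */
	#define MASK (SIZE - 1)

	static int covered[SIZE];	/* per-slot counters (atomic_t in the kernel) */

	/* Derive the next hash from the previous one (LCG, as in the patch). */
	static uint32_t hnext(uint32_t h)
	{
		return 1664525 * h + 1013904223;
	}

	/* Add (val = 1) or remove (val = -1) one occurrence of a stack hash. */
	static void covered_add(uint32_t h, int val)
	{
		for (int i = 0; i < HNUM; i++) {
			covered[h & MASK] += val;
			h = hnext(h);
		}
	}

	/*
	 * True iff every slot the hash maps to is non-zero; can rarely be a
	 * false positive when unrelated hashes collide on all HNUM slots.
	 */
	static bool covered_contains(uint32_t h)
	{
		for (int i = 0; i < HNUM; i++) {
			if (!covered[h & MASK])
				return false;
			h = hnext(h);
		}
		return true;
	}

	int main(void)
	{
		uint32_t stack_hash = 0xdeadbeef;	/* stands in for jhash(stack) */

		covered_add(stack_hash, 1);
		printf("%d\n", covered_contains(stack_hash));	/* 1 */
		covered_add(stack_hash, -1);
		printf("%d\n", covered_contains(stack_hash));	/* 0, barring collisions */
		return 0;
	}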
Link: https://lkml.kernel.org/r/20210923104803.2620285-5-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Dmitry Vyukov Acked-by: Alexander Potapenko Cc: Aleksandr Nogikh Cc: Jann Horn Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kfence.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index 0fbe3308bf37f..d45f952986ae4 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -269,6 +269,17 @@ tail of KFENCE's freelist, so that the least recently freed objects are reused first, and the chances of detecting use-after-frees of recently freed objects is increased. +If pool utilization reaches 75% (default) or above, to reduce the risk of the +pool eventually being fully occupied by allocated objects yet ensure diverse +coverage of allocations, KFENCE limits currently covered allocations of the +same source from further filling up the pool. The "source" of an allocation is +based on its partial allocation stack trace. A side-effect is that this also +limits frequent long-lived allocations (e.g. pagecache) of the same source +filling up the pool permanently, which is the most common risk for the pool +becoming full and the sampled allocation rate dropping to zero. The threshold +at which to start limiting currently covered allocations can be configured via +the boot parameter ``kfence.skip_covered_thresh`` (pool usage%). + Interface --------- From 74089acd74c51f0fa3e9f32a253d83e4e676c65f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:09:00 +1100 Subject: [PATCH 0843/1202] kfence: test: use kunit_skip() to skip tests Use the new kunit_skip() to skip tests if requirements were not met. It makes it easier to see in KUnit's summary if there were skipped tests. Link: https://lkml.kernel.org/r/20210922182541.1372400-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: David Gow Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Aleksandr Nogikh Cc: Taras Madan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/kfence_test.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index f1690cf541998..695030c1fff8b 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -32,6 +32,11 @@ #define arch_kfence_test_address(addr) (addr) #endif +#define KFENCE_TEST_REQUIRES(test, cond) do { \ + if (!(cond)) \ + kunit_skip((test), "Test requires: " #cond); \ +} while (0) + /* Report as observed from console. */ static struct { spinlock_t lock; @@ -555,8 +560,7 @@ static void test_init_on_free(struct kunit *test) }; int i; - if (!IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON)) - return; + KFENCE_TEST_REQUIRES(test, IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON)); /* Assume it hasn't been disabled on command line. */ setup_test_cache(test, size, 0, NULL); @@ -603,10 +607,8 @@ static void test_gfpzero(struct kunit *test) char *buf1, *buf2; int i; - if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100) { - kunit_warn(test, "skipping ... would take too long\n"); - return; - } + /* Skip if we think it'd take too long. 
*/ + KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100); setup_test_cache(test, size, 0, NULL); buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY); From e25a9b7a91d5fd25d01a5e473edc8ac09c47f1e6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:09:01 +1100 Subject: [PATCH 0844/1202] kfence: shorten critical sections of alloc/free Initializing memory and setting/checking the canary bytes is relatively expensive, and doing so in the meta->lock critical sections extends the duration with preemption and interrupts disabled unnecessarily. Any reads to meta->addr and meta->size in kfence_guarded_alloc() and kfence_guarded_free() don't require locking meta->lock as long as the object is removed from the freelist: only kfence_guarded_alloc() sets meta->addr and meta->size after removing it from the freelist, which requires a preceding kfence_guarded_free() returning it to the list or the initial state. Therefore move reads to meta->addr and meta->size, including expensive memory initialization using them, out of meta->lock critical sections. Link: https://lkml.kernel.org/r/20210930153706.2105471-1-elver@google.com Signed-off-by: Marco Elver Acked-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kfence/core.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index b61ef93d9f982..802905b1c89b5 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -309,12 +309,19 @@ static inline bool set_canary_byte(u8 *addr) /* Check canary byte at @addr. */ static inline bool check_canary_byte(u8 *addr) { + struct kfence_metadata *meta; + unsigned long flags; + if (likely(*addr == KFENCE_CANARY_PATTERN(addr))) return true; atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); - kfence_report_error((unsigned long)addr, false, NULL, addr_to_metadata((unsigned long)addr), - KFENCE_ERROR_CORRUPTION); + + meta = addr_to_metadata((unsigned long)addr); + raw_spin_lock_irqsave(&meta->lock, flags); + kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION); + raw_spin_unlock_irqrestore(&meta->lock, flags); + return false; } @@ -324,8 +331,6 @@ static __always_inline void for_each_canary(const struct kfence_metadata *meta, const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE); unsigned long addr; - lockdep_assert_held(&meta->lock); - /* * We'll iterate over each canary byte per-side until fn() returns * false. However, we'll still iterate over the canary bytes to the @@ -414,8 +419,9 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g WRITE_ONCE(meta->cache, cache); meta->size = size; meta->alloc_stack_hash = alloc_stack_hash; + raw_spin_unlock_irqrestore(&meta->lock, flags); - for_each_canary(meta, set_canary_byte); + alloc_covered_add(alloc_stack_hash, 1); /* Set required struct page fields. */ page = virt_to_page(meta->addr); @@ -425,11 +431,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g if (IS_ENABLED(CONFIG_SLAB)) page->s_mem = addr; - raw_spin_unlock_irqrestore(&meta->lock, flags); - - alloc_covered_add(alloc_stack_hash, 1); - /* Memory initialization. 
*/ + for_each_canary(meta, set_canary_byte); /* * We check slab_want_init_on_alloc() ourselves, rather than letting @@ -454,6 +457,7 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z { struct kcsan_scoped_access assert_page_exclusive; unsigned long flags; + bool init; raw_spin_lock_irqsave(&meta->lock, flags); @@ -481,6 +485,13 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z meta->unprotected_page = 0; } + /* Mark the object as freed. */ + metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0); + init = slab_want_init_on_free(meta->cache); + raw_spin_unlock_irqrestore(&meta->lock, flags); + + alloc_covered_add(meta->alloc_stack_hash, -1); + /* Check canary bytes for memory corruption. */ for_each_canary(meta, check_canary_byte); @@ -489,16 +500,9 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z * data is still there, and after a use-after-free is detected, we * unprotect the page, so the data is still accessible. */ - if (!zombie && unlikely(slab_want_init_on_free(meta->cache))) + if (!zombie && unlikely(init)) memzero_explicit(addr, meta->size); - /* Mark the object as freed. */ - metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0); - - raw_spin_unlock_irqrestore(&meta->lock, flags); - - alloc_covered_add(meta->alloc_stack_hash, -1); - /* Protect to detect use-after-frees. */ kfence_protect((unsigned long)addr); From 1679db049c16b5d0714f40756479cf335fccd7b9 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:09:02 +1100 Subject: [PATCH 0845/1202] kfence: always use static branches to guard kfence_alloc() Regardless of KFENCE mode (CONFIG_KFENCE_STATIC_KEYS: either using static keys to gate allocations, or using a simple dynamic branch), always use a static branch to avoid the dynamic branch in kfence_alloc() if KFENCE was disabled at boot. For CONFIG_KFENCE_STATIC_KEYS=n, this now avoids the dynamic branch if KFENCE was disabled at boot. To simplify, also unifies the location where kfence_allocation_gate is read-checked to just be inline in kfence_alloc(). 
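The resulting control flow can be summarized in plain C roughly as follows. This is a user-space sketch of the CONFIG_KFENCE_STATIC_KEYS=n case: the boolean stands in for the static branch (which in the kernel is patched code, not a real load), and all names are illustrative.

	#include <stdatomic.h>
	#include <stddef.h>
	#include <stdio.h>

	static _Bool key_enabled;		/* enabled once at init if KFENCE is on */
	static atomic_int allocation_gate = 1;	/* reset to 0 once per sample interval */

	static void *guarded_alloc_stub(size_t size)
	{
		(void)size;
		return (void *)0x1;		/* placeholder for the real slow path */
	}

	static void *kfence_alloc_slow(size_t size)
	{
		/* Only the first caller after the gate opens may proceed. */
		if (atomic_fetch_add(&allocation_gate, 1) > 0)
			return NULL;
		return guarded_alloc_stub(size);
	}

	static void *kfence_alloc_fast(size_t size)
	{
		if (!key_enabled)			/* static branch analogue */
			return NULL;
		if (atomic_load(&allocation_gate))	/* cheap read: gate still closed */
			return NULL;
		return kfence_alloc_slow(size);
	}

	int main(void)
	{
		key_enabled = 1;			/* kfence_init() succeeded */
		atomic_store(&allocation_gate, 0);	/* the timer opened the gate */
		printf("%p\n", kfence_alloc_fast(64));	/* guarded allocation */
		printf("%p\n", kfence_alloc_fast(64));	/* NULL: gate already taken */
		return 0;
	}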
Link: https://lkml.kernel.org/r/20211019102524.2807208-1-elver@google.com Signed-off-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/kfence.h | 21 +++++++++++---------- mm/kfence/core.c | 16 +++++++--------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 3fe6dd8a18c19..4b5e3679a72c7 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -14,6 +14,9 @@ #ifdef CONFIG_KFENCE +#include +#include + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an @@ -22,13 +25,8 @@ #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE) extern char *__kfence_pool; -#ifdef CONFIG_KFENCE_STATIC_KEYS -#include DECLARE_STATIC_KEY_FALSE(kfence_allocation_key); -#else -#include extern atomic_t kfence_allocation_gate; -#endif /** * is_kfence_address() - check if an address belongs to KFENCE pool @@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); */ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { -#ifdef CONFIG_KFENCE_STATIC_KEYS - if (static_branch_unlikely(&kfence_allocation_key)) +#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0 + if (!static_branch_unlikely(&kfence_allocation_key)) + return NULL; #else - if (unlikely(!atomic_read(&kfence_allocation_gate))) + if (!static_branch_likely(&kfence_allocation_key)) + return NULL; #endif - return __kfence_alloc(s, size, flags); - return NULL; + if (likely(atomic_read(&kfence_allocation_gate))) + return NULL; + return __kfence_alloc(s, size, flags); } /** diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 802905b1c89b5..09945784df9e6 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -104,10 +104,11 @@ struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist); static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */ -#ifdef CONFIG_KFENCE_STATIC_KEYS -/* The static key to set up a KFENCE allocation. */ +/* + * The static key to set up a KFENCE allocation; or if static keys are not used + * to gate allocations, to avoid a load and compare if KFENCE is disabled. + */ DEFINE_STATIC_KEY_FALSE(kfence_allocation_key); -#endif /* Gates the allocation, ensuring only one succeeds in a given period. */ atomic_t kfence_allocation_gate = ATOMIC_INIT(1); @@ -774,6 +775,8 @@ void __init kfence_init(void) return; } + if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS)) + static_branch_enable(&kfence_allocation_key); WRITE_ONCE(kfence_enabled, true); queue_delayed_work(system_unbound_wq, &kfence_timer, 0); pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE, @@ -866,12 +869,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) return NULL; } - /* - * allocation_gate only needs to become non-zero, so it doesn't make - * sense to continue writing to it and pay the associated contention - * cost, in case we have a large number of concurrent allocations. 
- */ - if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1) + if (atomic_inc_return(&kfence_allocation_gate) > 1) return NULL; #ifdef CONFIG_KFENCE_STATIC_KEYS /* From 5a7f650d0399b63ff74161cfaed668519359a5af Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 21 Oct 2021 15:09:02 +1100 Subject: [PATCH 0846/1202] kfence: default to dynamic branch instead of static keys mode We have observed that on very large machines with newer CPUs, the static key/branch switching delay is on the order of milliseconds. This is due to the required broadcast IPIs, which simply does not scale well to hundreds of CPUs (cores). If done too frequently, this can adversely affect tail latencies of various workloads. One workaround is to increase the sample interval to several seconds, while decreasing sampled allocation coverage, but the problem still exists and could still increase tail latencies. As already noted in the Kconfig help text, there are trade-offs: at lower sample intervals the dynamic branch results in better performance; however, at very large sample intervals, the static keys mode can result in better performance -- careful benchmarking is recommended. Our initial benchmarking showed that with large enough sample intervals and workloads stressing the allocator, the static keys mode was slightly better. Evaluating and observing the possible system-wide side-effects of the static-key-switching induced broadcast IPIs, however, was a blind spot (in particular on large machines with 100s of cores). Therefore, a major downside of the static keys mode is, unfortunately, that it is hard to predict performance on new system architectures and topologies, but also making conclusions about performance of new workloads based on a limited set of benchmarks. Most distributions will simply select the defaults, while targeting a large variety of different workloads and system architectures. As such, the better default is CONFIG_KFENCE_STATIC_KEYS=n, and re-enabling it is only recommended after careful evaluation. For reference, on x86-64 the condition in kfence_alloc() generates exactly 2 instructions in the kmem_cache_alloc() fast-path: | ... | cmpl $0x0,0x1a8021c(%rip) # ffffffff82d560d0 | je ffffffff812d6003 | ... which, given kfence_allocation_gate is infrequently modified, should be well predicted by most CPUs. Link: https://lkml.kernel.org/r/20211019102524.2807208-2-elver@google.com Signed-off-by: Marco Elver Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Jann Horn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kfence.rst | 12 ++++++++---- lib/Kconfig.kfence | 26 +++++++++++++++----------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst index d45f952986ae4..ac6b89d1a8c32 100644 --- a/Documentation/dev-tools/kfence.rst +++ b/Documentation/dev-tools/kfence.rst @@ -231,10 +231,14 @@ Guarded allocations are set up based on the sample interval. After expiration of the sample interval, the next allocation through the main allocator (SLAB or SLUB) returns a guarded allocation from the KFENCE object pool (allocation sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and -the next allocation is set up after the expiration of the interval. To "gate" a -KFENCE allocation through the main allocator's fast-path without overhead, -KFENCE relies on static branches via the static keys infrastructure. 
The static -branch is toggled to redirect the allocation to KFENCE. +the next allocation is set up after the expiration of the interval. + +When using ``CONFIG_KFENCE_STATIC_KEYS=y``, KFENCE allocations are "gated" +through the main allocator's fast-path by relying on static branches via the +static keys infrastructure. The static branch is toggled to redirect the +allocation to KFENCE. Depending on sample interval, target workloads, and +system architecture, this may perform better than the simple dynamic branch. +Careful benchmarking is recommended. KFENCE objects each reside on a dedicated page, at either the left or right page boundaries selected at random. The pages to the left and right of the diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence index e641add339475..912f252a41fc6 100644 --- a/lib/Kconfig.kfence +++ b/lib/Kconfig.kfence @@ -25,17 +25,6 @@ menuconfig KFENCE if KFENCE -config KFENCE_STATIC_KEYS - bool "Use static keys to set up allocations" - default y - depends on JUMP_LABEL # To ensure performance, require jump labels - help - Use static keys (static branches) to set up KFENCE allocations. Using - static keys is normally recommended, because it avoids a dynamic - branch in the allocator's fast path. However, with very low sample - intervals, or on systems that do not support jump labels, a dynamic - branch may still be an acceptable performance trade-off. - config KFENCE_SAMPLE_INTERVAL int "Default sample interval in milliseconds" default 100 @@ -56,6 +45,21 @@ config KFENCE_NUM_OBJECTS pages are required; with one containing the object and two adjacent ones used as guard pages. +config KFENCE_STATIC_KEYS + bool "Use static keys to set up allocations" if EXPERT + depends on JUMP_LABEL + help + Use static keys (static branches) to set up KFENCE allocations. This + option is only recommended when using very large sample intervals, or + performance has carefully been evaluated with this option. + + Using static keys comes with trade-offs that need to be carefully + evaluated given target workloads and system architectures. Notably, + enabling and disabling static keys invoke IPI broadcasts, the latency + and impact of which is much harder to predict than a dynamic branch. + + Say N if you are unsure. + config KFENCE_STRESS_TEST_FAULTS int "Stress testing of fault handling and error reporting" if EXPERT default 0 From abcdd4b0df06d8f37b32388b84609291551c945f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Oct 2021 15:09:03 +1100 Subject: [PATCH 0847/1202] mm/damon: grammar s/works/work/ Correct a singular versus plural grammar mistake in the help text for the DAMON_VADDR config symbol. Link: https://lkml.kernel.org/r/20210914073451.3883834-1-geert@linux-m68k.org Fixes: 3f49584b262cf8f4 ("mm/damon: implement primitives for the virtual memory address spaces") Signed-off-by: Geert Uytterhoeven Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index 37024798a97ca..ba8898c7eb8eb 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -30,7 +30,7 @@ config DAMON_VADDR select PAGE_IDLE_FLAG help This builds the default data access monitoring primitives for DAMON - that works for virtual address spaces. + that work for virtual address spaces. 
config DAMON_VADDR_KUNIT_TEST bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS From 21a5082297c7894a35da392f65293eba5d748f8c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:04 +1100 Subject: [PATCH 0848/1202] Documentation/vm: move user guides to admin-guide/mm/ Most memory management user guide documents are in 'admin-guide/mm/', but two of those are in 'vm/'. This commit moves the two docs into 'admin-guide/mm' for easier documents finding. Link: https://lkml.kernel.org/r/20210917123958.3819-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/index.rst | 2 ++ .../{vm => admin-guide/mm}/swap_numa.rst | 0 .../{vm => admin-guide/mm}/zswap.rst | 0 Documentation/vm/index.rst | 26 ++++--------------- 4 files changed, 7 insertions(+), 21 deletions(-) rename Documentation/{vm => admin-guide/mm}/swap_numa.rst (100%) rename Documentation/{vm => admin-guide/mm}/zswap.rst (100%) diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst index cbd19d5e625f3..c21b5823f1261 100644 --- a/Documentation/admin-guide/mm/index.rst +++ b/Documentation/admin-guide/mm/index.rst @@ -37,5 +37,7 @@ the Linux memory management. numaperf pagemap soft-dirty + swap_numa transhuge userfaultfd + zswap diff --git a/Documentation/vm/swap_numa.rst b/Documentation/admin-guide/mm/swap_numa.rst similarity index 100% rename from Documentation/vm/swap_numa.rst rename to Documentation/admin-guide/mm/swap_numa.rst diff --git a/Documentation/vm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst similarity index 100% rename from Documentation/vm/zswap.rst rename to Documentation/admin-guide/mm/zswap.rst diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index b51f0d8992f8f..6f5ffef4b716a 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -3,27 +3,11 @@ Linux Memory Management Documentation ===================================== This is a collection of documents about the Linux memory management (mm) -subsystem. If you are looking for advice on simply allocating memory, -see the :ref:`memory_allocation`. - -User guides for MM features -=========================== - -The following documents provide guides for controlling and tuning -various features of the Linux memory management - -.. toctree:: - :maxdepth: 1 - - swap_numa - zswap - -Kernel developers MM documentation -================================== - -The below documents describe MM internals with different level of -details ranging from notes and mailing list responses to elaborate -descriptions of data structures and algorithms. +subsystem internals with different level of details ranging from notes and +mailing list responses for elaborating descriptions of data structures and +algorithms. If you are looking for advice on simply allocating memory, see the +:ref:`memory_allocation`. For controlling and tuning guides, see the +:doc:`admin guide <../admin-guide/mm/index>`. .. toctree:: :maxdepth: 1 From 9260162f77c5d980fa44ee0773da5f084df0a051 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:05 +1100 Subject: [PATCH 0849/1202] MAINTAINERS: update SeongJae's email address This commit updates SeongJae's email address in MAINTAINERS file to his preferred one. 
Link: https://lkml.kernel.org/r/20210917123958.3819-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d26..430f2ca3f68ff 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5161,7 +5161,7 @@ F: net/ax25/ax25_timer.c F: net/ax25/sysctl_net_ax25.c DATA ACCESS MONITOR -M: SeongJae Park +M: SeongJae Park L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/damon/ From f9863304e98299ed3f5a723fbf152aca78095d06 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:06 +1100 Subject: [PATCH 0850/1202] docs/vm/damon: remove broken reference Building DAMON documents warns for a reference to nonexisting doc, as below: $ time make htmldocs [...] Documentation/vm/damon/index.rst:24: WARNING: toctree contains reference to nonexisting document 'vm/damon/plans' This commit fixes the warning by removing the wrong reference. Link: https://lkml.kernel.org/r/20210917123958.3819-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/vm/damon/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/vm/damon/index.rst b/Documentation/vm/damon/index.rst index a2858baf3bf1d..48c0bbff98b2f 100644 --- a/Documentation/vm/damon/index.rst +++ b/Documentation/vm/damon/index.rst @@ -27,4 +27,3 @@ workloads and systems. faq design api - plans From 222a062afe6ba56f18c09dde81ba23536a743f55 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:06 +1100 Subject: [PATCH 0851/1202] include/linux/damon.h: fix kernel-doc comments for 'damon_callback' A few Kernel-doc comments in 'damon.h' are broken. This commit fixes those. Link: https://lkml.kernel.org/r/20210917123958.3819-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index d68b67b8d458d..755d70804705b 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -62,7 +62,7 @@ struct damon_target { struct damon_ctx; /** - * struct damon_primitive Monitoring primitives for given use cases. + * struct damon_primitive - Monitoring primitives for given use cases. * * @init: Initialize primitive-internal data structures. * @update: Update primitive-internal data structures. @@ -108,8 +108,8 @@ struct damon_primitive { void (*cleanup)(struct damon_ctx *context); }; -/* - * struct damon_callback Monitoring events notification callbacks. +/** + * struct damon_callback - Monitoring events notification callbacks. * * @before_start: Called before starting the monitoring. * @after_sampling: Called after each sampling. From 1115a97fd211e7e281a8960aca79a91f802f60fc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:07 +1100 Subject: [PATCH 0852/1202] mm/damon/core: print kdamond start log in debug mode only Logging of kdamond startup is using 'pr_info()' unnecessarily. This commit makes it to use 'pr_debug()' instead. 
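Note that unlike pr_info(), pr_debug() compiles to a no-op unless DEBUG is defined for the file or CONFIG_DYNAMIC_DEBUG is enabled; in the latter case the message remains available at runtime via /sys/kernel/debug/dynamic_debug/control, so the information is not lost for those who want it.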
Link: https://lkml.kernel.org/r/20210917123958.3819-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 30e9211f494a7..874558a790a0f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -653,7 +653,7 @@ static int kdamond_fn(void *data) unsigned long sz_limit = 0; mutex_lock(&ctx->kdamond_lock); - pr_info("kdamond (%d) starts\n", ctx->kdamond->pid); + pr_debug("kdamond (%d) starts\n", ctx->kdamond->pid); mutex_unlock(&ctx->kdamond_lock); if (ctx->primitive.init) From 531d5c55adbbd904b86dd3979ed3cd346a5bd3d7 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 21 Oct 2021 15:09:08 +1100 Subject: [PATCH 0853/1202] mm/damon: remove unnecessary do_exit() from kdamond Just return from the kthread function. Link: https://lkml.kernel.org/r/20210927232421.17694-1-changbin.du@gmail.com Signed-off-by: Changbin Du Cc: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 874558a790a0f..61a9e3b37bc9d 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -714,7 +714,7 @@ static int kdamond_fn(void *data) nr_running_ctxs--; mutex_unlock(&damon_lock); - do_exit(0); + return 0; } #include "core-test.h" From dbab189f7c06cf9d08ce02bed11814c3775bc4ba Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 21 Oct 2021 15:09:08 +1100 Subject: [PATCH 0854/1202] mm/damon: needn't hold kdamond_lock to print pid of kdamond Just get the pid by 'current->pid'. Meanwhile, to be symmetrical make the 'starts' and 'finishes' logs both use debug level. Link: https://lkml.kernel.org/r/20210927232432.17750-1-changbin.du@gmail.com Signed-off-by: Changbin Du Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 61a9e3b37bc9d..8171e7dddc309 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -652,9 +652,7 @@ static int kdamond_fn(void *data) unsigned int max_nr_accesses = 0; unsigned long sz_limit = 0; - mutex_lock(&ctx->kdamond_lock); - pr_debug("kdamond (%d) starts\n", ctx->kdamond->pid); - mutex_unlock(&ctx->kdamond_lock); + pr_debug("kdamond (%d) starts\n", current->pid); if (ctx->primitive.init) ctx->primitive.init(ctx); @@ -705,7 +703,7 @@ static int kdamond_fn(void *data) if (ctx->primitive.cleanup) ctx->primitive.cleanup(ctx); - pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid); + pr_debug("kdamond (%d) finishes\n", current->pid); mutex_lock(&ctx->kdamond_lock); ctx->kdamond = NULL; mutex_unlock(&ctx->kdamond_lock); From 60f5f8b6770ad2257fe20415afabd677f0167058 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Oct 2021 15:09:09 +1100 Subject: [PATCH 0855/1202] mm/damon/core: nullify pointer ctx->kdamond with a NULL Currently a plain integer is being used to nullify the pointer ctx->kdamond. Use NULL instead. 
Cleans up sparse warning: mm/damon/core.c:317:40: warning: Using plain integer as NULL pointer Link: https://lkml.kernel.org/r/20210925215908.181226-1-colin.king@canonical.com Signed-off-by: Colin Ian King Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 8171e7dddc309..d993db50280cb 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -314,7 +314,7 @@ static int __damon_start(struct damon_ctx *ctx) nr_running_ctxs); if (IS_ERR(ctx->kdamond)) { err = PTR_ERR(ctx->kdamond); - ctx->kdamond = 0; + ctx->kdamond = NULL; } } mutex_unlock(&ctx->kdamond_lock); From 1ab3e0ba73257f62e8b44fceac285009a746921a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:10 +1100 Subject: [PATCH 0856/1202] mm/damon/core: account age of target regions Patch series "Implement Data Access Monitoring-based Memory Operation Schemes". Introduction ============ DAMON[1] can be used as a primitive for data access aware memory management optimizations. For that, users who want such optimizations should run DAMON, read the monitoring results, analyze it, plan a new memory management scheme, and apply the new scheme by themselves. Such efforts will be inevitable for some complicated optimizations. However, in many other cases, the users would simply want the system to apply a memory management action to a memory region of a specific size having a specific access frequency for a specific time. For example, "page out a memory region larger than 100 MiB keeping only rare accesses more than 2 minutes", or "Do not use THP for a memory region larger than 2 MiB rarely accessed for more than 1 seconds". To make the works easier and non-redundant, this patchset implements a new feature of DAMON, which is called Data Access Monitoring-based Operation Schemes (DAMOS). Using the feature, users can describe the normal schemes in a simple way and ask DAMON to execute those on its own. [1] https://damonitor.github.io Evaluations =========== DAMOS is accurate and useful for memory management optimizations. An experimental DAMON-based operation scheme for THP, 'ethp', removes 76.15% of THP memory overheads while preserving 51.25% of THP speedup. Another experimental DAMON-based 'proactive reclamation' implementation, 'prcl', reduces 93.38% of residential sets and 23.63% of system memory footprint while incurring only 1.22% runtime overhead in the best case (parsec3/freqmine). NOTE that the experimental THP optimization and proactive reclamation are not for production but only for proof of concepts. Please refer to the showcase web site's evaluation document[1] for detailed evaluation setup and results. [1] https://damonitor.github.io/doc/html/v34/vm/damon/eval.html Long-term Support Trees ----------------------- For people who want to test DAMON but using LTS kernels, there are another couple of trees based on two latest LTS kernels respectively and containing the 'damon/master' backports. - For v5.4.y: https://git.kernel.org/sj/h/damon/for-v5.4.y - For v5.10.y: https://git.kernel.org/sj/h/damon/for-v5.10.y Sequence Of Patches =================== The 1st patch accounts age of each region. The 2nd patch implements the core of the DAMON-based operation schemes feature. The 3rd patch makes the default monitoring primitives for virtual address spaces to support the schemes. From this point, the kernel space users can use DAMOS. 
The 4th patch exports the feature to the user space via the debugfs interface. The 5th patch implements schemes statistics feature for easier tuning of the schemes and runtime access pattern analysis, and the 6th patch adds selftests for these changes. Finally, the 7th patch documents this new feature. This patch (of 7): DAMON can be used for data access pattern aware memory management optimizations. For that, users should run DAMON, read the monitoring results, analyze it, plan a new memory management scheme, and apply the new scheme by themselves. It would not be too hard, but still require some level of effort. For complicated cases, this effort is inevitable. That said, in many cases, users would simply want to apply an actions to a memory region of a specific size having a specific access frequency for a specific time. For example, "page out a memory region larger than 100 MiB but having a low access frequency more than 10 minutes", or "Use THP for a memory region larger than 2 MiB having a high access frequency for more than 2 seconds". For such optimizations, users will need to first account the age of each region themselves. To reduce such efforts, this commit implements a simple age account of each region in DAMON. For each aggregation step, DAMON compares the access frequency with that from last aggregation and reset the age of the region if the change is significant. Else, the age is incremented. Also, in case of the merge of regions, the region size-weighted average of the ages is set as the age of merged new region. Link: https://lkml.kernel.org/r/20211001125604.29660-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211001125604.29660-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Cameron Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Jonathan Corbet Cc: David Hildenbrand Cc: David Woodhouse Cc: Marco Elver Cc: Leonard Foerster Cc: Greg Thelen Cc: Markus Boehme Cc: David Rienjes Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 10 ++++++++++ mm/damon/core.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 755d70804705b..3e8215debbd47 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -31,12 +31,22 @@ struct damon_addr_range { * @sampling_addr: Address of the sample for the next access check. * @nr_accesses: Access frequency of this region. * @list: List head for siblings. + * @age: Age of this region. + * + * @age is initially zero, increased for each aggregation interval, and reset + * to zero again if the access frequency is significantly changed. If two + * regions are merged into a new region, both @nr_accesses and @age of the new + * region are set as region size-weighted average of those of the two regions. */ struct damon_region { struct damon_addr_range ar; unsigned long sampling_addr; unsigned int nr_accesses; struct list_head list; + + unsigned int age; +/* private: Internal value for age calculation. 
*/ + unsigned int last_nr_accesses; }; /** diff --git a/mm/damon/core.c b/mm/damon/core.c index d993db50280cb..3efbe80779db2 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -45,6 +45,9 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->nr_accesses = 0; INIT_LIST_HEAD(®ion->list); + region->age = 0; + region->last_nr_accesses = 0; + return region; } @@ -444,6 +447,7 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) damon_for_each_region(r, t) { trace_damon_aggregated(t, r, damon_nr_regions(t)); + r->last_nr_accesses = r->nr_accesses; r->nr_accesses = 0; } } @@ -461,6 +465,7 @@ static void damon_merge_two_regions(struct damon_target *t, l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / (sz_l + sz_r); + l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); l->ar.end = r->ar.end; damon_destroy_region(r, t); } @@ -480,6 +485,11 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, struct damon_region *r, *prev = NULL, *next; damon_for_each_region_safe(r, next, t) { + if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres) + r->age = 0; + else + r->age++; + if (prev && prev->ar.end == r->ar.start && diff_of(prev->nr_accesses, r->nr_accesses) <= thres && sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) @@ -527,6 +537,9 @@ static void damon_split_region_at(struct damon_ctx *ctx, r->ar.end = new->ar.start; + new->age = r->age; + new->last_nr_accesses = r->last_nr_accesses; + damon_insert_region(new, r, damon_next_region(r), t); } From 4fead03c79d796f2defc456016adb83594dbd031 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:10 +1100 Subject: [PATCH 0857/1202] mm/damon/core: implement DAMON-based Operation Schemes (DAMOS) In many cases, users might use DAMON for simple data access aware memory management optimizations such as applying an operation scheme to a memory region of a specific size having a specific access frequency for a specific time. For example, "page out a memory region larger than 100 MiB but having a low access frequency more than 10 minutes", or "Use THP for a memory region larger than 2 MiB having a high access frequency for more than 2 seconds". Most simple form of the solution would be doing offline data access pattern profiling using DAMON and modifying the application source code or system configuration based on the profiling results. Or, developing a daemon constructed with two modules (one for access monitoring and the other for applying memory management actions via mlock(), madvise(), sysctl, etc) is imaginable. To avoid users spending their time for implementation of such simple data access monitoring-based operation schemes, this commit makes DAMON to handle such schemes directly. With this commit, users can simply specify their desired schemes to DAMON. Then, DAMON will automatically apply the schemes to the user-specified target processes. Each of the schemes is composed with conditions for filtering of the target memory regions and desired memory management action for the target. Specifically, the format is:: The filtering conditions are size of memory region, number of accesses to the region monitored by DAMON, and the age of the region. The age of region is incremented periodically but reset when its addresses or access frequency has significantly changed or the action of a scheme was applied. 
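For kernel-space users, putting the pieces together could look like the sketch below, built on the damon_new_scheme()/damon_set_schemes() interface added by this patch. The 100 MiB and 10 minute numbers and the helper name are illustrative, and the age bound of 6000 assumes the default 100ms aggregation interval (region ages are counted in aggregation intervals).

	/*
	 * Illustrative only: "page out regions of at least 100 MiB that kept a
	 * very low access frequency for at least 10 minutes". Must be called
	 * while the kdamond of @ctx is not running.
	 */
	static int example_install_scheme(struct damon_ctx *ctx)
	{
		struct damos *scheme;

		scheme = damon_new_scheme(
				100 * 1024 * 1024, ULONG_MAX,	/* region size */
				0, 1,				/* nr_accesses */
				6000, UINT_MAX,	/* age, in aggregation intervals */
				DAMOS_PAGEOUT);
		if (!scheme)
			return -ENOMEM;

		return damon_set_schemes(ctx, &scheme, 1);
	}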
For the action, current implementation supports a few of madvise()-like hints, ``WILLNEED``, ``COLD``, ``PAGEOUT``, ``HUGEPAGE``, and ``NOHUGEPAGE``. Because DAMON supports various address spaces and application of the actions to a monitoring target region is dependent to the type of the target address space, the application code should be implemented by each primitives and registered to the framework. Note that this commit only implements the framework part. Following commit will implement the action applications for virtual address spaces primitives. Link: https://lkml.kernel.org/r/20211001125604.29660-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 66 +++++++++++++++++++++++++ mm/damon/core.c | 109 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 3e8215debbd47..dbe18b0fb795c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -69,6 +69,48 @@ struct damon_target { struct list_head list; }; +/** + * enum damos_action - Represents an action of a Data Access Monitoring-based + * Operation Scheme. + * + * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. + * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. + * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. + * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + */ +enum damos_action { + DAMOS_WILLNEED, + DAMOS_COLD, + DAMOS_PAGEOUT, + DAMOS_HUGEPAGE, + DAMOS_NOHUGEPAGE, +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. + * @action: &damo_action to be applied to the target regions. + * @list: List head for siblings. + * + * Note that both the minimums and the maximums are inclusive. + */ +struct damos { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; + enum damos_action action; + struct list_head list; +}; + struct damon_ctx; /** @@ -79,6 +121,7 @@ struct damon_ctx; * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. * @reset_aggregated: Reset aggregated accesses monitoring results. + * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. * @cleanup: Clean up the context. * @@ -104,6 +147,9 @@ struct damon_ctx; * of its update. The value will be used for regions adjustment threshold. * @reset_aggregated should reset the access monitoring results that aggregated * by @check_accesses. + * @apply_scheme is called from @kdamond when a region for user provided + * DAMON-based operation scheme is found. 
It should apply the scheme's action + * to the region. This is not used for &DAMON_ARBITRARY_TARGET case. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. @@ -114,6 +160,8 @@ struct damon_primitive { void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); void (*reset_aggregated)(struct damon_ctx *context); + int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, + struct damon_region *r, struct damos *scheme); bool (*target_valid)(void *target); void (*cleanup)(struct damon_ctx *context); }; @@ -192,6 +240,7 @@ struct damon_callback { * @min_nr_regions: The minimum number of adaptive monitoring regions. * @max_nr_regions: The maximum number of adaptive monitoring regions. * @adaptive_targets: Head of monitoring targets (&damon_target) list. + * @schemes: Head of schemes (&damos) list. */ struct damon_ctx { unsigned long sample_interval; @@ -213,6 +262,7 @@ struct damon_ctx { unsigned long min_nr_regions; unsigned long max_nr_regions; struct list_head adaptive_targets; + struct list_head schemes; }; #define damon_next_region(r) \ @@ -233,6 +283,12 @@ struct damon_ctx { #define damon_for_each_target_safe(t, next, ctx) \ list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) +#define damon_for_each_scheme(s, ctx) \ + list_for_each_entry(s, &(ctx)->schemes, list) + +#define damon_for_each_scheme_safe(s, next, ctx) \ + list_for_each_entry_safe(s, next, &(ctx)->schemes, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); @@ -242,6 +298,14 @@ inline void damon_insert_region(struct damon_region *r, void damon_add_region(struct damon_region *r, struct damon_target *t); void damon_destroy_region(struct damon_region *r, struct damon_target *t); +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action); +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); +void damon_destroy_scheme(struct damos *s); + struct damon_target *damon_new_target(unsigned long id); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); void damon_free_target(struct damon_target *t); @@ -255,6 +319,8 @@ int damon_set_targets(struct damon_ctx *ctx, int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg); +int damon_set_schemes(struct damon_ctx *ctx, + struct damos **schemes, ssize_t nr_schemes); int damon_nr_running_ctxs(void); int damon_start(struct damon_ctx **ctxs, int nr_ctxs); diff --git a/mm/damon/core.c b/mm/damon/core.c index 3efbe80779db2..0ed97b21cbb6e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -85,6 +85,50 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t) damon_free_region(r); } +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action) +{ + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; + scheme->min_sz_region = min_sz_region; + scheme->max_sz_region = max_sz_region; + 
scheme->min_nr_accesses = min_nr_accesses; + scheme->max_nr_accesses = max_nr_accesses; + scheme->min_age_region = min_age_region; + scheme->max_age_region = max_age_region; + scheme->action = action; + INIT_LIST_HEAD(&scheme->list); + + return scheme; +} + +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) +{ + list_add_tail(&s->list, &ctx->schemes); +} + +static void damon_del_scheme(struct damos *s) +{ + list_del(&s->list); +} + +static void damon_free_scheme(struct damos *s) +{ + kfree(s); +} + +void damon_destroy_scheme(struct damos *s) +{ + damon_del_scheme(s); + damon_free_scheme(s); +} + /* * Construct a damon_target struct * @@ -156,6 +200,7 @@ struct damon_ctx *damon_new_ctx(void) ctx->max_nr_regions = 1000; INIT_LIST_HEAD(&ctx->adaptive_targets); + INIT_LIST_HEAD(&ctx->schemes); return ctx; } @@ -175,7 +220,13 @@ static void damon_destroy_targets(struct damon_ctx *ctx) void damon_destroy_ctx(struct damon_ctx *ctx) { + struct damos *s, *next_s; + damon_destroy_targets(ctx); + + damon_for_each_scheme_safe(s, next_s, ctx) + damon_destroy_scheme(s); + kfree(ctx); } @@ -250,6 +301,30 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, return 0; } +/** + * damon_set_schemes() - Set data access monitoring based operation schemes. + * @ctx: monitoring context + * @schemes: array of the schemes + * @nr_schemes: number of entries in @schemes + * + * This function should not be called while the kdamond of the context is + * running. + * + * Return: 0 if success, or negative error code otherwise. + */ +int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, + ssize_t nr_schemes) +{ + struct damos *s, *next; + ssize_t i; + + damon_for_each_scheme_safe(s, next, ctx) + damon_destroy_scheme(s); + for (i = 0; i < nr_schemes; i++) + damon_add_scheme(ctx, schemes[i]); + return 0; +} + /** * damon_nr_running_ctxs() - Return number of currently running contexts. */ @@ -453,6 +528,39 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) } } +static void damon_do_apply_schemes(struct damon_ctx *c, + struct damon_target *t, + struct damon_region *r) +{ + struct damos *s; + unsigned long sz; + + damon_for_each_scheme(s, c) { + sz = r->ar.end - r->ar.start; + if (sz < s->min_sz_region || s->max_sz_region < sz) + continue; + if (r->nr_accesses < s->min_nr_accesses || + s->max_nr_accesses < r->nr_accesses) + continue; + if (r->age < s->min_age_region || s->max_age_region < r->age) + continue; + if (c->primitive.apply_scheme) + c->primitive.apply_scheme(c, t, r, s); + r->age = 0; + } +} + +static void kdamond_apply_schemes(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) + damon_do_apply_schemes(c, t, r); + } +} + #define sz_damon_region(r) (r->ar.end - r->ar.start) /* @@ -693,6 +801,7 @@ static int kdamond_fn(void *data) if (ctx->callback.after_aggregation && ctx->callback.after_aggregation(ctx)) set_kdamond_stop(ctx); + kdamond_apply_schemes(ctx); kdamond_reset_aggregated(ctx); kdamond_split_regions(ctx); if (ctx->primitive.reset_aggregated) From 892f66bf21da1e0b0e7ed742b24ed7908493242b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:11 +1100 Subject: [PATCH 0858/1202] mm/damon/vaddr: support DAMON-based Operation Schemes This commit makes DAMON's default primitives for virtual address spaces to support DAMON-based Operation Schemes (DAMOS) by implementing actions application functions and registering it to the monitoring context. 
The implementation simply links 'madvise()' for related DAMOS actions. That is, 'madvise(MADV_WILLNEED)' is called for 'WILLNEED' DAMOS action and similar for other actions ('COLD', 'PAGEOUT', 'HUGEPAGE', 'NOHUGEPAGE'). So, the kernel space DAMON users can now use the DAMON-based optimizations with only small amount of code. Link: https://lkml.kernel.org/r/20211001125604.29660-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 2 ++ mm/damon/vaddr.c | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index dbe18b0fb795c..be6b6e81e8ee1 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -337,6 +337,8 @@ void damon_va_prepare_access_checks(struct damon_ctx *ctx); unsigned int damon_va_check_accesses(struct damon_ctx *ctx); bool damon_va_target_valid(void *t); void damon_va_cleanup(struct damon_ctx *ctx); +int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t, + struct damon_region *r, struct damos *scheme); void damon_va_set_primitives(struct damon_ctx *ctx); #endif /* CONFIG_DAMON_VADDR */ diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 58c1fb2aafa91..3e1c74d36bab7 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "damon-va: " fmt +#include #include #include #include @@ -658,6 +659,60 @@ bool damon_va_target_valid(void *target) return false; } +#ifndef CONFIG_ADVISE_SYSCALLS +static int damos_madvise(struct damon_target *target, struct damon_region *r, + int behavior) +{ + return -EINVAL; +} +#else +static int damos_madvise(struct damon_target *target, struct damon_region *r, + int behavior) +{ + struct mm_struct *mm; + int ret = -ENOMEM; + + mm = damon_get_mm(target); + if (!mm) + goto out; + + ret = do_madvise(mm, PAGE_ALIGN(r->ar.start), + PAGE_ALIGN(r->ar.end - r->ar.start), behavior); + mmput(mm); +out: + return ret; +} +#endif /* CONFIG_ADVISE_SYSCALLS */ + +int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *scheme) +{ + int madv_action; + + switch (scheme->action) { + case DAMOS_WILLNEED: + madv_action = MADV_WILLNEED; + break; + case DAMOS_COLD: + madv_action = MADV_COLD; + break; + case DAMOS_PAGEOUT: + madv_action = MADV_PAGEOUT; + break; + case DAMOS_HUGEPAGE: + madv_action = MADV_HUGEPAGE; + break; + case DAMOS_NOHUGEPAGE: + madv_action = MADV_NOHUGEPAGE; + break; + default: + pr_warn("Wrong action %d\n", scheme->action); + return -EINVAL; + } + + return damos_madvise(t, r, madv_action); +} + void damon_va_set_primitives(struct damon_ctx *ctx) { ctx->primitive.init = damon_va_init; @@ -667,6 +722,7 @@ void damon_va_set_primitives(struct damon_ctx *ctx) ctx->primitive.reset_aggregated = NULL; ctx->primitive.target_valid = damon_va_target_valid; ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_va_apply_scheme; } #include "vaddr-test.h" From fc292b655c985c74c9c3fc82372302b13731eb45 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:12 +1100 Subject: [PATCH 0859/1202] mm/damon/dbgfs: support DAMON-based Operation Schemes This commit makes 'damon-dbgfs' to support the data access monitoring 
oriented memory management schemes. Users can read and update the schemes using ``/damon/schemes`` file. The format is:: Link: https://lkml.kernel.org/r/20211001125604.29660-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs.c | 165 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 3 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index faee070977d80..78b7a04490c57 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -98,6 +98,159 @@ static ssize_t dbgfs_attrs_write(struct file *file, return ret; } +static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damos *s; + int written = 0; + int rc; + + damon_for_each_scheme(s, c) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %u %u %u %u %d\n", + s->min_sz_region, s->max_sz_region, + s->min_nr_accesses, s->max_nr_accesses, + s->min_age_region, s->max_age_region, + s->action); + if (!rc) + return -ENOMEM; + + written += rc; + } + return written; +} + +static ssize_t dbgfs_schemes_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_schemes(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes) +{ + ssize_t i; + + for (i = 0; i < nr_schemes; i++) + kfree(schemes[i]); + kfree(schemes); +} + +static bool damos_action_valid(int action) +{ + switch (action) { + case DAMOS_WILLNEED: + case DAMOS_COLD: + case DAMOS_PAGEOUT: + case DAMOS_HUGEPAGE: + case DAMOS_NOHUGEPAGE: + return true; + default: + return false; + } +} + +/* + * Converts a string into an array of struct damos pointers + * + * Returns an array of struct damos pointers that converted if the conversion + * success, or NULL otherwise. 
+ */ +static struct damos **str_to_schemes(const char *str, ssize_t len, + ssize_t *nr_schemes) +{ + struct damos *scheme, **schemes; + const int max_nr_schemes = 256; + int pos = 0, parsed, ret; + unsigned long min_sz, max_sz; + unsigned int min_nr_a, max_nr_a, min_age, max_age; + unsigned int action; + + schemes = kmalloc_array(max_nr_schemes, sizeof(scheme), + GFP_KERNEL); + if (!schemes) + return NULL; + + *nr_schemes = 0; + while (pos < len && *nr_schemes < max_nr_schemes) { + ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u%n", + &min_sz, &max_sz, &min_nr_a, &max_nr_a, + &min_age, &max_age, &action, &parsed); + if (ret != 7) + break; + if (!damos_action_valid(action)) { + pr_err("wrong action %d\n", action); + goto fail; + } + + pos += parsed; + scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, + min_age, max_age, action); + if (!scheme) + goto fail; + + schemes[*nr_schemes] = scheme; + *nr_schemes += 1; + } + return schemes; +fail: + free_schemes_arr(schemes, *nr_schemes); + return NULL; +} + +static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + struct damos **schemes; + ssize_t nr_schemes = 0, ret = count; + int err; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + schemes = str_to_schemes(kbuf, ret, &nr_schemes); + if (!schemes) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + err = damon_set_schemes(ctx, schemes, nr_schemes); + if (err) + ret = err; + else + nr_schemes = 0; +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + free_schemes_arr(schemes, nr_schemes); +out: + kfree(kbuf); + return ret; +} + static inline bool targetid_is_pid(const struct damon_ctx *ctx) { return ctx->primitive.target_valid == damon_va_target_valid; @@ -279,6 +432,12 @@ static const struct file_operations attrs_fops = { .write = dbgfs_attrs_write, }; +static const struct file_operations schemes_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_schemes_read, + .write = dbgfs_schemes_write, +}; + static const struct file_operations target_ids_fops = { .open = damon_dbgfs_open, .read = dbgfs_target_ids_read, @@ -292,10 +451,10 @@ static const struct file_operations kdamond_pid_fops = { static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) { - const char * const file_names[] = {"attrs", "target_ids", + const char * const file_names[] = {"attrs", "schemes", "target_ids", "kdamond_pid"}; - const struct file_operations *fops[] = {&attrs_fops, &target_ids_fops, - &kdamond_pid_fops}; + const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, + &target_ids_fops, &kdamond_pid_fops}; int i; for (i = 0; i < ARRAY_SIZE(file_names); i++) From edefc6a6c56c6b5da80258b3706776f91fe36829 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:12 +1100 Subject: [PATCH 0860/1202] mm/damon/schemes: implement statistics feature To tune the DAMON-based operation schemes, knowing how many and how large regions are affected by each of the schemes will be helpful. Those stats could be used not only for the tuning, but also for monitoring of the working set size and the number of regions, if the scheme does not change the program behavior too much. For this reason, this commit implements the statistics for the schemes.
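For kernel-space users, one line of the 'schemes' input corresponds to a single damon_new_scheme() call; a minimal sketch follows (the signature is the one used by str_to_schemes() above, the concrete numbers are illustrative only):

        /*
         * Build the scheme that the debugfs line "4096 8192 0 5 10 20 2"
         * would produce: page out regions of size [4096, 8192] bytes that
         * showed [0, 5] accesses per aggregation interval for [10, 20]
         * aggregation intervals.
         */
        struct damos *s = damon_new_scheme(4096, 8192, 0, 5, 10, 20,
                        DAMOS_PAGEOUT);

        if (!s)
                return -ENOMEM;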
The total number and size of the regions that each scheme is applied to are exported to users via '->stat_count' and '->stat_sz' of 'struct damos'. Admins can also check the numbers by reading the 'schemes' debugfs file; the last two integers on each line now represent the stats. To allow collecting the stats without changing the program behavior, this commit also adds a new scheme action, 'DAMOS_STAT'. Note that 'DAMOS_STAT' not only performs no memory operation action, but also does not reset the age of regions. Link: https://lkml.kernel.org/r/20211001125604.29660-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 10 +++++++++- mm/damon/core.c | 7 ++++++- mm/damon/dbgfs.c | 5 +++-- mm/damon/vaddr.c | 2 ++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index be6b6e81e8ee1..f301bb53381c1 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -78,6 +78,7 @@ struct damon_target { * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_STAT: Do nothing but count the stat. */ enum damos_action { DAMOS_WILLNEED, @@ -85,6 +86,7 @@ enum damos_action { DAMOS_PAGEOUT, DAMOS_HUGEPAGE, DAMOS_NOHUGEPAGE, + DAMOS_STAT, /* Do nothing but only record the stat */ }; /** @@ -96,9 +98,13 @@ enum damos_action { * @min_age_region: Minimum age of target regions. * @max_age_region: Maximum age of target regions. * @action: &damo_action to be applied to the target regions. + * @stat_count: Total number of regions that this scheme is applied. + * @stat_sz: Total size of regions that this scheme is applied. * @list: List head for siblings. * - * Note that both the minimums and the maximums are inclusive. + * For each aggregation interval, DAMON applies @action to monitoring target + * regions fit in the condition and updates the statistics. Note that both + * the minimums and the maximums are inclusive.
*/ struct damos { unsigned long min_sz_region; unsigned long max_sz_region; unsigned int min_nr_accesses; unsigned int max_nr_accesses; unsigned int min_age_region; unsigned int max_age_region; enum damos_action action; + unsigned long stat_count; + unsigned long stat_sz; struct list_head list; }; diff --git a/mm/damon/core.c b/mm/damon/core.c index 0ed97b21cbb6e..2f6785737902d 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -103,6 +103,8 @@ struct damos *damon_new_scheme( scheme->min_age_region = min_age_region; scheme->max_age_region = max_age_region; scheme->action = action; + scheme->stat_count = 0; + scheme->stat_sz = 0; INIT_LIST_HEAD(&scheme->list); return scheme; @@ -544,9 +546,12 @@ static void damon_do_apply_schemes(struct damon_ctx *c, continue; if (r->age < s->min_age_region || s->max_age_region < r->age) continue; + s->stat_count++; + s->stat_sz += sz; if (c->primitive.apply_scheme) c->primitive.apply_scheme(c, t, r, s); - r->age = 0; + if (s->action != DAMOS_STAT) + r->age = 0; } } diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 78b7a04490c57..28d6abf277636 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -106,11 +106,11 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) damon_for_each_scheme(s, c) { rc = scnprintf(&buf[written], len - written, - "%lu %lu %u %u %u %u %d\n", + "%lu %lu %u %u %u %u %d %lu %lu\n", s->min_sz_region, s->max_sz_region, s->min_nr_accesses, s->max_nr_accesses, s->min_age_region, s->max_age_region, - s->action); + s->action, s->stat_count, s->stat_sz); if (!rc) return -ENOMEM; @@ -159,6 +159,7 @@ static bool damos_action_valid(int action) case DAMOS_PAGEOUT: case DAMOS_HUGEPAGE: case DAMOS_NOHUGEPAGE: + case DAMOS_STAT: return true; default: return false; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 3e1c74d36bab7..953c145b4f08a 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -705,6 +705,8 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, case DAMOS_NOHUGEPAGE: madv_action = MADV_NOHUGEPAGE; break; + case DAMOS_STAT: + return 0; default: pr_warn("Wrong action %d\n", scheme->action); return -EINVAL; From beb199d126cbb20ad130f97777238eeca9677bcb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:13 +1100 Subject: [PATCH 0861/1202] selftests/damon: add 'schemes' debugfs tests This commit adds simple selftests for the 'schemes' debugfs file of DAMON.
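Kernel-space DAMON users can read the statistics directly from 'struct damos'; a hypothetical sketch using the fields and the damon_for_each_scheme() iterator from the patches above (the pr_info() reporting and the 'ctx' variable are illustrative only):

        struct damos *s;

        /* dump the per-scheme stats that the last two debugfs integers show */
        damon_for_each_scheme(s, ctx)
                pr_info("action %d: applied to %lu regions (%lu bytes)\n",
                        s->action, s->stat_count, s->stat_sz);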
Link: https://lkml.kernel.org/r/20211001125604.29660-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/damon/debugfs_attrs.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index bfabb19dc0d3d..639cfb6a1f651 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -57,6 +57,19 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" echo "$orig_content" > "$file" +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +echo "$orig_content" > "$file" + # Test target_ids file # ==================== From 30de3ae16109a61137528867ae6ae2d6acef4313 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:13 +1100 Subject: [PATCH 0862/1202] Docs/admin-guide/mm/damon: document DAMON-based Operation Schemes This commit adds a description of DAMON-based operation schemes in the DAMON documents. Link: https://lkml.kernel.org/r/20211001125604.29660-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/damon/start.rst | 11 +++++ Documentation/admin-guide/mm/damon/usage.rst | 51 +++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/start.rst b/Documentation/admin-guide/mm/damon/start.rst index d5eb89a8fc386..51503cf90ca29 100644 --- a/Documentation/admin-guide/mm/damon/start.rst +++ b/Documentation/admin-guide/mm/damon/start.rst @@ -108,6 +108,17 @@ the results as separate image files. :: You can view the visualizations of this example workload at [1]_. Visualizations of other realistic workloads are available at [2]_ [3]_ [4]_. + +Data Access Pattern Aware Memory Management +=========================================== + +The below three commands make every memory region of size >=4K that hasn't been +accessed for >=60 seconds in your workload be swapped out. :: + + $ echo "#min-size max-size min-acc max-acc min-age max-age action" > scheme + $ echo "4K max 0 0 60s max pageout" >> scheme + $ damo schemes -c scheme + .. [1] https://damonitor.github.io/doc/html/v17/admin-guide/mm/damon/start.html#visualizing-recorded-patterns .. [2] https://damonitor.github.io/test/result/visual/latest/rec.heatmap.1.png.html ..
[3] https://damonitor.github.io/test/result/visual/latest/rec.wss_sz.png.html diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index a72cda374abac..c0296c14babff 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -34,8 +34,8 @@ the reason, this document describes only the debugfs interface debugfs Interface ================= -DAMON exports three files, ``attrs``, ``target_ids``, and ``monitor_on`` under -its debugfs directory, ``<debugfs>/damon/``. +DAMON exports four files, ``attrs``, ``target_ids``, ``schemes`` and +``monitor_on`` under its debugfs directory, ``<debugfs>/damon/``. Attributes @@ -74,6 +74,53 @@ check it again:: Note that setting the target ids doesn't start the monitoring. +Schemes +------- + +For usual DAMON-based data access aware memory management optimizations, users +would simply want the system to apply a memory management action to a memory +region of a specific size having a specific access frequency for a specific +time. DAMON receives such formalized operation schemes from the user and +applies those to the target processes. It also counts the total number and +size of regions that each scheme is applied to. These statistics can be used +for online analysis or tuning of the schemes. + +Users can get and set the schemes by reading from and writing to the +``schemes`` debugfs file. Reading the file also shows the statistics of each +scheme. To the file, each of the schemes should be represented in each line in +the below form: + + min-size max-size min-acc max-acc min-age max-age action + +Note that the ranges are closed intervals. Bytes for the size of regions +(``min-size`` and ``max-size``), number of monitored accesses per aggregate +interval for access frequency (``min-acc`` and ``max-acc``), number of +aggregate intervals for the age of regions (``min-age`` and ``max-age``), and a +predefined integer for memory management actions should be used. The supported +numbers and their meanings are as below. + + - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED`` + - 1: Call ``madvise()`` for the region with ``MADV_COLD`` + - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT`` + - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE`` + - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE`` + - 5: Do nothing but count the statistics + +You can disable schemes by simply writing an empty string to the file. For +example, the below commands apply a scheme saying "If a memory region of size in +[4KiB, 8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate +interval in [10, 20], page out the region", check the entered scheme again, and +finally remove the scheme. :: + + # cd <debugfs>/damon + # echo "4096 8192 0 5 10 20 2" > schemes + # cat schemes + 4096 8192 0 5 10 20 2 0 0 + # echo > schemes + +The last two integers in the 4th line of the above example are the total number +and the total size of the regions that the scheme is applied to. + + Turning On/Off -------------- From e901d131e649ff970bed118d58972c0eeac5e93a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:14 +1100 Subject: [PATCH 0863/1202] mm/damon/dbgfs: allow users to set initial monitoring target regions Patch series "DAMON: Support Physical Memory Address Space Monitoring". DAMON currently supports only virtual address spaces monitoring.
It can be easily extended for various use cases and address spaces by configuring its monitoring primitives layer to use appropriate primitives implementations, though. This patchset implements monitoring primitives for the physical address space monitoring using that structure. The first 3 patches allow user space users to manually set the monitoring regions. The 1st patch implements the feature in the 'damon-dbgfs'. Then, patches for adding unit tests (the 2nd patch) and updating the documentation (the 3rd patch) follow. The following 4 patches implement the physical address space monitoring primitives. The 4th patch makes some primitive functions for the virtual address spaces primitives reusable. The 5th patch implements the physical address space monitoring primitives. The 6th patch links the primitives to the 'damon-dbgfs'. Finally, the 7th patch documents these new features. This patch (of 7): Some 'damon-dbgfs' users would want to monitor only a part of the entire virtual memory address space. The program interface users in the kernel space could use '->before_start()' callback or set the regions inside the context struct as they want, but 'damon-dbgfs' users cannot. For the reason, this commit introduces a new debugfs file called 'init_regions'. 'damon-dbgfs' users can specify which initial monitoring target address regions they want by writing special input to the file. The input should describe each region in each line in the below form: <target id> <start address> <end address> Note that the regions will be updated to cover entire memory mapped regions after a 'regions update interval' is passed. If you want the regions to not be updated after the initial setting, you could set the interval as a very long time, say, a few decades. Link: https://lkml.kernel.org/r/20211012205711.29216-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211012205711.29216-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Cameron Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Jonathan Corbet Cc: David Hildenbrand Cc: David Woodhouse Cc: Marco Elver Cc: Leonard Foerster Cc: Greg Thelen Cc: Markus Boehme Cc: David Rienjes Cc: Shakeel Butt Cc: Shuah Khan Cc: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 2 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 28d6abf277636..1cce53cd241d0 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -394,6 +394,152 @@ static ssize_t dbgfs_target_ids_write(struct file *file, return ret; } +static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r; + int written = 0; + int rc; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %lu\n", + t->id, r->ar.start, r->ar.end); + if (!rc) + return -ENOMEM; + written += rc; + } + } + return written; +} + +static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + mutex_unlock(&ctx->kdamond_lock); + len = -EBUSY; + goto out; + } + + len = sprint_init_regions(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: +
kfree(kbuf); + return len; +} + +static int add_init_region(struct damon_ctx *c, + unsigned long target_id, struct damon_addr_range *ar) +{ + struct damon_target *t; + struct damon_region *r, *prev; + unsigned long id; + int rc = -EINVAL; + + if (ar->start >= ar->end) + return -EINVAL; + + damon_for_each_target(t, c) { + id = t->id; + if (targetid_is_pid(c)) + id = (unsigned long)pid_vnr((struct pid *)id); + if (id == target_id) { + r = damon_new_region(ar->start, ar->end); + if (!r) + return -ENOMEM; + damon_add_region(r, t); + if (damon_nr_regions(t) > 1) { + prev = damon_prev_region(r); + if (prev->ar.end > r->ar.start) { + damon_destroy_region(r, t); + return -EINVAL; + } + } + rc = 0; + } + } + return rc; +} + +static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r, *next; + int pos = 0, parsed, ret; + unsigned long target_id; + struct damon_addr_range ar; + int err; + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + while (pos < len) { + ret = sscanf(&str[pos], "%lu %lu %lu%n", + &target_id, &ar.start, &ar.end, &parsed); + if (ret != 3) + break; + err = add_init_region(c, target_id, &ar); + if (err) + goto fail; + pos += parsed; + } + + return 0; + +fail: + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + return err; +} + +static ssize_t dbgfs_init_regions_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t ret = count; + int err; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + err = set_init_regions(ctx, kbuf, ret); + if (err) + ret = err; + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + kfree(kbuf); + return ret; +} + static ssize_t dbgfs_kdamond_pid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -445,6 +591,12 @@ static const struct file_operations target_ids_fops = { .write = dbgfs_target_ids_write, }; +static const struct file_operations init_regions_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_init_regions_read, + .write = dbgfs_init_regions_write, +}; + static const struct file_operations kdamond_pid_fops = { .open = damon_dbgfs_open, .read = dbgfs_kdamond_pid_read, @@ -453,9 +605,9 @@ static const struct file_operations kdamond_pid_fops = { static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) { const char * const file_names[] = {"attrs", "schemes", "target_ids", - "kdamond_pid"}; + "init_regions", "kdamond_pid"}; const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, - &target_ids_fops, &kdamond_pid_fops}; + &target_ids_fops, &init_regions_fops, &kdamond_pid_fops}; int i; for (i = 0; i < ARRAY_SIZE(file_names); i++) From e11d5e48cc24ef20582f4c1a3498729a860494b3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:14 +1100 Subject: [PATCH 0864/1202] mm/damon/dbgfs-test: add a unit test case for 'init_regions' This commit adds another test case for the new feature, 'init_regions'. 
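Each input line of the 'init_regions' file maps to one add_init_region() call; a minimal sketch under the signatures above (the 'ctx' variable and the concrete values are illustrative only):

        /* equivalent of the input line "42 1 100": target 42, range [1, 100) */
        struct damon_addr_range ar = { .start = 1, .end = 100 };
        int err = add_init_region(ctx, 42, &ar);

        if (err)        /* unknown target id, or overlapping/unsorted region */
                return err;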
Link: https://lkml.kernel.org/r/20211012205711.29216-3-sj@kernel.org Signed-off-by: SeongJae Park Reviewed-by: Brendan Higgins Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs-test.h | 54 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h index 4eddcfa73996f..104b22957616b 100644 --- a/mm/damon/dbgfs-test.h +++ b/mm/damon/dbgfs-test.h @@ -109,9 +109,63 @@ static void damon_dbgfs_test_set_targets(struct kunit *test) dbgfs_destroy_ctx(ctx); } +static void damon_dbgfs_test_set_init_regions(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + /* Each line represents one region in ``<target id> <start address> <end address>`` */ + char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45", + "2 10 20\n", + "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n", + ""}; + /* Reading the file again will show sorted, clean output */ + char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n", + "2 10 20\n", + "1 39 59\n1 70 134\n2 10 20\n2 20 25\n", + ""}; + char * const invalid_inputs[] = {"4 10 20\n", /* target does not exist */ + "2 10 20\n 2 14 26\n", /* regions overlap */ + "1 10 20\n2 30 40\n 1 5 8"}; /* not sorted by address */ + char *input, *expect; + int i, rc; + char buf[256]; + + damon_set_targets(ctx, ids, 3); + + /* Put valid inputs and check the results */ + for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) { + input = valid_inputs[i]; + expect = valid_expects[i]; + + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, 0); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, expect); + } + /* Put invalid inputs and check the return error code */ + for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) { + input = invalid_inputs[i]; + pr_info("input: %s\n", input); + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, -EINVAL); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, ""); + } + + damon_set_targets(ctx, NULL, 0); + damon_destroy_ctx(ctx); +} + static struct kunit_case damon_test_cases[] = { KUNIT_CASE(damon_dbgfs_test_str_to_target_ids), KUNIT_CASE(damon_dbgfs_test_set_targets), + KUNIT_CASE(damon_dbgfs_test_set_init_regions), {}, }; From 43b9611a17305f5be031806b17b2028a00090a1d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:15 +1100 Subject: [PATCH 0865/1202] Docs/admin-guide/mm/damon: document 'init_regions' feature This commit adds a description of the 'init_regions' feature in the DAMON usage document.
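The address-ordering rule that the document below states is what add_init_region() above enforces; condensed from its overlap check (the variables are those of the function):

        /* a new region must start at or after the end of the previous one */
        prev = damon_prev_region(r);
        if (prev->ar.end > r->ar.start) {
                damon_destroy_region(r, t);
                return -EINVAL;
        }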
Link: https://lkml.kernel.org/r/20211012205711.29216-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Brendan Higgins Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/damon/usage.rst | 41 +++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index c0296c14babff..f7d5cfbb50c2d 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -34,8 +34,9 @@ the reason, this document describes only the debugfs interface debugfs Interface ================= -DAMON exports four files, ``attrs``, ``target_ids``, ``schemes`` and -``monitor_on`` under its debugfs directory, ``<debugfs>/damon/``. +DAMON exports five files, ``attrs``, ``target_ids``, ``init_regions``, +``schemes`` and ``monitor_on`` under its debugfs directory, +``<debugfs>/damon/``. Attributes @@ -74,6 +75,42 @@ check it again:: Note that setting the target ids doesn't start the monitoring. +Initial Monitoring Target Regions +--------------------------------- + +In case of the debugfs based monitoring, DAMON automatically sets and updates +the monitoring target regions so that entire memory mappings of target +processes can be covered. However, users may want to limit the monitoring +region to specific address ranges, such as the heap, the stack, or specific +file-mapped area. Or, some users may know the initial access pattern of their +workloads and therefore want to set optimal initial regions for the 'adaptive +regions adjustment'. + +In such cases, users can explicitly set the initial monitoring target regions +as they want, by writing proper values to the ``init_regions`` file. Each line +of the input should represent one region in the below form.:: + + <target id> <start address> <end address> + +The ``target id`` should already be in the ``target_ids`` file, and the regions +should be passed in address order. For example, the below commands will set a +couple of address ranges, ``1-100`` and ``100-200`` as the initial monitoring +target region of process 42, and another couple of address ranges, ``20-40`` and +``50-100`` as that of process 4242.:: + + # cd <debugfs>/damon + # echo "42 1 100 + 42 100 200 + 4242 20 40 + 4242 50 100" > init_regions + +Note that this sets the initial monitoring target regions only. In case of +virtual memory monitoring, DAMON will automatically update the boundary of the +regions after one ``regions update interval``. Therefore, users should set the +``regions update interval`` large enough in this case, if they don't want the +update. + + Schemes ------- From a3a24947259e5578331c863ffa2b490fbf5ca19c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:15 +1100 Subject: [PATCH 0866/1202] mm/damon/vaddr: separate commonly usable functions This commit moves functions in the default virtual address spaces monitoring primitives that are commonly usable from other address spaces, like the physical address space, into a header file. Those will be reused by the physical address space monitoring primitives which will be implemented by the following commit.
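Of the helpers being moved, damon_get_page() carries the subtle refcount contract; a short usage sketch (the 'pfn' value and the surrounding void helper are illustrative only):

        /*
         * damon_get_page() returns the page with an elevated refcount only
         * if the pfn is online and on the LRU, so a successful call must be
         * paired with put_page().
         */
        struct page *page = damon_get_page(pfn);

        if (!page)
                return;         /* offline or non-LRU: treat as not accessed */
        /* ... inspect or manipulate the page ... */
        put_page(page);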
Link: https://lkml.kernel.org/r/20211012205711.29216-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Brendan Higgins Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/Makefile | 2 +- mm/damon/prmtv-common.c | 88 +++++++++++++++++++++++++++++++++++++++++ mm/damon/prmtv-common.h | 17 ++++++++ mm/damon/vaddr.c | 87 +--------------------------------------- 4 files changed, 108 insertions(+), 86 deletions(-) create mode 100644 mm/damon/prmtv-common.c create mode 100644 mm/damon/prmtv-common.h diff --git a/mm/damon/Makefile b/mm/damon/Makefile index fed4be3bace3e..99b1bfe01ff51 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DAMON) := core.o -obj-$(CONFIG_DAMON_VADDR) += vaddr.o +obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c new file mode 100644 index 0000000000000..1768cbe1b9ffd --- /dev/null +++ b/mm/damon/prmtv-common.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include +#include +#include +#include +#include + +#include "prmtv-common.h" + +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_page()'. We + * steal rather than reuse it because the code is quite simple. + */ +struct page *damon_get_page(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + + if (!page || !PageLRU(page) || !get_page_unless_zero(page)) + return NULL; + + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + return page; +} + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr) +{ + bool referenced = false; + struct page *page = damon_get_page(pte_pfn(*pte)); + + if (!page) + return; + + if (pte_young(*pte)) { + referenced = true; + *pte = pte_mkold(*pte); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool referenced = false; + struct page *page = damon_get_page(pmd_pfn(*pmd)); + + if (!page) + return; + + if (pmd_young(*pmd)) { + referenced = true; + *pmd = pmd_mkold(*pmd); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + ((1UL) << HPAGE_PMD_SHIFT))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h new file mode 100644 index 0000000000000..7093d19e5d428 --- /dev/null +++ b/mm/damon/prmtv-common.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include +#include + +/* Get a random number in [l, r) */ +#define damon_rand(l, r) (l + prandom_u32_max(r - l)) + +struct page 
*damon_get_page(unsigned long pfn); + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr); +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr); diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 953c145b4f08a..b8ba44f69db66 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -8,25 +8,18 @@ #define pr_fmt(fmt) "damon-va: " fmt #include -#include #include -#include #include -#include #include #include -#include -#include -#include + +#include "prmtv-common.h" #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST #undef DAMON_MIN_REGION #define DAMON_MIN_REGION 1 #endif -/* Get a random number in [l, r) */ -#define damon_rand(l, r) (l + prandom_u32_max(r - l)) - /* * 't->id' should be the pointer to the relevant 'struct pid' having reference * count. Caller must put the returned task, unless it is NULL. @@ -373,82 +366,6 @@ void damon_va_update(struct damon_ctx *ctx) } } -/* - * Get an online page for a pfn if it's in the LRU list. Otherwise, returns - * NULL. - * - * The body of this function is stolen from the 'page_idle_get_page()'. We - * steal rather than reuse it because the code is quite simple. - */ -static struct page *damon_get_page(unsigned long pfn) -{ - struct page *page = pfn_to_online_page(pfn); - - if (!page || !PageLRU(page) || !get_page_unless_zero(page)) - return NULL; - - if (unlikely(!PageLRU(page))) { - put_page(page); - page = NULL; - } - return page; -} - -static void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, - unsigned long addr) -{ - bool referenced = false; - struct page *page = damon_get_page(pte_pfn(*pte)); - - if (!page) - return; - - if (pte_young(*pte)) { - referenced = true; - *pte = pte_mkold(*pte); - } - -#ifdef CONFIG_MMU_NOTIFIER - if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) - referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ - - if (referenced) - set_page_young(page); - - set_page_idle(page); - put_page(page); -} - -static void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, - unsigned long addr) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - bool referenced = false; - struct page *page = damon_get_page(pmd_pfn(*pmd)); - - if (!page) - return; - - if (pmd_young(*pmd)) { - referenced = true; - *pmd = pmd_mkold(*pmd); - } - -#ifdef CONFIG_MMU_NOTIFIER - if (mmu_notifier_clear_young(mm, addr, - addr + ((1UL) << HPAGE_PMD_SHIFT))) - referenced = true; -#endif /* CONFIG_MMU_NOTIFIER */ - - if (referenced) - set_page_young(page); - - set_page_idle(page); - put_page(page); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -} - static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { From 70224fe9f6814687dbf56d51bd96edee8d670b6a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:16 +1100 Subject: [PATCH 0867/1202] mm/damon/vaddr: include 'highmem.h' to fix a build failure Commit 0ff28922686c ("mm/damon/vaddr: separate commonly usable functions") in -mm tree[1] moves include of 'highmem.h' from 'vaddr.c' to 'prmtv-common.c', though the code for the header is still in 'vaddr.c'. 
As a result, build with 'CONFIG_HIGHPTE' fails as below: In file included from ../include/linux/mm.h:33:0, from ../include/linux/kallsyms.h:13, from ../include/linux/bpf.h:20, from ../include/linux/bpf-cgroup.h:5, from ../include/linux/cgroup-defs.h:22, from ../include/linux/cgroup.h:28, from ../include/linux/hugetlb.h:9, from ../mm/damon/vaddr.c:11: ../mm/damon/vaddr.c: In function `damon_mkold_pmd_entry': ../include/linux/pgtable.h:97:12: error: implicit declaration of function `kmap_atomic'; did you mean `mcopy_atomic'? [-Werror=implicit-function-declaration] ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ ^ ../include/linux/mm.h:2376:17: note: in expansion of macro `pte_offset_map' pte_t *__pte = pte_offset_map(pmd, address); \ ^~~~~~~~~~~~~~ ../mm/damon/vaddr.c:387:8: note: in expansion of macro `pte_offset_map_lock' pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); ^~~~~~~~~~~~~~~~~~~ ../include/linux/pgtable.h:99:24: error: implicit declaration of function `kunmap_atomic'; did you mean `in_atomic'? [-Werror=implicit-function-declaration] #define pte_unmap(pte) kunmap_atomic((pte)) ^ ../include/linux/mm.h:2384:2: note: in expansion of macro `pte_unmap' pte_unmap(pte); \ ^~~~~~~~~ ../mm/damon/vaddr.c:392:2: note: in expansion of macro `pte_unmap_unlock' pte_unmap_unlock(pte, ptl); ^~~~~~~~~~~~~~~~ This commit fixes the issue by moving the include back to 'vaddr.c'. [1] https://github.com/hnaz/linux-mm/commit/0ff28922686c Link: https://lkml.kernel.org/r/20211014110848.5204-1-sj@kernel.org Signed-off-by: SeongJae Park Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap Cc: Mark Brown Cc: Stephen Rothwell Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/prmtv-common.c | 1 - mm/damon/vaddr.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c index 1768cbe1b9ffd..7e62ee54fb543 100644 --- a/mm/damon/prmtv-common.c +++ b/mm/damon/prmtv-common.c @@ -5,7 +5,6 @@ * Author: SeongJae Park */ -#include #include #include #include diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index b8ba44f69db66..f0aef35602dab 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "damon-va: " fmt #include +#include #include #include #include From de246b2e6f72978c031f42f05c592ff098d87e7b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:16 +1100 Subject: [PATCH 0868/1202] mm/damon: implement primitives for physical address space monitoring This commit implements the monitoring primitives for the physical memory address space. Internally, it uses the PTE Accessed bit, similar to that of the virtual address spaces monitoring primitives. It supports only user memory pages, as idle pages tracking does. If the monitoring target physical memory address range contains non-user memory pages, access check of the pages will do nothing but simply treat the pages as not accessed. 
Link: https://lkml.kernel.org/r/20211012205711.29216-6-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Brendan Higgins Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 10 ++ mm/damon/Kconfig | 8 ++ mm/damon/Makefile | 1 + mm/damon/paddr.c | 224 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 243 insertions(+) create mode 100644 mm/damon/paddr.c diff --git a/include/linux/damon.h b/include/linux/damon.h index f301bb53381c1..715dadd21f7cd 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -351,4 +351,14 @@ void damon_va_set_primitives(struct damon_ctx *ctx); #endif /* CONFIG_DAMON_VADDR */ +#ifdef CONFIG_DAMON_PADDR + +/* Monitoring primitives for the physical memory address space */ +void damon_pa_prepare_access_checks(struct damon_ctx *ctx); +unsigned int damon_pa_check_accesses(struct damon_ctx *ctx); +bool damon_pa_target_valid(void *t); +void damon_pa_set_primitives(struct damon_ctx *ctx); + +#endif /* CONFIG_DAMON_PADDR */ + #endif /* _DAMON_H */ diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index ba8898c7eb8eb..2a5923be631e2 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -32,6 +32,14 @@ config DAMON_VADDR This builds the default data access monitoring primitives for DAMON that work for virtual address spaces. +config DAMON_PADDR + bool "Data access monitoring primitives for the physical address space" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that works for the physical address space. 
+ config DAMON_VADDR_KUNIT_TEST bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS depends on DAMON_VADDR && KUNIT=y diff --git a/mm/damon/Makefile b/mm/damon/Makefile index 99b1bfe01ff51..8d9b0df797029 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_DAMON) := core.o obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o +obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c new file mode 100644 index 0000000000000..d7a2ecd09ed02 --- /dev/null +++ b/mm/damon/paddr.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for The Physical Address Space + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-pa: " fmt + +#include +#include +#include +#include + +#include "prmtv-common.h" + +static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) + damon_ptep_mkold(pvmw.pte, vma->vm_mm, addr); + else + damon_pmdp_mkold(pvmw.pmd, vma->vm_mm, addr); + } + return true; +} + +static void damon_pa_mkold(unsigned long paddr) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct rmap_walk_control rwc = { + .rmap_one = __damon_pa_mkold, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return; + + if (!page_mapped(page) || !page_rmapping(page)) { + set_page_idle(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) + goto out; + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + +out: + put_page(page); +} + +static void __damon_pa_prepare_access_check(struct damon_ctx *ctx, + struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_pa_mkold(r->sampling_addr); +} + +void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + __damon_pa_prepare_access_check(ctx, r); + } +} + +struct damon_pa_access_chk_result { + unsigned long page_sz; + bool accessed; +}; + +static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct damon_pa_access_chk_result *result = arg; + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + result->accessed = false; + result->page_sz = PAGE_SIZE; + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) { + result->accessed = pte_young(*pvmw.pte) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + result->accessed = pmd_young(*pvmw.pmd) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + result->page_sz = ((1UL) << HPAGE_PMD_SHIFT); +#else + WARN_ON_ONCE(1); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + } + if (result->accessed) { + page_vma_mapped_walk_done(&pvmw); + break; + } + } + + /* If accessed, stop walking */ + return !result->accessed; +} + +static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct damon_pa_access_chk_result result = { + .page_sz = PAGE_SIZE, + .accessed = false, + }; + struct rmap_walk_control rwc = { + .arg = 
&result, + .rmap_one = __damon_pa_young, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return false; + + if (!page_mapped(page) || !page_rmapping(page)) { + if (page_is_idle(page)) + result.accessed = false; + else + result.accessed = true; + put_page(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) { + put_page(page); + return false; + } + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + put_page(page); + +out: + *page_sz = result.page_sz; + return result.accessed; +} + +static void __damon_pa_check_access(struct damon_ctx *ctx, + struct damon_region *r) +{ + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz)) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_pa_young(r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_addr = r->sampling_addr; +} + +unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) { + __damon_pa_check_access(ctx, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + } + + return max_nr_accesses; +} + +bool damon_pa_target_valid(void *t) +{ + return true; +} + +void damon_pa_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = NULL; + ctx->primitive.update = NULL; + ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks; + ctx->primitive.check_accesses = damon_pa_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_pa_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = NULL; +} From d2689ae0067fafc61b7e67623b63b244e2d8cfc4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:17 +1100 Subject: [PATCH 0869/1202] mm/damon/dbgfs: support physical memory monitoring This commit makes 'damon-dbgfs' support physical memory monitoring, in addition to the virtual memory monitoring. Users can do the physical memory monitoring by writing a special keyword, 'paddr', to the 'target_ids' debugfs file. Then, DAMON will check the special keyword and configure the monitoring context to run with the primitives for the physical address space. Unlike the virtual memory monitoring, the monitoring target region will not be automatically set. Therefore, users should also set the monitoring target address region using the 'init_regions' debugfs file. Also, note that the physical memory monitoring will not be automatically terminated. The user should explicitly turn off the monitoring by writing 'off' to the 'monitor_on' debugfs file.
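In kernel space, the equivalent of writing 'paddr' to 'target_ids' reduces to roughly the following (condensed from the dbgfs_target_ids_write() hunk below; the fake id mirrors the '42' the dbgfs code uses, and 'ctx' is assumed to exist):

        unsigned long fake_id = 42;     /* target id is meaningless for paddr */
        int err;

        damon_pa_set_primitives(ctx);
        err = damon_set_targets(ctx, &fake_id, 1);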
Link: https://lkml.kernel.org/r/20211012205711.29216-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Brendan Higgins Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/Kconfig | 2 +- mm/damon/dbgfs.c | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index 2a5923be631e2..ca33b289ebbe4 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -54,7 +54,7 @@ config DAMON_VADDR_KUNIT_TEST config DAMON_DBGFS bool "DAMON debugfs interface" - depends on DAMON_VADDR && DEBUG_FS + depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS help This builds the debugfs interface for DAMON. The user space admins can use the interface for arbitrary data access monitoring. diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 1cce53cd241d0..38188347d8abb 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -339,6 +339,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct damon_ctx *ctx = file->private_data; + bool id_is_pid = true; char *kbuf, *nrs; unsigned long *targets; ssize_t nr_targets; @@ -351,6 +352,11 @@ static ssize_t dbgfs_target_ids_write(struct file *file, return PTR_ERR(kbuf); nrs = kbuf; + if (!strncmp(kbuf, "paddr\n", count)) { + id_is_pid = false; + /* target id is meaningless here, but we set it just for fun */ + scnprintf(kbuf, count, "42 "); + } targets = str_to_target_ids(nrs, ret, &nr_targets); if (!targets) { @@ -358,7 +364,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, goto out; } - if (targetid_is_pid(ctx)) { + if (id_is_pid) { for (i = 0; i < nr_targets; i++) { targets[i] = (unsigned long)find_get_pid( (int)targets[i]); @@ -372,15 +378,24 @@ static ssize_t dbgfs_target_ids_write(struct file *file, mutex_lock(&ctx->kdamond_lock); if (ctx->kdamond) { - if (targetid_is_pid(ctx)) + if (id_is_pid) dbgfs_put_pids(targets, nr_targets); ret = -EBUSY; goto unlock_out; } + /* remove targets with previously-set primitive */ + damon_set_targets(ctx, NULL, 0); + + /* Configure the context for the address space type */ + if (id_is_pid) + damon_va_set_primitives(ctx); + else + damon_pa_set_primitives(ctx); + err = damon_set_targets(ctx, targets, nr_targets); if (err) { - if (targetid_is_pid(ctx)) + if (id_is_pid) dbgfs_put_pids(targets, nr_targets); ret = err; } From 0d47226b71d99c36d0b329ec2fafea644df0b489 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:17 +1100 Subject: [PATCH 0870/1202] Docs/DAMON: document physical memory monitoring support This commit updates the DAMON documents for the physical memory address space monitoring support. 
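The core difference the updated documentation describes, walking every page table that maps the page rather than one task's page table, comes down to the rmap_walk() setup in the previous patch; condensed from damon_pa_mkold():

        struct rmap_walk_control rwc = {
                .rmap_one = __damon_pa_mkold,   /* runs once per mapping */
                .anon_lock = page_lock_anon_vma_read,
        };

        rmap_walk(page, &rwc);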
Link: https://lkml.kernel.org/r/20211012205711.29216-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Brendan Higgins Cc: David Hildenbrand Cc: David Rienjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/damon/usage.rst | 25 +++++++++++++---- Documentation/vm/damon/design.rst | 29 ++++++++++++-------- Documentation/vm/damon/faq.rst | 5 ++-- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index f7d5cfbb50c2d..ed96bbf0daffc 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -10,15 +10,16 @@ DAMON provides below three interfaces for different users. This is for privileged people such as system administrators who want a just-working human-friendly interface. Using this, users can use the DAMON’s major features in a human-friendly way. It may not be highly tuned for - special cases, though. It supports only virtual address spaces monitoring. + special cases, though. It supports both virtual and physical address spaces + monitoring. - *debugfs interface.* This is for privileged user space programmers who want more optimized use of DAMON. Using this, users can use DAMON’s major features by reading from and writing to special debugfs files. Therefore, you can write and use your personalized DAMON debugfs wrapper programs that reads/writes the debugfs files instead of you. The DAMON user space tool is also a reference - implementation of such programs. It supports only virtual address spaces - monitoring. + implementation of such programs. It supports both virtual and physical + address spaces monitoring. - *Kernel Space Programming Interface.* This is for kernel space programmers. Using this, users can utilize every feature of DAMON most flexibly and efficiently by writing kernel space @@ -72,20 +73,34 @@ check it again:: # cat target_ids 42 4242 +Users can also monitor the physical memory address space of the system by +writing a special keyword, "``paddr\n``" to the file. Because physical address +space monitoring doesn't support multiple targets, reading the file will show a +fake value, ``42``, as below:: + + # cd /damon + # echo paddr > target_ids + # cat target_ids + 42 + Note that setting the target ids doesn't start the monitoring. Initial Monitoring Target Regions --------------------------------- -In case of the debugfs based monitoring, DAMON automatically sets and updates -the monitoring target regions so that entire memory mappings of target +In case of the virtual address space monitoring, DAMON automatically sets and +updates the monitoring target regions so that entire memory mappings of target processes can be covered. However, users can want to limit the monitoring region to specific address ranges, such as the heap, the stack, or specific file-mapped area. Or, some users can know the initial access pattern of their workloads and therefore want to set optimal initial regions for the 'adaptive regions adjustment'. +In contrast, DAMON do not automatically sets and updates the monitoring target +regions in case of physical memory monitoring. Therefore, users should set the +monitoring target regions by themselves. 
+ In such cases, users can explicitly set the initial monitoring target regions as they want, by writing proper values to the ``init_regions`` file. Each line of the input should represent one region in below form.:: diff --git a/Documentation/vm/damon/design.rst b/Documentation/vm/damon/design.rst index b05159c295f4d..210f0f50efd81 100644 --- a/Documentation/vm/damon/design.rst +++ b/Documentation/vm/damon/design.rst @@ -35,13 +35,17 @@ two parts: 1. Identification of the monitoring target address range for the address space. 2. Access check of specific address range in the target space. -DAMON currently provides the implementation of the primitives for only the -virtual address spaces. Below two subsections describe how it works. +DAMON currently provides the implementations of the primitives for the physical +and virtual address spaces. Below two subsections describe how those work. VMA-based Target Address Range Construction ------------------------------------------- +This is only for the virtual address space primitives implementation. That for +the physical address space simply asks users to manually set the monitoring +target address ranges. + Only small parts in the super-huge virtual address space of the processes are mapped to the physical memory and accessed. Thus, tracking the unmapped address regions is just wasteful. However, because DAMON can deal with some @@ -71,15 +75,18 @@ to make a reasonable trade-off. Below shows this in detail:: PTE Accessed-bit Based Access Check ----------------------------------- -The implementation for the virtual address space uses PTE Accessed-bit for -basic access checks. It finds the relevant PTE Accessed bit from the address -by walking the page table for the target task of the address. In this way, the -implementation finds and clears the bit for next sampling target address and -checks whether the bit set again after one sampling period. This could disturb -other kernel subsystems using the Accessed bits, namely Idle page tracking and -the reclaim logic. To avoid such disturbances, DAMON makes it mutually -exclusive with Idle page tracking and uses ``PG_idle`` and ``PG_young`` page -flags to solve the conflict with the reclaim logic, as Idle page tracking does. +Both of the implementations for physical and virtual address spaces use PTE +Accessed-bit for basic access checks. Only one difference is the way of +finding the relevant PTE Accessed bit(s) from the address. While the +implementation for the virtual address walks the page table for the target task +of the address, the implementation for the physical address walks every page +table having a mapping to the address. In this way, the implementations find +and clear the bit(s) for next sampling target address and checks whether the +bit(s) set again after one sampling period. This could disturb other kernel +subsystems using the Accessed bits, namely Idle page tracking and the reclaim +logic. To avoid such disturbances, DAMON makes it mutually exclusive with Idle +page tracking and uses ``PG_idle`` and ``PG_young`` page flags to solve the +conflict with the reclaim logic, as Idle page tracking does. Address Space Independent Core Mechanisms diff --git a/Documentation/vm/damon/faq.rst b/Documentation/vm/damon/faq.rst index cb3d8b585a8b3..11aea40eb328c 100644 --- a/Documentation/vm/damon/faq.rst +++ b/Documentation/vm/damon/faq.rst @@ -36,10 +36,9 @@ constructions and actual access checks can be implemented and configured on the DAMON core by the users. 
In this way, DAMON users can monitor any address space with any access check
technique.

-Nonetheless, DAMON provides vma tracking and PTE Accessed bit check based
+Nonetheless, DAMON provides vma/rmap tracking and PTE Accessed bit check based
 implementations of the address space dependent functions for the virtual memory
-by default, for a reference and convenient use. In near future, we will
-provide those for physical memory address space.
+and the physical memory by default, for a reference and convenient use.

 Can I simply monitor page granularity?

From 3d4c21de159106dcbd373d842c89b1d8488b4f90 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn
Date: Thu, 21 Oct 2021 15:09:18 +1100
Subject: [PATCH 0871/1202] mm/damon/vaddr: constify static mm_walk_ops

The only usage of these structs is to pass their addresses to
walk_page_range(), which takes a pointer to const mm_walk_ops as argument.
Make them const to allow the compiler to put them in read-only memory.

Link: https://lkml.kernel.org/r/20211014075042.17174-2-rikard.falkeborn@gmail.com
Signed-off-by: Rikard Falkeborn
Reviewed-by: SeongJae Park
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/damon/vaddr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index f0aef35602dab..758501b8d97db 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -394,7 +394,7 @@ static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
 	return 0;
 }

-static struct mm_walk_ops damon_mkold_ops = {
+static const struct mm_walk_ops damon_mkold_ops = {
 	.pmd_entry = damon_mkold_pmd_entry,
 };

@@ -490,7 +490,7 @@ static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr,
 	return 0;
 }

-static struct mm_walk_ops damon_young_ops = {
+static const struct mm_walk_ops damon_young_ops = {
 	.pmd_entry = damon_young_pmd_entry,
 };

From 216e8f6f3606aa7a9121e69ff0e95ba999205928 Mon Sep 17 00:00:00 2001
From: Rongwei Wang
Date: Thu, 21 Oct 2021 15:09:18 +1100
Subject: [PATCH 0872/1202] mm/damon/dbgfs: remove unnecessary variables

In some functions, there is no need to declare 'err' and 'ret' variables at
the same time.  This patch simplifies such declarations by reusing a single
variable, as sketched below.
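A minimal sketch of the pattern (hypothetical names, not taken from the diff;
'do_update()' stands in for setters such as damon_set_attrs(), which return 0
on success or a negative error code):

	static int do_update(void) { return 0; }	/* hypothetical stand-in */

	/* Before: a separate 'err' exists only to be folded into 'ret'. */
	static ssize_t example_write_old(size_t count)
	{
		ssize_t ret = count;
		int err;

		err = do_update();
		if (err)
			ret = err;
		return ret;
	}

	/* After: 'ret' itself carries the error code, or 'count' on success. */
	static ssize_t example_write_new(size_t count)
	{
		ssize_t ret;

		ret = do_update();
		if (!ret)
			ret = count;
		return ret;
	}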
Link: https://lkml.kernel.org/r/20211014073014.35754-1-sj@kernel.org Signed-off-by: Rongwei Wang Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs.c | 66 +++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 38188347d8abb..c90988a20fa4f 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -69,8 +69,7 @@ static ssize_t dbgfs_attrs_write(struct file *file, struct damon_ctx *ctx = file->private_data; unsigned long s, a, r, minr, maxr; char *kbuf; - ssize_t ret = count; - int err; + ssize_t ret; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) @@ -88,9 +87,9 @@ static ssize_t dbgfs_attrs_write(struct file *file, goto unlock_out; } - err = damon_set_attrs(ctx, s, a, r, minr, maxr); - if (err) - ret = err; + ret = damon_set_attrs(ctx, s, a, r, minr, maxr); + if (!ret) + ret = count; unlock_out: mutex_unlock(&ctx->kdamond_lock); out: @@ -220,14 +219,13 @@ static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, struct damon_ctx *ctx = file->private_data; char *kbuf; struct damos **schemes; - ssize_t nr_schemes = 0, ret = count; - int err; + ssize_t nr_schemes = 0, ret; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); - schemes = str_to_schemes(kbuf, ret, &nr_schemes); + schemes = str_to_schemes(kbuf, count, &nr_schemes); if (!schemes) { ret = -EINVAL; goto out; @@ -239,11 +237,12 @@ static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, goto unlock_out; } - err = damon_set_schemes(ctx, schemes, nr_schemes); - if (err) - ret = err; - else + ret = damon_set_schemes(ctx, schemes, nr_schemes); + if (!ret) { + ret = count; nr_schemes = 0; + } + unlock_out: mutex_unlock(&ctx->kdamond_lock); free_schemes_arr(schemes, nr_schemes); @@ -343,9 +342,8 @@ static ssize_t dbgfs_target_ids_write(struct file *file, char *kbuf, *nrs; unsigned long *targets; ssize_t nr_targets; - ssize_t ret = count; + ssize_t ret; int i; - int err; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) @@ -358,7 +356,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, scnprintf(kbuf, count, "42 "); } - targets = str_to_target_ids(nrs, ret, &nr_targets); + targets = str_to_target_ids(nrs, count, &nr_targets); if (!targets) { ret = -ENOMEM; goto out; @@ -393,11 +391,12 @@ static ssize_t dbgfs_target_ids_write(struct file *file, else damon_pa_set_primitives(ctx); - err = damon_set_targets(ctx, targets, nr_targets); - if (err) { + ret = damon_set_targets(ctx, targets, nr_targets); + if (ret) { if (id_is_pid) dbgfs_put_pids(targets, nr_targets); - ret = err; + } else { + ret = count; } unlock_out: @@ -715,8 +714,7 @@ static ssize_t dbgfs_mk_context_write(struct file *file, { char *kbuf; char *ctx_name; - ssize_t ret = count; - int err; + ssize_t ret; kbuf = user_input_str(buf, count, ppos); if (IS_ERR(kbuf)) @@ -734,9 +732,9 @@ static ssize_t dbgfs_mk_context_write(struct file *file, } mutex_lock(&damon_dbgfs_lock); - err = dbgfs_mk_context(ctx_name); - if (err) - ret = err; + ret = dbgfs_mk_context(ctx_name); + if (!ret) + ret = count; mutex_unlock(&damon_dbgfs_lock); out: @@ -805,8 +803,7 @@ static ssize_t dbgfs_rm_context_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char *kbuf; - ssize_t ret = count; - int err; + ssize_t ret; char *ctx_name; kbuf = user_input_str(buf, count, ppos); @@ -825,9 +822,9 @@ static ssize_t 
dbgfs_rm_context_write(struct file *file,
 	}

 	mutex_lock(&damon_dbgfs_lock);
-	err = dbgfs_rm_context(ctx_name);
-	if (err)
-		ret = err;
+	ret = dbgfs_rm_context(ctx_name);
+	if (!ret)
+		ret = count;
 	mutex_unlock(&damon_dbgfs_lock);

 out:
@@ -851,9 +848,8 @@ static ssize_t dbgfs_monitor_on_read(struct file *file,
 static ssize_t dbgfs_monitor_on_write(struct file *file,
 		const char __user *buf, size_t count, loff_t *ppos)
 {
-	ssize_t ret = count;
+	ssize_t ret;
 	char *kbuf;
-	int err;

 	kbuf = user_input_str(buf, count, ppos);
 	if (IS_ERR(kbuf))
@@ -866,14 +862,14 @@ static ssize_t dbgfs_monitor_on_write(struct file *file,
 	}

 	if (!strncmp(kbuf, "on", count))
-		err = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
+		ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
 	else if (!strncmp(kbuf, "off", count))
-		err = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
+		ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
 	else
-		err = -EINVAL;
+		ret = -EINVAL;

-	if (err)
-		ret = err;
+	if (!ret)
+		ret = count;
 	kfree(kbuf);
 	return ret;
 }

From 97644a0977cfdf38c53f05e6ddf7c51a9fb14c3b Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 21 Oct 2021 15:09:19 +1100
Subject: [PATCH 0873/1202] mm/damon/paddr: support the pageout scheme

Introduction
============

This patchset 1) makes the engine for general data access pattern-oriented
memory management (DAMOS) more useful for production environments, and 2)
implements a static kernel module for lightweight proactive reclamation using
the engine.

Proactive Reclamation
---------------------

On general memory over-committed systems, proactively reclaiming cold pages
helps save memory and reduces the latency spikes that are incurred by direct
reclaim or the CPU consumption of kswapd, while causing only minimal
performance degradation[2].

A memory over-commit virtualization system based on Free Pages Reporting[8]
would be one more specific use case.  In such a system, the guest VMs report
their free memory to the host, and the host reallocates the reported memory
to other guests.  As a result, the system's memory utilization can be
maximized.  However, the guests may not be so memory-frugal, because some
kernel subsystems and user-space applications are designed to use as much
memory as is available.  The guests would then report only a small amount of
free memory to the host, resulting in poor memory utilization.  Running
proactive reclamation in such guests could help mitigate this problem.

Google has also implemented this idea and is using it in their data centers.
They further proposed upstreaming it at LSFMM'19, and "the general consensus
was that, while this sort of proactive reclaim would be useful for a number
of users, the cost of this particular solution was too high to consider
merging it upstream"[3].  The cost mainly comes from the coldness tracking.
Roughly speaking, the implementation periodically scans the 'Accessed' bit of
each page.  For that reason, the overhead increases linearly as the size of
the memory and the scanning frequency grow.  As a result, Google is known to
dedicate one CPU to the work.  That is a reasonable option for someone like
Google, but it wouldn't be for many others.

DAMON and DAMOS: An engine for data access pattern-oriented memory management
------------------------------------------------------------------------------

DAMON[4] is a framework for general data access monitoring.  Its adaptive
monitoring overhead control feature minimizes its monitoring overhead.
It also lets the upper bound of the overhead be configured by clients,
regardless of the size of the monitoring target memory.  While monitoring 70
GiB of memory of a production system every 5 milliseconds, it consumes less
than 1% of a single CPU's time.  For this, it may sacrifice some of the
quality of the monitoring results.  Nevertheless, the lower bound of the
quality is configurable, and it uses a best-effort algorithm for better
quality.  Our test results[5] show the quality is practical enough.  From the
production system monitoring, we were able to find a 4 KiB region in the 70
GiB of memory that shows the highest access frequency.

We normally don't monitor data access patterns just for fun, but to improve
something like memory management.  Proactive reclamation is one such usage.
For such general cases, DAMON provides a feature called DAMON-based Operation
Schemes (DAMOS)[6].  It makes DAMON an engine for general data access
pattern-oriented memory management.  Using this, clients can ask DAMON to
find memory regions of a specific data access pattern and apply some memory
management action (e.g., page out, move to the head of the LRU list, use huge
pages, ...).  We call such a request a 'scheme'.

Proactive Reclamation on top of DAMON/DAMOS
-------------------------------------------

Therefore, by using DAMON for cold page detection, the monitoring overhead
issue of proactive reclamation can be solved.  Indeed, we previously
implemented a version of proactive reclamation using DAMOS and achieved
noticeable improvements with our evaluation setup[5].  Nevertheless, it was
more of a proof-of-concept than something for production use.  It supports
only the virtual address spaces of processes, and requires additional tuning
efforts for given workloads and hardware.  For the tuning, we introduced a
simple auto-tuning user space tool[7].  Google is also known to be using a
similar ML-based approach for their fleets[2].  But making it just work with
intuitive knobs in the kernel would be helpful for general users.

To this end, this patchset improves DAMOS to make it ready for such
production usages, and implements another version of proactive reclamation,
namely DAMON_RECLAIM, on top of it.

DAMOS Improvements: Aggressiveness Control, Prioritization, and Watermarks
--------------------------------------------------------------------------

First of all, the current version of DAMOS supports only virtual address
spaces.  This patchset makes it support the physical address space for the
page out action.

The next major problem of the current version of DAMOS is the lack of
aggressiveness control, which can result in arbitrary overhead.  For example,
if huge memory regions having the data access pattern of interest are found,
applying the requested action to all of the regions could incur significant
overhead.  This can be controlled by tuning the target data access pattern
with manual or automated approaches[2,7].  But some people would prefer the
kernel to just work with only intuitive tuning or default values.

For such cases, this patchset implements a safeguard, namely a time/size
quota.  Using this, clients can specify up to how much time can be used for
applying the action, and/or up to how much of the memory regions the action
can be applied to, within a user-specified time duration.

A follow-up question is: to which memory regions should the action be applied
within the limits?  We implement a simple regions prioritization mechanism
for each action and make DAMOS apply the action to high priority regions
first.
It also allows clients to tune the prioritization mechanism to use different
weights for the size, access frequency, and age of memory regions.  This
means we could use not only LRU but also LFU or some fancy algorithms like
CAR[9] with lightweight overhead.

Though DAMON is lightweight, someone would want to remove even the cold pages
monitoring overhead when it is unnecessary.  Currently, it must be manually
turned on and off by clients, but some clients would simply want to turn it
on and off based on some metric like the free memory ratio or memory
fragmentation.  For such cases, this patchset implements a watermarks-based
automatic activation feature.  It allows the clients to configure the metric
of their interest and three watermarks for the metric.  If the metric is
higher than the high watermark or lower than the low watermark, the scheme is
deactivated.  If the metric is lower than the mid watermark but higher than
the low watermark, the scheme is activated.

DAMON-based Reclaim
-------------------

Using the improved version of DAMOS, this patchset implements a static kernel
module called 'damon_reclaim'.  It finds memory regions that haven't been
accessed for a specific time duration and pages them out.  Consuming too much
CPU for the pageout operations, or doing pageout too frequently, can be
critical for systems that configure their swap devices with software-defined
in-memory block devices like zram/zswap, or with total-writes-limited devices
like SSDs, respectively.  To avoid these problems, the time/size quotas can
be configured.  Under the quotas, it pages out the memory regions that have
not been accessed for the longest time first.  Also, to remove the monitoring
overhead in peaceful situations, and to fall back to the LRU-list based
page-granularity reclamation when it doesn't make progress, the three
watermarks based activation mechanism is used, with the free memory ratio as
the watermark metric.

For convenient configuration, it provides several module parameters.  Using
these, sysadmins can enable/disable it and tune its parameters, including the
coldness identification time threshold, the time/size quotas, and the three
watermarks.

Evaluation
==========

In short, DAMON_RECLAIM with a 50ms/s time quota and regions prioritization
on a v5.15-rc5 Linux kernel with a ZRAM swap device achieves 38.58% memory
saving with only 1.94% runtime overhead.  For this, DAMON_RECLAIM consumes
only 4.97% of a single CPU's time.

Setup
-----

We evaluate DAMON_RECLAIM to show how each of the DAMOS improvements makes a
difference.  For this, we measure DAMON_RECLAIM's CPU consumption, the entire
system's memory footprint, the total number of major page faults, and the
runtime of 24 realistic workloads in the PARSEC3 and SPLASH-2X benchmark
suites on my QEMU/KVM based virtual machine.  The virtual machine runs on an
i3.metal AWS instance, has 130 GiB of memory, and runs a Linux kernel built
on the latest -mm tree[1] plus this patchset.  It also utilizes a 4 GiB ZRAM
swap device.  We repeat the measurement 5 times and use the averages.

[1] https://github.com/hnaz/linux-mm/tree/v5.15-rc5-mmots-2021-10-13-19-55

Detailed Results
----------------

The results are summarized in the table below.  With a coldness
identification threshold of 5 seconds, DAMON_RECLAIM without the time
quota-based speed limit achieves 47.21% memory saving, but incurs 4.59%
runtime slowdown to the workloads on average.  For this, DAMON_RECLAIM
consumes about 11.28% of a single CPU's time.  Applying time quotas of
200ms/s, 50ms/s, and 10ms/s without the regions prioritization changes the
slowdown to 4.89%, 2.65%, and 1.5%, respectively.
The 200ms/s (20%) time quota makes no real change compared to the version
with no quota applied, because the latter consumes only 11.28% CPU time
anyway.  DAMON_RECLAIM's CPU utilization is similarly reduced: 11.24%, 5.51%,
and 2.01% of a single CPU's time.  That is, the overhead is proportional to
the speed limit.  Nevertheless, it also reduces the memory saving because the
reclamation becomes less aggressive.  In detail, the three variants show
48.76%, 37.83%, and 7.85% memory saving, respectively.

Applying the regions prioritization (within the time quota, page out the
regions that have not been accessed for longer first) further reduces the
performance degradation.  The runtime slowdown and the increase in the total
number of major page faults change from 4.89%/218,690% to 4.39%/166,136%
(200ms/s), from 2.65%/111,886% to 1.94%/59,053% (50ms/s), and from
1.5%/34,973.40% to 2.08%/8,781.75% (10ms/s).  The runtime under the 10ms/s
time quota has increased with prioritization, but apparently that's within
the margin of error.

    time quota  prioritization  memory_saving  cpu_util  slowdown  pgmajfaults overhead
    N           N               47.21%         11.28%    4.59%     194,802%
    200ms/s     N               48.76%         11.24%    4.89%     218,690%
    50ms/s      N               37.83%         5.51%     2.65%     111,886%
    10ms/s      N               7.85%          2.01%     1.5%      34,793.40%
    200ms/s     Y               50.08%         10.38%    4.39%     166,136%
    50ms/s      Y               38.58%         4.97%     1.94%     59,053%
    10ms/s      Y               3.63%          1.73%     2.08%     8,781.75%

Baseline and Complete Git Trees
===============================

The patches are based on the latest -mm tree
(v5.15-rc5-mmots-2021-10-13-19-55).  You can also clone the complete git
tree from:

    $ git clone git://github.com/sjp38/linux -b damon_reclaim/patches/v1

The web is also available:
https://git.kernel.org/pub/scm/linux/kernel/git/sj/linux.git/tag/?h=damon_reclaim/patches/v1

Sequence Of Patches
===================

The first patch makes DAMOS support the physical address space for the page
out action.  The following five patches (patches 2-6) implement the time/size
quotas.  The next four patches (patches 7-10) implement the memory regions
prioritization within the limit.  Then, the following three patches (patches
11-13) implement the watermarks-based schemes activation.  Finally, the last
two patches (patches 14-15) implement and document the DAMON-based
reclamation using the advanced DAMOS.

[1] https://www.kernel.org/doc/html/v5.15-rc1/vm/damon/index.html
[2] https://research.google/pubs/pub48551/
[3] https://lwn.net/Articles/787611/
[4] https://damonitor.github.io
[5] https://damonitor.github.io/doc/html/latest/vm/damon/eval.html
[6] https://lore.kernel.org/linux-mm/20211001125604.29660-1-sj@kernel.org/
[7] https://github.com/awslabs/damoos
[8] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
[9] https://www.usenix.org/conference/fast-04/car-clock-adaptive-replacement

This patch (of 15):

This commit makes the DAMON primitives for the physical address space support
the pageout action for DAMON-based Operation Schemes.  Hence, with this
commit, users can easily implement system-level data access-aware reclamation
using DAMOS.
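For illustration, a kernel-space client could combine the physical address
space primitives with a pageout scheme roughly as below.  This is only a
sketch: damon_new_ctx() is assumed as the context allocator, error handling
is omitted, and the scheme constructor signature is the one at this point of
the series.

	/* Sketch: page out regions of any size that showed no access for at
	 * least 60 aggregation intervals. */
	static int example_start_pa_pageout(void)
	{
		struct damon_ctx *ctx = damon_new_ctx();	/* assumed helper */
		struct damos *scheme;

		damon_pa_set_primitives(ctx);		/* paddr primitives */
		scheme = damon_new_scheme(PAGE_SIZE, ULONG_MAX,	/* any size */
				0, 0,			/* zero accesses */
				60, UINT_MAX,		/* old enough */
				DAMOS_PAGEOUT);
		damon_set_schemes(ctx, &scheme, 1);
		return damon_start(&ctx, 1);		/* one context */
	}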
Link: https://lkml.kernel.org/r/20211019150731.16699-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211019150731.16699-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Cameron Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: Jonathan Corbet Cc: David Hildenbrand Cc: David Woodhouse Cc: Marco Elver Cc: Leonard Foerster Cc: Greg Thelen Cc: Markus Boehme Cc: David Rientjes Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/paddr.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index d7a2ecd09ed02..957ada55de77b 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -11,7 +11,9 @@ #include #include #include +#include +#include "../internal.h" #include "prmtv-common.h" static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma, @@ -211,6 +213,39 @@ bool damon_pa_target_valid(void *t) return true; } +int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, + struct damon_region *r, struct damos *scheme) +{ + unsigned long addr; + LIST_HEAD(page_list); + + if (scheme->action != DAMOS_PAGEOUT) + return -EINVAL; + + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + struct page *page = damon_get_page(PHYS_PFN(addr)); + + if (!page) + continue; + + ClearPageReferenced(page); + test_and_clear_page_young(page); + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + if (PageUnevictable(page)) { + putback_lru_page(page); + } else { + list_add(&page->lru, &page_list); + put_page(page); + } + } + reclaim_pages(&page_list); + cond_resched(); + return 0; +} + void damon_pa_set_primitives(struct damon_ctx *ctx) { ctx->primitive.init = NULL; @@ -220,5 +255,5 @@ void damon_pa_set_primitives(struct damon_ctx *ctx) ctx->primitive.reset_aggregated = NULL; ctx->primitive.target_valid = damon_pa_target_valid; ctx->primitive.cleanup = NULL; - ctx->primitive.apply_scheme = NULL; + ctx->primitive.apply_scheme = damon_pa_apply_scheme; } From da8dae3655068143f0372e3cfd74d8613cdc4557 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:19 +1100 Subject: [PATCH 0874/1202] mm/damon/schemes: implement size quota for schemes application speed control There could be arbitrarily large memory regions fulfilling the target data access pattern of a DAMON-based operation scheme. In the case, applying the action of the scheme could incur too high overhead. To provide an intuitive way for avoiding it, this commit implements a feature called size quota. If the quota is set, DAMON tries to apply the action only up to the given amount of memory regions within a given time window. 
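Schematically, a caller would express the quota like this (a sketch only;
the target pattern arguments reuse the illustrative values from the earlier
sketch, and only ->sz and ->reset_interval are set by the user, the remaining
fields being private charging state):

	/* Apply the action to at most 100 MiB of regions per second. */
	struct damos_quota quota = {
		.sz = 100 * 1024 * 1024,	/* size quota, in bytes */
		.reset_interval = 1000,		/* charge window, in ms */
	};
	struct damos *scheme = damon_new_scheme(PAGE_SIZE, ULONG_MAX,
			0, 0, 60, UINT_MAX,	/* target pattern as before */
			DAMOS_PAGEOUT, &quota);

Once the total size of the regions that the action has been applied to within
the current window reaches ->sz, kdamond skips the action until the next
window starts; a zero ->sz leaves the scheme unlimited.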
Link: https://lkml.kernel.org/r/20211019150731.16699-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 36 +++++++++++++++++++++++--- mm/damon/core.c | 60 +++++++++++++++++++++++++++++++++++++------ mm/damon/dbgfs.c | 4 ++- 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 715dadd21f7cd..af8c2ada26557 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -89,6 +89,26 @@ enum damos_action { DAMOS_STAT, /* Do nothing but only record the stat */ }; +/** + * struct damos_quota - Controls the aggressiveness of the given scheme. + * @sz: Maximum bytes of memory that the action can be applied. + * @reset_interval: Charge reset interval in milliseconds. + * + * To avoid consuming too much CPU time or IO resources for applying the + * &struct damos->action to large memory, DAMON allows users to set a size + * quota. The quota can be set by writing non-zero values to &sz. If the size + * quota is set, DAMON tries to apply the action only up to &sz bytes within + * &reset_interval. + */ +struct damos_quota { + unsigned long sz; + unsigned long reset_interval; + +/* private: For charging the quota */ + unsigned long charged_sz; + unsigned long charged_from; +}; + /** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @min_sz_region: Minimum size of target regions. @@ -98,13 +118,20 @@ enum damos_action { * @min_age_region: Minimum age of target regions. * @max_age_region: Maximum age of target regions. * @action: &damo_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. * @stat_count: Total number of regions that this scheme is applied. * @stat_sz: Total size of regions that this scheme is applied. * @list: List head for siblings. * - * For each aggregation interval, DAMON applies @action to monitoring target - * regions fit in the condition and updates the statistics. Note that both - * the minimums and the maximums are inclusive. + * For each aggregation interval, DAMON finds regions which fit in the + * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, + * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to + * those. To avoid consuming too much CPU time or IO resources for the + * &action, "a is used. + * + * After applying the &action to each region, &stat_count and &stat_sz is + * updated to reflect the number of regions and total size of regions that the + * &action is applied. 
*/ struct damos { unsigned long min_sz_region; @@ -114,6 +141,7 @@ struct damos { unsigned int min_age_region; unsigned int max_age_region; enum damos_action action; + struct damos_quota quota; unsigned long stat_count; unsigned long stat_sz; struct list_head list; @@ -310,7 +338,7 @@ struct damos *damon_new_scheme( unsigned long min_sz_region, unsigned long max_sz_region, unsigned int min_nr_accesses, unsigned int max_nr_accesses, unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action); + enum damos_action action, struct damos_quota *quota); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); diff --git a/mm/damon/core.c b/mm/damon/core.c index 2f6785737902d..cce14a0d5c725 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -89,7 +89,7 @@ struct damos *damon_new_scheme( unsigned long min_sz_region, unsigned long max_sz_region, unsigned int min_nr_accesses, unsigned int max_nr_accesses, unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action) + enum damos_action action, struct damos_quota *quota) { struct damos *scheme; @@ -107,6 +107,11 @@ struct damos *damon_new_scheme( scheme->stat_sz = 0; INIT_LIST_HEAD(&scheme->list); + scheme->quota.sz = quota->sz; + scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.charged_sz = 0; + scheme->quota.charged_from = 0; + return scheme; } @@ -530,15 +535,25 @@ static void kdamond_reset_aggregated(struct damon_ctx *c) } } +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r); + static void damon_do_apply_schemes(struct damon_ctx *c, struct damon_target *t, struct damon_region *r) { struct damos *s; - unsigned long sz; damon_for_each_scheme(s, c) { - sz = r->ar.end - r->ar.start; + struct damos_quota *quota = &s->quota; + unsigned long sz = r->ar.end - r->ar.start; + + /* Check the quota */ + if (quota->sz && quota->charged_sz >= quota->sz) + continue; + + /* Check the target regions condition */ if (sz < s->min_sz_region || s->max_sz_region < sz) continue; if (r->nr_accesses < s->min_nr_accesses || @@ -546,22 +561,51 @@ static void damon_do_apply_schemes(struct damon_ctx *c, continue; if (r->age < s->min_age_region || s->max_age_region < r->age) continue; - s->stat_count++; - s->stat_sz += sz; - if (c->primitive.apply_scheme) + + /* Apply the scheme */ + if (c->primitive.apply_scheme) { + if (quota->sz && quota->charged_sz + sz > quota->sz) { + sz = ALIGN_DOWN(quota->sz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(c, t, r, sz); + } c->primitive.apply_scheme(c, t, r, s); + quota->charged_sz += sz; + } if (s->action != DAMOS_STAT) r->age = 0; + +update_stat: + s->stat_count++; + s->stat_sz += sz; } } static void kdamond_apply_schemes(struct damon_ctx *c) { struct damon_target *t; - struct damon_region *r; + struct damon_region *r, *next_r; + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + + if (!quota->sz) + continue; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies( + quota->reset_interval))) { + quota->charged_from = jiffies; + quota->charged_sz = 0; + } + } damon_for_each_target(t, c) { - damon_for_each_region(r, t) + damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); } } diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index c90988a20fa4f..a04bd50cc4c4e 100644 --- 
a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -188,6 +188,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,

 	*nr_schemes = 0;
 	while (pos < len && *nr_schemes < max_nr_schemes) {
+		struct damos_quota quota = {};
+
 		ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u%n",
 				&min_sz, &max_sz, &min_nr_a, &max_nr_a,
 				&min_age, &max_age, &action, &parsed);
@@ -200,7 +202,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 		pos += parsed;

 		scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
-				min_age, max_age, action);
+				min_age, max_age, action, &quota);
 		if (!scheme)
 			goto fail;

From 017d6984dc63b35a0787d376eb0086e59ca63a9d Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 21 Oct 2021 15:09:20 +1100
Subject: [PATCH 0875/1202] mm/damon/schemes: skip already charged targets and
 regions

If DAMOS has stopped applying the action in the middle of a group of memory
regions due to its size quota, it starts the work again from the beginning of
the address space in the next charge window.  If there is a huge memory
region at the beginning of the address space which always fulfills the
scheme's target data access pattern, the action would be applied to only that
region.  This commit mitigates the case by skipping, at the beginning of the
next charge window, the memory regions that were already charged in the
current charge window.

Link: https://lkml.kernel.org/r/20211019150731.16699-4-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Amit Shah
Cc: Benjamin Herrenschmidt
Cc: David Hildenbrand
Cc: David Rientjes
Cc: David Woodhouse
Cc: Greg Thelen
Cc: Jonathan Cameron
Cc: Jonathan Corbet
Cc: Leonard Foerster
Cc: Marco Elver
Cc: Markus Boehme
Cc: Shakeel Butt
Cc: Shuah Khan
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 include/linux/damon.h |  5 +++++
 mm/damon/core.c       | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index af8c2ada26557..1fec32e183198 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -107,6 +107,8 @@ struct damos_quota {
 /* private: For charging the quota */
 	unsigned long charged_sz;
 	unsigned long charged_from;
+	struct damon_target *charge_target_from;
+	unsigned long charge_addr_from;
 };

 /**
@@ -307,6 +309,9 @@ struct damon_ctx {
 #define damon_prev_region(r) \
 	(container_of(r->list.prev, struct damon_region, list))

+#define damon_last_region(t) \
+	(list_last_entry(&t->regions_list, struct damon_region, list))
+
 #define damon_for_each_region(r, t) \
 	list_for_each_entry(r, &t->regions_list, list)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index cce14a0d5c725..693b75bc34505 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -111,6 +111,8 @@ struct damos *damon_new_scheme(
 	scheme->quota.charged_sz = 0;
 	scheme->quota.charged_from = 0;
+	scheme->quota.charge_target_from = NULL;
+	scheme->quota.charge_addr_from = 0;

 	return scheme;
 }
@@ -553,6 +555,37 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 		if (quota->sz && quota->charged_sz >= quota->sz)
 			continue;

+		/* Skip previously charged regions */
+		if (quota->charge_target_from) {
+			if (t != quota->charge_target_from)
+				continue;
+			if (r == damon_last_region(t)) {
+				quota->charge_target_from = NULL;
+				quota->charge_addr_from = 0;
+				continue;
+			}
+			if (quota->charge_addr_from &&
+					r->ar.end <= quota->charge_addr_from)
+				continue;
+
+			if (quota->charge_addr_from && r->ar.start <
+					quota->charge_addr_from) {
+				sz = ALIGN_DOWN(quota->charge_addr_from -
r->ar.start, DAMON_MIN_REGION); + if (!sz) { + if (r->ar.end - r->ar.start <= + DAMON_MIN_REGION) + continue; + sz = DAMON_MIN_REGION; + } + damon_split_region_at(c, t, r, sz); + r = damon_next_region(r); + sz = r->ar.end - r->ar.start; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + /* Check the target regions condition */ if (sz < s->min_sz_region || s->max_sz_region < sz) continue; @@ -573,6 +606,10 @@ static void damon_do_apply_schemes(struct damon_ctx *c, } c->primitive.apply_scheme(c, t, r, s); quota->charged_sz += sz; + if (quota->sz && quota->charged_sz >= quota->sz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } } if (s->action != DAMOS_STAT) r->age = 0; From bed3c92db25b1174a26b29b93eef015fd1e2f45d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:21 +1100 Subject: [PATCH 0876/1202] mm/damon/schemes: implement time quota The size quota feature of DAMOS is useful for IO resource-critical systems, but not so intuitive for CPU time-critical systems. Systems using zram or zswap-like swap device would be examples. To provide another intuitive ways for such systems, this commit implements time-based quota for DAMON-based Operation Schemes. If the quota is set, DAMOS tries to use only up to the user-defined quota of CPU time within a given time window. Link: https://lkml.kernel.org/r/20211019150731.16699-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 25 +++++++++++++++++++----- mm/damon/core.c | 45 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 1fec32e183198..5347f940818e4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -91,20 +91,35 @@ enum damos_action { /** * struct damos_quota - Controls the aggressiveness of the given scheme. + * @ms: Maximum milliseconds that the scheme can use. * @sz: Maximum bytes of memory that the action can be applied. * @reset_interval: Charge reset interval in milliseconds. * * To avoid consuming too much CPU time or IO resources for applying the - * &struct damos->action to large memory, DAMON allows users to set a size - * quota. The quota can be set by writing non-zero values to &sz. If the size - * quota is set, DAMON tries to apply the action only up to &sz bytes within - * &reset_interval. + * &struct damos->action to large memory, DAMON allows users to set time and/or + * size quotas. The quotas can be set by writing non-zero values to &ms and + * &sz, respectively. If the time quota is set, DAMON tries to use only up to + * &ms milliseconds within &reset_interval for applying the action. If the + * size quota is set, DAMON tries to apply the action only up to &sz bytes + * within &reset_interval. + * + * Internally, the time quota is transformed to a size quota using estimated + * throughput of the scheme's action. DAMON then compares it against &sz and + * uses smaller one as the effective quota. 
*/ struct damos_quota { + unsigned long ms; unsigned long sz; unsigned long reset_interval; -/* private: For charging the quota */ +/* private: */ + /* For throughput estimation */ + unsigned long total_charged_sz; + unsigned long total_charged_ns; + + unsigned long esz; /* Effective size quota in bytes */ + + /* For charging the quota */ unsigned long charged_sz; unsigned long charged_from; struct damon_target *charge_target_from; diff --git a/mm/damon/core.c b/mm/damon/core.c index 693b75bc34505..d1da4bef96ede 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -107,8 +107,12 @@ struct damos *damon_new_scheme( scheme->stat_sz = 0; INIT_LIST_HEAD(&scheme->list); + scheme->quota.ms = quota->ms; scheme->quota.sz = quota->sz; scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.total_charged_sz = 0; + scheme->quota.total_charged_ns = 0; + scheme->quota.esz = 0; scheme->quota.charged_sz = 0; scheme->quota.charged_from = 0; scheme->quota.charge_target_from = NULL; @@ -550,9 +554,10 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; unsigned long sz = r->ar.end - r->ar.start; + struct timespec64 begin, end; /* Check the quota */ - if (quota->sz && quota->charged_sz >= quota->sz) + if (quota->esz && quota->charged_sz >= quota->esz) continue; /* Skip previously charged regions */ @@ -597,16 +602,21 @@ static void damon_do_apply_schemes(struct damon_ctx *c, /* Apply the scheme */ if (c->primitive.apply_scheme) { - if (quota->sz && quota->charged_sz + sz > quota->sz) { - sz = ALIGN_DOWN(quota->sz - quota->charged_sz, + if (quota->esz && + quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, DAMON_MIN_REGION); if (!sz) goto update_stat; damon_split_region_at(c, t, r, sz); } + ktime_get_coarse_ts64(&begin); c->primitive.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); quota->charged_sz += sz; - if (quota->sz && quota->charged_sz >= quota->sz) { + if (quota->esz && quota->charged_sz >= quota->esz) { quota->charge_target_from = t; quota->charge_addr_from = r->ar.end + 1; } @@ -620,6 +630,29 @@ static void damon_do_apply_schemes(struct damon_ctx *c, } } +/* Shouldn't be called if quota->ms and quota->sz are zero */ +static void damos_set_effective_quota(struct damos_quota *quota) +{ + unsigned long throughput; + unsigned long esz; + + if (!quota->ms) { + quota->esz = quota->sz; + return; + } + + if (quota->total_charged_ns) + throughput = quota->total_charged_sz * 1000000 / + quota->total_charged_ns; + else + throughput = PAGE_SIZE * 1024; + esz = throughput * quota->ms; + + if (quota->sz && quota->sz < esz) + esz = quota->sz; + quota->esz = esz; +} + static void kdamond_apply_schemes(struct damon_ctx *c) { struct damon_target *t; @@ -629,15 +662,17 @@ static void kdamond_apply_schemes(struct damon_ctx *c) damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; - if (!quota->sz) + if (!quota->ms && !quota->sz) continue; /* New charge window starts */ if (time_after_eq(jiffies, quota->charged_from + msecs_to_jiffies( quota->reset_interval))) { + quota->total_charged_sz += quota->charged_sz; quota->charged_from = jiffies; quota->charged_sz = 0; + damos_set_effective_quota(quota); } } From 76f66bb9faedc4ff7ac47450307b5c4727fc93d0 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:21 +1100 Subject: [PATCH 0877/1202] mm/damon/dbgfs: support quotas of 
schemes

This commit makes the debugfs interface of DAMON support the scheme quotas by
changing the format of the input for the schemes file.

Link: https://lkml.kernel.org/r/20211019150731.16699-6-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Amit Shah
Cc: Benjamin Herrenschmidt
Cc: David Hildenbrand
Cc: David Rientjes
Cc: David Woodhouse
Cc: Greg Thelen
Cc: Jonathan Cameron
Cc: Jonathan Corbet
Cc: Leonard Foerster
Cc: Marco Elver
Cc: Markus Boehme
Cc: Shakeel Butt
Cc: Shuah Khan
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 mm/damon/dbgfs.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index a04bd50cc4c4e..097e6745ba75c 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -105,11 +105,14 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)

 	damon_for_each_scheme(s, c) {
 		rc = scnprintf(&buf[written], len - written,
-				"%lu %lu %u %u %u %u %d %lu %lu\n",
+				"%lu %lu %u %u %u %u %d %lu %lu %lu %lu %lu\n",
 				s->min_sz_region, s->max_sz_region,
 				s->min_nr_accesses, s->max_nr_accesses,
 				s->min_age_region, s->max_age_region,
-				s->action, s->stat_count, s->stat_sz);
+				s->action,
+				s->quota.ms, s->quota.sz,
+				s->quota.reset_interval,
+				s->stat_count, s->stat_sz);
 		if (!rc)
 			return -ENOMEM;

@@ -190,10 +193,11 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 	while (pos < len && *nr_schemes < max_nr_schemes) {
 		struct damos_quota quota = {};

-		ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u%n",
+		ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u %lu %lu %lu%n",
 				&min_sz, &max_sz, &min_nr_a, &max_nr_a,
-				&min_age, &max_age, &action, &parsed);
-		if (ret != 7)
+				&min_age, &max_age, &action, &quota.ms,
+				&quota.sz, &quota.reset_interval, &parsed);
+		if (ret != 10)
 			break;
 		if (!damos_action_valid(action)) {
 			pr_err("wrong action %d\n", action);

From 873fd1845c98c520481a35dab2c5097dd45d0ba6 Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 21 Oct 2021 15:09:22 +1100
Subject: [PATCH 0878/1202] mm/damon/selftests: support schemes quotas

This commit updates the DAMON selftests to support the updated 'schemes'
debugfs file format for the quotas.
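For reference, each line of the 'schemes' input file now carries ten fields,
in the order parsed by the sscanf() above:

    <min_sz> <max_sz> <min_nr_accesses> <max_nr_accesses> \
        <min_age> <max_age> <action> <quota_ms> <quota_sz> <quota_reset_interval_ms>

The selftest input "1 2 3 4 5 6 4 0 0 0" below thus leaves all three quota
fields zero, meaning no quota is applied.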
Link: https://lkml.kernel.org/r/20211019150731.16699-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/damon/debugfs_attrs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 639cfb6a1f651..8e33a7b584e70 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -63,10 +63,10 @@ echo "$orig_content" > "$file" file="$DBGFS/schemes" orig_content=$(cat "$file") -test_write_succ "$file" "1 2 3 4 5 6 4" \ +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0" \ "$orig_content" "valid input" test_write_fail "$file" "1 2 -3 4 5 6 3" "$orig_content" "multi lines" +3 4 5 6 3 0 0 0" "$orig_content" "multi lines" test_write_succ "$file" "" "$orig_content" "disabling" echo "$orig_content" > "$file" From 1598b79d7573cb94b33a839decd6a46db6877581 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:22 +1100 Subject: [PATCH 0879/1202] mm/damon/schemes: prioritize regions within the quotas This commit makes DAMON apply schemes to regions having higher priority first, if it cannot apply schemes to all regions due to the quotas. The prioritization function should be implemented in the monitoring primitives. Those would commonly calculate the priority of the region using attributes of regions, namely 'size', 'nr_accesses', and 'age'. For example, some primitive would calculate the priority of each region using a weighted sum of 'nr_accesses' and 'age' of the region. The optimal weights would depend on give environments, so this commit makes those customizable. Nevertheless, the score calculation functions are only encouraged to respect the weights, not mandated. Link: https://lkml.kernel.org/r/20211019150731.16699-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 26 ++++++++++++++++++ mm/damon/core.c | 62 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 5347f940818e4..999918a074440 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -14,6 +14,8 @@ /* Minimal region size. Every damon_region is aligned by this. */ #define DAMON_MIN_REGION PAGE_SIZE +/* Max priority score for DAMON-based operation schemes */ +#define DAMOS_MAX_SCORE (99) /** * struct damon_addr_range - Represents an address region of [@start, @end). @@ -95,6 +97,10 @@ enum damos_action { * @sz: Maximum bytes of memory that the action can be applied. * @reset_interval: Charge reset interval in milliseconds. * + * @weight_sz: Weight of the region's size for prioritization. + * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. + * @weight_age: Weight of the region's age for prioritization. 
+ * * To avoid consuming too much CPU time or IO resources for applying the * &struct damos->action to large memory, DAMON allows users to set time and/or * size quotas. The quotas can be set by writing non-zero values to &ms and @@ -106,12 +112,22 @@ enum damos_action { * Internally, the time quota is transformed to a size quota using estimated * throughput of the scheme's action. DAMON then compares it against &sz and * uses smaller one as the effective quota. + * + * For selecting regions within the quota, DAMON prioritizes current scheme's + * target memory regions using the &struct damon_primitive->get_scheme_score. + * You could customize the prioritization logic by setting &weight_sz, + * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * encouraged to respect those. */ struct damos_quota { unsigned long ms; unsigned long sz; unsigned long reset_interval; + unsigned int weight_sz; + unsigned int weight_nr_accesses; + unsigned int weight_age; + /* private: */ /* For throughput estimation */ unsigned long total_charged_sz; @@ -124,6 +140,10 @@ struct damos_quota { unsigned long charged_from; struct damon_target *charge_target_from; unsigned long charge_addr_from; + + /* For prioritization */ + unsigned long histogram[DAMOS_MAX_SCORE + 1]; + unsigned int min_score; }; /** @@ -174,6 +194,7 @@ struct damon_ctx; * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. * @reset_aggregated: Reset aggregated accesses monitoring results. + * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. * @cleanup: Clean up the context. @@ -200,6 +221,8 @@ struct damon_ctx; * of its update. The value will be used for regions adjustment threshold. * @reset_aggregated should reset the access monitoring results that aggregated * by @check_accesses. + * @get_scheme_score should return the priority score of a region for a scheme + * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided * DAMON-based operation scheme is found. It should apply the scheme's action * to the region. This is not used for &DAMON_ARBITRARY_TARGET case. 
@@ -213,6 +236,9 @@ struct damon_primitive { void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); void (*reset_aggregated)(struct damon_ctx *context); + int (*get_scheme_score)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); bool (*target_valid)(void *target); diff --git a/mm/damon/core.c b/mm/damon/core.c index d1da4bef96ede..fad25778e2ecf 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -12,6 +12,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -110,6 +111,9 @@ struct damos *damon_new_scheme( scheme->quota.ms = quota->ms; scheme->quota.sz = quota->sz; scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.weight_sz = quota->weight_sz; + scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; + scheme->quota.weight_age = quota->weight_age; scheme->quota.total_charged_sz = 0; scheme->quota.total_charged_ns = 0; scheme->quota.esz = 0; @@ -545,6 +549,28 @@ static void damon_split_region_at(struct damon_ctx *ctx, struct damon_target *t, struct damon_region *r, unsigned long sz_r); +static bool __damos_valid_target(struct damon_region *r, struct damos *s) +{ + unsigned long sz; + + sz = r->ar.end - r->ar.start; + return s->min_sz_region <= sz && sz <= s->max_sz_region && + s->min_nr_accesses <= r->nr_accesses && + r->nr_accesses <= s->max_nr_accesses && + s->min_age_region <= r->age && r->age <= s->max_age_region; +} + +static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + bool ret = __damos_valid_target(r, s); + + if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) + return ret; + + return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; +} + static void damon_do_apply_schemes(struct damon_ctx *c, struct damon_target *t, struct damon_region *r) @@ -591,13 +617,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c, quota->charge_addr_from = 0; } - /* Check the target regions condition */ - if (sz < s->min_sz_region || s->max_sz_region < sz) - continue; - if (r->nr_accesses < s->min_nr_accesses || - s->max_nr_accesses < r->nr_accesses) - continue; - if (r->age < s->min_age_region || s->max_age_region < r->age) + if (!damos_valid_target(c, t, r, s)) continue; /* Apply the scheme */ @@ -661,6 +681,8 @@ static void kdamond_apply_schemes(struct damon_ctx *c) damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; if (!quota->ms && !quota->sz) continue; @@ -674,6 +696,32 @@ static void kdamond_apply_schemes(struct damon_ctx *c) quota->charged_sz = 0; damos_set_effective_quota(quota); } + + if (!c->primitive.get_scheme_score) + continue; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->primitive.get_scheme_score( + c, t, r, s); + quota->histogram[score] += + r->ar.end - r->ar.start; + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; } 
 	damon_for_each_target(t, c) {

From f257d6c28f5cc6a38efe9ff6551bc0aed8de755b Mon Sep 17 00:00:00 2001
From: SeongJae Park
Date: Thu, 21 Oct 2021 15:09:23 +1100
Subject: [PATCH 0880/1202] mm/damon/vaddr,paddr: support pageout
 prioritization

This commit makes the default monitoring primitives for the virtual address
spaces and the physical address space support the prioritization of memory
regions for the 'PAGEOUT' DAMOS action.  It calculates the hotness of each
region as the weighted sum of the region's 'nr_accesses' and 'age', and
derives the priority score as the reverse of the hotness, so that cold
regions can be paged out first.

Link: https://lkml.kernel.org/r/20211019150731.16699-9-sj@kernel.org
Signed-off-by: SeongJae Park
Cc: Amit Shah
Cc: Benjamin Herrenschmidt
Cc: David Hildenbrand
Cc: David Rientjes
Cc: David Woodhouse
Cc: Greg Thelen
Cc: Jonathan Cameron
Cc: Jonathan Corbet
Cc: Leonard Foerster
Cc: Marco Elver
Cc: Markus Boehme
Cc: Shakeel Butt
Cc: Shuah Khan
Signed-off-by: Andrew Morton
Signed-off-by: Stephen Rothwell
---
 include/linux/damon.h   |  4 ++++
 mm/damon/paddr.c        | 14 +++++++++++++
 mm/damon/prmtv-common.c | 46 +++++++++++++++++++++++++++++++++++++++++
 mm/damon/prmtv-common.h |  3 +++
 mm/damon/vaddr.c        | 15 ++++++++++++++
 5 files changed, 82 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 999918a074440..bdbb70db5e86c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -421,6 +421,8 @@ bool damon_va_target_valid(void *t);
 void damon_va_cleanup(struct damon_ctx *ctx);
 int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
 		struct damon_region *r, struct damos *scheme);
+int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_va_set_primitives(struct damon_ctx *ctx);

 #endif	/* CONFIG_DAMON_VADDR */
@@ -431,6 +433,8 @@ void damon_va_set_primitives(struct damon_ctx *ctx);
 void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
 unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
 bool damon_pa_target_valid(void *t);
+int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_pa_set_primitives(struct damon_ctx *ctx);

 #endif	/* CONFIG_DAMON_PADDR */

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 957ada55de77b..a496d6f203d64 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -246,6 +246,19 @@ int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
 	return 0;
 }

+int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme)
+{
+	switch (scheme->action) {
+	case DAMOS_PAGEOUT:
+		return damon_pageout_score(context, r, scheme);
+	default:
+		break;
+	}
+
+	return DAMOS_MAX_SCORE;
+}
+
 void damon_pa_set_primitives(struct damon_ctx *ctx)
 {
 	ctx->primitive.init = NULL;
@@ -256,4 +269,5 @@ void damon_pa_set_primitives(struct damon_ctx *ctx)
 	ctx->primitive.target_valid = damon_pa_target_valid;
 	ctx->primitive.cleanup = NULL;
 	ctx->primitive.apply_scheme = damon_pa_apply_scheme;
+	ctx->primitive.get_scheme_score = damon_pa_scheme_score;
 }

diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c
index 7e62ee54fb543..92a04f5831d6b 100644
--- a/mm/damon/prmtv-common.c
+++ b/mm/damon/prmtv-common.c
@@ -85,3 +85,49 @@ void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr)
 	put_page(page);
 #endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
 }
+
+#define DAMON_MAX_SUBSCORE	(100)
+#define
DAMON_MAX_AGE_IN_LOG (32) + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + unsigned int max_nr_accesses; + int freq_subscore; + unsigned int age_in_sec; + int age_in_log, age_subscore; + unsigned int freq_weight = s->quota.weight_nr_accesses; + unsigned int age_weight = s->quota.weight_age; + int hotness; + + max_nr_accesses = c->aggr_interval / c->sample_interval; + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses; + + age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000; + for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; + age_in_log++, age_in_sec >>= 1) + ; + + /* If frequency is 0, higher age means it's colder */ + if (freq_subscore == 0) + age_in_log *= -1; + + /* + * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. + * Scale it to be in [0, 100] and set it as age subscore. + */ + age_in_log += DAMON_MAX_AGE_IN_LOG; + age_subscore = age_in_log * DAMON_MAX_SUBSCORE / + DAMON_MAX_AGE_IN_LOG / 2; + + hotness = (freq_weight * freq_subscore + age_weight * age_subscore); + if (freq_weight + age_weight) + hotness /= freq_weight + age_weight; + /* + * Transform it to fit in [0, DAMOS_MAX_SCORE] + */ + hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; + + /* Return coldness of the region */ + return DAMOS_MAX_SCORE - hotness; +} diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h index 7093d19e5d428..61f27037603e1 100644 --- a/mm/damon/prmtv-common.h +++ b/mm/damon/prmtv-common.h @@ -15,3 +15,6 @@ struct page *damon_get_page(unsigned long pfn); void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr); void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr); + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 758501b8d97db..675cd8c7df9b8 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -633,6 +633,20 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, return damos_madvise(t, r, madv_action); } +int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t, + struct damon_region *r, struct damos *scheme) +{ + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + void damon_va_set_primitives(struct damon_ctx *ctx) { ctx->primitive.init = damon_va_init; @@ -643,6 +657,7 @@ void damon_va_set_primitives(struct damon_ctx *ctx) ctx->primitive.target_valid = damon_va_target_valid; ctx->primitive.cleanup = NULL; ctx->primitive.apply_scheme = damon_va_apply_scheme; + ctx->primitive.get_scheme_score = damon_va_scheme_score; } #include "vaddr-test.h" From f378c66c9c7a6c19443cdbc83741c74737ba13dd Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:24 +1100 Subject: [PATCH 0881/1202] mm/damon/dbgfs: support prioritization weights This commit allows DAMON debugfs interface users set the prioritization weights by putting three more numbers to the 'schemes' file. 
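Concretely, the three weights are appended after the quota fields, so each
scheme line now carries thirteen fields, matching the sscanf() format in the
diff below:

    <min_sz> <max_sz> <min_nr_accesses> <max_nr_accesses> \
        <min_age> <max_age> <action> <quota_ms> <quota_sz> \
        <quota_reset_interval_ms> <weight_sz> <weight_nr_accesses> <weight_age>

For example, the selftest input "1 2 3 4 5 6 4 0 0 0 1 2 3" gives the size,
access frequency, and age of a region the weights 1, 2, and 3, respectively.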
Link: https://lkml.kernel.org/r/20211019150731.16699-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 097e6745ba75c..20c4feb8b918c 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -105,13 +105,16 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) damon_for_each_scheme(s, c) { rc = scnprintf(&buf[written], len - written, - "%lu %lu %u %u %u %u %d %lu %lu %lu %lu %lu\n", + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %lu %lu\n", s->min_sz_region, s->max_sz_region, s->min_nr_accesses, s->max_nr_accesses, s->min_age_region, s->max_age_region, s->action, s->quota.ms, s->quota.sz, s->quota.reset_interval, + s->quota.weight_sz, + s->quota.weight_nr_accesses, + s->quota.weight_age, s->stat_count, s->stat_sz); if (!rc) return -ENOMEM; @@ -193,11 +196,14 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, while (pos < len && *nr_schemes < max_nr_schemes) { struct damos_quota quota = {}; - ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u %lu %lu %lu%n", + ret = sscanf(&str[pos], + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u%n", &min_sz, &max_sz, &min_nr_a, &max_nr_a, &min_age, &max_age, &action, &quota.ms, - &quota.sz, &quota.reset_interval, &parsed); - if (ret != 10) + &quota.sz, &quota.reset_interval, + &quota.weight_sz, &quota.weight_nr_accesses, + &quota.weight_age, &parsed); + if (ret != 13) break; if (!damos_action_valid(action)) { pr_err("wrong action %d\n", action); From f8923822b15f31ba3b7bab521b8f15d10c73fa49 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:24 +1100 Subject: [PATCH 0882/1202] tools/selftests/damon: update for regions prioritization of schemes This commit updates the DAMON selftests for the 'schemes' debugfs file, as the file format is updated. 
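With this change, the valid-input test string carries the three weight fields at the end, as in the updated test below: "1 2 3 4 5 6 4 0 0 0 1 2 3".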
Link: https://lkml.kernel.org/r/20211019150731.16699-11-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/damon/debugfs_attrs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 8e33a7b584e70..466dbeb37e31e 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -63,10 +63,10 @@ echo "$orig_content" > "$file" file="$DBGFS/schemes" orig_content=$(cat "$file") -test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0" \ +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3" \ "$orig_content" "valid input" test_write_fail "$file" "1 2 -3 4 5 6 3 0 0 0" "$orig_content" "multi lines" +3 4 5 6 3 0 0 0 1 2 3" "$orig_content" "multi lines" test_write_succ "$file" "" "$orig_content" "disabling" echo "$orig_content" > "$file" From ea14ad200077e4489f4917d145143cf0e205a805 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:25 +1100 Subject: [PATCH 0883/1202] mm/damon/schemes: activate schemes based on a watermarks mechanism DAMON-based operation schemes need to be manually turned on and off. In some use cases, however, the condition for turning a scheme on and off would depend on the system's situation. For example, schemes for proactive page reclamation would need to be turned on when some memory pressure is detected, and turned off when the system has enough free memory. For easier control of scheme activation based on the system situation, this commit introduces a watermarks-based mechanism. The client can describe the watermark metric (e.g., amount of free memory in the system), watermark check interval, and three watermarks, namely high, mid, and low. If the scheme is deactivated, it only gets the metric and compares it to the three watermarks for every check interval. If the metric is higher than the high watermark, the scheme is deactivated. If the metric is between the mid watermark and the low watermark, the scheme is activated. If the metric is lower than the low watermark, the scheme is deactivated again. This is to allow users to fall back to traditional page-granularity mechanisms. Link: https://lkml.kernel.org/r/20211019150731.16699-12-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 52 ++++++++++++++++++++++- mm/damon/core.c | 97 ++++++++++++++++++++++++++++++++++++++++++- mm/damon/dbgfs.c | 5 ++- 3 files changed, 151 insertions(+), 3 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index bdbb70db5e86c..239681d657a8e 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -146,6 +146,45 @@ struct damos_quota { unsigned int min_score; }; +/** + * enum damos_wmark_metric - Represents the watermark metric. + * + * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. 
+ * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + */ +enum damos_wmark_metric { + DAMOS_WMARK_NONE, + DAMOS_WMARK_FREE_MEM_RATE, +}; + +/** + * struct damos_watermarks - Controls when a given scheme should be activated. + * @metric: Metric for the watermarks. + * @interval: Watermarks check time interval in microseconds. + * @high: High watermark. + * @mid: Middle watermark. + * @low: Low watermark. + * + * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active + * means DAMON does monitoring and applying the action of the scheme to + * appropriate memory regions. Else, DAMON checks &metric of the system for at + * least every &interval microseconds and works as below. + * + * If &metric is higher than &high, the scheme is inactivated. If &metric is + * between &mid and &low, the scheme is activated. If &metric is lower than + * &low, the scheme is inactivated. + */ +struct damos_watermarks { + enum damos_wmark_metric metric; + unsigned long interval; + unsigned long high; + unsigned long mid; + unsigned long low; + +/* private: */ + bool activated; +}; + /** * struct damos - Represents a Data Access Monitoring-based Operation Scheme. * @min_sz_region: Minimum size of target regions. @@ -156,6 +195,7 @@ struct damos_quota { * @max_age_region: Maximum age of target regions. * @action: &damos_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. * @stat_count: Total number of regions that this scheme is applied to. * @stat_sz: Total size of regions that this scheme is applied to. * @list: List head for siblings. @@ -166,6 +206,14 @@ struct damos_quota { * those. To avoid consuming too much CPU time or IO resources for the * &action, &quota is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that are registered to a + * &struct damon_ctx are inactive, DAMON stops monitoring and just + * repeatedly checks the watermarks. + * * After applying the &action to each region, &stat_count and &stat_sz are * updated to reflect the number of regions and total size of regions that the * &action is applied to. 
@@ -179,6 +227,7 @@ struct damos { unsigned int max_age_region; enum damos_action action; struct damos_quota quota; + struct damos_watermarks wmarks; unsigned long stat_count; unsigned long stat_sz; struct list_head list; @@ -384,7 +433,8 @@ struct damos *damon_new_scheme( unsigned long min_sz_region, unsigned long max_sz_region, unsigned int min_nr_accesses, unsigned int max_nr_accesses, unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action, struct damos_quota *quota); + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); diff --git a/mm/damon/core.c b/mm/damon/core.c index fad25778e2ecf..6993c60ae31c4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -90,7 +91,8 @@ struct damos *damon_new_scheme( unsigned long min_sz_region, unsigned long max_sz_region, unsigned int min_nr_accesses, unsigned int max_nr_accesses, unsigned int min_age_region, unsigned int max_age_region, - enum damos_action action, struct damos_quota *quota) + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks) { struct damos *scheme; @@ -122,6 +124,13 @@ struct damos *damon_new_scheme( scheme->quota.charge_target_from = NULL; scheme->quota.charge_addr_from = 0; + scheme->wmarks.metric = wmarks->metric; + scheme->wmarks.interval = wmarks->interval; + scheme->wmarks.high = wmarks->high; + scheme->wmarks.mid = wmarks->mid; + scheme->wmarks.low = wmarks->low; + scheme->wmarks.activated = true; + return scheme; } @@ -582,6 +591,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c, unsigned long sz = r->ar.end - r->ar.start; struct timespec64 begin, end; + if (!s->wmarks.activated) + continue; + /* Check the quota */ if (quota->esz && quota->charged_sz >= quota->esz) continue; @@ -684,6 +696,9 @@ static void kdamond_apply_schemes(struct damon_ctx *c) unsigned long cumulated_sz; unsigned int score, max_score = 0; + if (!s->wmarks.activated) + continue; + if (!quota->ms && !quota->sz) continue; @@ -924,6 +939,83 @@ static bool kdamond_need_stop(struct damon_ctx *ctx) return true; } +static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +{ + struct sysinfo i; + + switch (metric) { + case DAMOS_WMARK_FREE_MEM_RATE: + si_meminfo(&i); + return i.freeram * 1000 / i.totalram; + default: + break; + } + return -EINVAL; +} + +/* + * Returns zero if the scheme is active. Else, returns time to wait for next + * watermark check in micro-seconds. + */ +static unsigned long damos_wmark_wait_us(struct damos *scheme) +{ + unsigned long metric; + + if (scheme->wmarks.metric == DAMOS_WMARK_NONE) + return 0; + + metric = damos_wmark_metric_value(scheme->wmarks.metric); + /* higher than high watermark or lower than low watermark */ + if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { + if (scheme->wmarks.activated) + pr_debug("inactivate a scheme (%d) for %s wmark\n", + scheme->action, + metric > scheme->wmarks.high ? 
+ "high" : "low"); + scheme->wmarks.activated = false; + return scheme->wmarks.interval; + } + + /* inactive and higher than middle watermark */ + if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && + !scheme->wmarks.activated) + return scheme->wmarks.interval; + + if (!scheme->wmarks.activated) + pr_debug("activate a scheme (%d)\n", scheme->action); + scheme->wmarks.activated = true; + return 0; +} + +static void kdamond_usleep(unsigned long usecs) +{ + if (usecs > 100 * 1000) + schedule_timeout_interruptible(usecs_to_jiffies(usecs)); + else + usleep_range(usecs, usecs + 1); +} + +/* Returns negative error code if it's not activated but should return */ +static int kdamond_wait_activation(struct damon_ctx *ctx) +{ + struct damos *s; + unsigned long wait_time; + unsigned long min_wait_time = 0; + + while (!kdamond_need_stop(ctx)) { + damon_for_each_scheme(s, ctx) { + wait_time = damos_wmark_wait_us(s); + if (!min_wait_time || wait_time < min_wait_time) + min_wait_time = wait_time; + } + if (!min_wait_time) + return 0; + + kdamond_usleep(min_wait_time); + } + return -EBUSY; +} + static void set_kdamond_stop(struct damon_ctx *ctx) { mutex_lock(&ctx->kdamond_lock); @@ -952,6 +1044,9 @@ static int kdamond_fn(void *data) sz_limit = damon_region_sz_limit(ctx); while (!kdamond_need_stop(ctx)) { + if (kdamond_wait_activation(ctx)) + continue; + if (ctx->primitive.prepare_access_checks) ctx->primitive.prepare_access_checks(ctx); if (ctx->callback.after_sampling && diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 20c4feb8b918c..9f13060d10585 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -195,6 +195,9 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, *nr_schemes = 0; while (pos < len && *nr_schemes < max_nr_schemes) { struct damos_quota quota = {}; + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_NONE, + }; ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u%n", @@ -212,7 +215,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, pos += parsed; scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, - min_age, max_age, action, &quota); + min_age, max_age, action, &quota, &wmarks); if (!scheme) goto fail; From 8e19cd2a26e1ed914cd4cd933122dd3f6cd102da Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:26 +1100 Subject: [PATCH 0884/1202] mm/damon/dbgfs: support watermarks This commit updates the DAMON debugfs interface to support the watermarks-based scheme activation. For this, the 'schemes' file now receives five more values. 
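For example, a full scheme specification now takes eighteen space-separated fields, with the watermark metric, check interval, and high/mid/low levels at the end; a sketch with illustrative values only:

    # echo "4096 18446744073709551615 0 5 10 20 2 10 1048576 1000 0 0 1 1 5000000 600 500 100" > /sys/kernel/debug/damon/schemes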
Link: https://lkml.kernel.org/r/20211019150731.16699-13-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/dbgfs.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 9f13060d10585..6828e463348b0 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -105,7 +105,7 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) damon_for_each_scheme(s, c) { rc = scnprintf(&buf[written], len - written, - "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %lu %lu\n", + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu\n", s->min_sz_region, s->max_sz_region, s->min_nr_accesses, s->max_nr_accesses, s->min_age_region, s->max_age_region, @@ -115,6 +115,8 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) s->quota.weight_sz, s->quota.weight_nr_accesses, s->quota.weight_age, + s->wmarks.metric, s->wmarks.interval, + s->wmarks.high, s->wmarks.mid, s->wmarks.low, s->stat_count, s->stat_sz); if (!rc) return -ENOMEM; @@ -195,18 +197,18 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, *nr_schemes = 0; while (pos < len && *nr_schemes < max_nr_schemes) { struct damos_quota quota = {}; - struct damos_watermarks wmarks = { - .metric = DAMOS_WMARK_NONE, - }; + struct damos_watermarks wmarks; ret = sscanf(&str[pos], - "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u%n", + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", &min_sz, &max_sz, &min_nr_a, &max_nr_a, &min_age, &max_age, &action, &quota.ms, &quota.sz, &quota.reset_interval, &quota.weight_sz, &quota.weight_nr_accesses, - &quota.weight_age, &parsed); - if (ret != 13) + &quota.weight_age, &wmarks.metric, + &wmarks.interval, &wmarks.high, &wmarks.mid, + &wmarks.low, &parsed); + if (ret != 18) break; if (!damos_action_valid(action)) { pr_err("wrong action %d\n", action); From 397575f14b6a100c8ead3c7ceaa4fffe7313f5c1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:26 +1100 Subject: [PATCH 0885/1202] selftests/damon: support watermarks This commit updates the DAMON selftests for the 'schemes' debugfs file to reflect the changes in the format. 
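With this change, the valid-input test string carries the five watermark fields (metric, interval, high, mid, and low) at the end, as in the updated test below: "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1".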
Link: https://lkml.kernel.org/r/20211019150731.16699-14-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/damon/debugfs_attrs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 466dbeb37e31e..196b6640bf378 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -63,10 +63,10 @@ echo "$orig_content" > "$file" file="$DBGFS/schemes" orig_content=$(cat "$file") -test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3" \ +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ "$orig_content" "valid input" test_write_fail "$file" "1 2 -3 4 5 6 3 0 0 0 1 2 3" "$orig_content" "multi lines" +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" test_write_succ "$file" "" "$orig_content" "disabling" echo "$orig_content" > "$file" From 9b723234c796cca54431418a48dfaeaeb00e7d19 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:27 +1100 Subject: [PATCH 0886/1202] mm/damon: introduce DAMON-based Reclamation (DAMON_RECLAIM) This commit implements a new kernel subsystem that finds cold memory regions using DAMON and reclaims those immediately. It is intended to be used as a proactive, lightweight reclamation logic for light memory pressure. For heavy memory pressure, it could be inactivated and fall back to the traditional page-scanning based reclamation. It's implemented on top of the DAMON framework to use the DAMON-based Operation Schemes (DAMOS) feature. It utilizes all the DAMOS features including speed limit, prioritization, and watermarks. It could be enabled and tuned at boot time via the kernel boot parameters, and at run time via its module parameters ('/sys/module/damon_reclaim/parameters/') interface. Link: https://lkml.kernel.org/r/20211019150731.16699-15-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/Kconfig | 12 ++ mm/damon/Makefile | 1 + mm/damon/reclaim.c | 354 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 mm/damon/reclaim.c diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index ca33b289ebbe4..5bcf05851ad07 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -73,4 +73,16 @@ config DAMON_DBGFS_KUNIT_TEST If unsure, say N. +config DAMON_RECLAIM + bool "Build DAMON-based reclaim (DAMON_RECLAIM)" + depends on DAMON_PADDR + help + This builds the DAMON-based reclamation subsystem. It finds pages + that are not accessed for a long time (cold) using DAMON and reclaims + those. + + This is suggested to be used as a proactive and lightweight + reclamation under light memory pressure, while the traditional page + scanning-based reclamation is used for heavy pressure. 
+ endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile index 8d9b0df797029..f7d5ac377a2bb 100644 --- a/mm/damon/Makefile +++ b/mm/damon/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_DAMON) := core.o obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o +obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c new file mode 100644 index 0000000000000..f5ae4c422555d --- /dev/null +++ b/mm/damon/reclaim.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based page reclamation + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-reclaim: " fmt + +#include +#include +#include +#include +#include + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_reclaim." + +/* + * Enable or disable DAMON_RECLAIM. + * + * You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could + * do no real monitoring and reclamation due to the watermarks-based activation + * condition. Refer to the descriptions of the watermark parameters below for + * this. + */ +static bool enabled __read_mostly; +module_param(enabled, bool, 0600); + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM + * identifies the region as cold, and reclaims it. 120 seconds by default. + */ +static unsigned long min_age __read_mostly = 120000000; +module_param(min_age, ulong, 0600); + +/* + * Limit of time for trying the reclamation in milliseconds. + * + * DAMON_RECLAIM tries to use only up to this time within a time window + * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be + * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, + * the limit is disabled. + * + * 10 ms by default. + */ +static unsigned long quota_ms __read_mostly = 10; +module_param(quota_ms, ulong, 0600); + +/* + * Limit of size of memory for the reclamation in bytes. + * + * DAMON_RECLAIM charges the amount of memory which it tried to reclaim within + * a time window (quota_reset_interval_ms) and makes sure no more than this + * limit is tried. This can be used for limiting consumption of CPU and IO. + * If this value is zero, the limit is disabled. + * + * 128 MiB by default. + */ +static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; +module_param(quota_sz, ulong, 0600); + +/* + * The time/size quota charge reset interval in milliseconds. + * + * The charge reset interval for the quota of time (quota_ms) and size + * (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than + * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms + * milliseconds. + * + * 1 second by default. + */ +static unsigned long quota_reset_interval_ms __read_mostly = 1000; +module_param(quota_reset_interval_ms, ulong, 0600); + +/* + * The watermarks check time interval in microseconds. + * + * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is + * enabled but inactive due to its watermarks rule. 5 seconds by default. + */ +static unsigned long wmarks_interval __read_mostly = 5000000; +module_param(wmarks_interval, ulong, 0600); + +/* + * Free memory rate (per thousand) for the high watermark. 
+ * + * If free memory of the system in bytes per thousand bytes is higher than + * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically + * checks the watermarks. 500 (50%) by default. + */ +static unsigned long wmarks_high __read_mostly = 500; +module_param(wmarks_high, ulong, 0600); + +/* + * Free memory rate (per thousand) for the middle watermark. + * + * If free memory of the system in bytes per thousand bytes is between this and + * the low watermark, DAMON_RECLAIM becomes active, so it starts the monitoring + * and the reclaiming. 400 (40%) by default. + */ +static unsigned long wmarks_mid __read_mostly = 400; +module_param(wmarks_mid, ulong, 0600); + +/* + * Free memory rate (per thousand) for the low watermark. + * + * If free memory of the system in bytes per thousand bytes is lower than this, + * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks + * the watermarks. In that case, the system falls back to the LRU-based page + * granularity reclamation logic. 200 (20%) by default. + */ +static unsigned long wmarks_low __read_mostly = 200; +module_param(wmarks_low, ulong, 0600); + +/* + * Sampling interval for the monitoring in microseconds. + * + * The sampling interval of DAMON for the cold memory monitoring. Please refer + * to the DAMON documentation for more detail. 5 ms by default. + */ +static unsigned long sample_interval __read_mostly = 5000; +module_param(sample_interval, ulong, 0600); + +/* + * Aggregation interval for the monitoring in microseconds. + * + * The aggregation interval of DAMON for the cold memory monitoring. Please + * refer to the DAMON documentation for more detail. 100 ms by default. + */ +static unsigned long aggr_interval __read_mostly = 100000; +module_param(aggr_interval, ulong, 0600); + +/* + * Minimum number of monitoring regions. + * + * The minimal number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set lower-bound of the monitoring quality. + * But, setting this too high could result in increased monitoring overhead. + * Please refer to the DAMON documentation for more detail. 10 by default. + */ +static unsigned long min_nr_regions __read_mostly = 10; +module_param(min_nr_regions, ulong, 0600); + +/* + * Maximum number of monitoring regions. + * + * The maximum number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set upper-bound of the monitoring overhead. + * However, setting this too low could result in bad monitoring quality. + * Please refer to the DAMON documentation for more detail. 1000 by default. + */ +static unsigned long max_nr_regions __read_mostly = 1000; +module_param(max_nr_regions, ulong, 0600); + +/* + * Start of the target memory region in physical address. + * + * The start physical address of the memory region that DAMON_RECLAIM will do + * work against. By default, the biggest System RAM is used as the region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of the memory region that DAMON_RECLAIM will do + * work against. By default, the biggest System RAM is used as the region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. 
*/ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +static struct damon_ctx *ctx; +static struct damon_target *target; + +struct damon_reclaim_ram_walk_arg { + unsigned long start; + unsigned long end; +}; + +static int walk_system_ram(struct resource *res, void *arg) +{ + struct damon_reclaim_ram_walk_arg *a = arg; + + if (a->end - a->start < res->end - res->start) { + a->start = res->start; + a->end = res->end; + } + return 0; +} + +/* + * Find biggest 'System RAM' resource and store its start and end address in + * @start and @end, respectively. If no System RAM is found, returns false. + */ +static bool get_monitoring_region(unsigned long *start, unsigned long *end) +{ + struct damon_reclaim_ram_walk_arg arg = {}; + + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); + if (arg.end <= arg.start) + return false; + + *start = arg.start; + *end = arg.end; + return true; +} + +static struct damos *damon_reclaim_new_scheme(void) +{ + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, + .high = wmarks_high, + .mid = wmarks_mid, + .low = wmarks_low, + }; + struct damos_quota quota = { + /* + * Do not try reclamation for more than quota_ms milliseconds + * or quota_sz bytes within quota_reset_interval_ms. + */ + .ms = quota_ms, + .sz = quota_sz, + .reset_interval = quota_reset_interval_ms, + /* Within the quota, page out older regions first. */ + .weight_sz = 0, + .weight_nr_accesses = 0, + .weight_age = 1 + }; + struct damos *scheme = damon_new_scheme( + /* Find regions having PAGE_SIZE or larger size */ + PAGE_SIZE, ULONG_MAX, + /* and not accessed at all */ + 0, 0, + /* for min_age or more micro-seconds, and */ + min_age / aggr_interval, UINT_MAX, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* under the quota. */ + &quota, + /* (De)activate this according to the watermarks. 
*/ + &wmarks); + + return scheme; +} + +static int damon_reclaim_turn(bool on) +{ + struct damon_region *region; + struct damos *scheme; + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, + min_nr_regions, max_nr_regions); + if (err) + return err; + + if (monitor_region_start > monitor_region_end) + return -EINVAL; + if (!monitor_region_start && !monitor_region_end && + !get_monitoring_region(&monitor_region_start, + &monitor_region_end)) + return -EINVAL; + /* DAMON will free this on its own when finish monitoring */ + region = damon_new_region(monitor_region_start, monitor_region_end); + if (!region) + return -ENOMEM; + damon_add_region(region, target); + + /* Will be freed by 'damon_set_schemes()' below */ + scheme = damon_reclaim_new_scheme(); + if (!scheme) { + err = -ENOMEM; + goto free_region_out; + } + err = damon_set_schemes(ctx, &scheme, 1); + if (err) + goto free_scheme_out; + + err = damon_start(&ctx, 1); + if (!err) { + kdamond_pid = ctx->kdamond->pid; + return 0; + } + +free_scheme_out: + damon_destroy_scheme(scheme); +free_region_out: + damon_destroy_region(region, target); + return err; +} + +#define ENABLE_CHECK_INTERVAL_MS 1000 +static struct delayed_work damon_reclaim_timer; +static void damon_reclaim_timer_fn(struct work_struct *work) +{ + static bool last_enabled; + bool now_enabled; + + now_enabled = enabled; + if (last_enabled != now_enabled) { + if (!damon_reclaim_turn(now_enabled)) + last_enabled = now_enabled; + else + enabled = last_enabled; + } + + schedule_delayed_work(&damon_reclaim_timer, + msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); +} +static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); + +static int __init damon_reclaim_init(void) +{ + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + damon_pa_set_primitives(ctx); + + /* 4242 means nothing but fun */ + target = damon_new_target(4242); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + schedule_delayed_work(&damon_reclaim_timer, 0); + return 0; +} + +module_init(damon_reclaim_init); From bed801f7f64cac4297476bddc9fa0d9bd8e8f6ce Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 21 Oct 2021 15:09:27 +1100 Subject: [PATCH 0887/1202] Documentation/admin-guide/mm/damon: add a document for DAMON_RECLAIM This commit adds an admin-guide document for DAMON-based Reclamation. Link: https://lkml.kernel.org/r/20211019150731.16699-16-sj@kernel.org Signed-off-by: SeongJae Park Cc: Amit Shah Cc: Benjamin Herrenschmidt Cc: David Hildenbrand Cc: David Rientjes Cc: David Woodhouse Cc: Greg Thelen Cc: Jonathan Cameron Cc: Jonathan Corbet Cc: Leonard Foerster Cc: Marco Elver Cc: Markus Boehme Cc: Shakeel Butt Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/admin-guide/mm/damon/index.rst | 1 + .../admin-guide/mm/damon/reclaim.rst | 235 ++++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 Documentation/admin-guide/mm/damon/reclaim.rst diff --git a/Documentation/admin-guide/mm/damon/index.rst b/Documentation/admin-guide/mm/damon/index.rst index 8c5dde3a57544..61aff88347f3c 100644 --- a/Documentation/admin-guide/mm/damon/index.rst +++ b/Documentation/admin-guide/mm/damon/index.rst @@ -13,3 +13,4 @@ optimize those. 
start usage + reclaim diff --git a/Documentation/admin-guide/mm/damon/reclaim.rst b/Documentation/admin-guide/mm/damon/reclaim.rst new file mode 100644 index 0000000000000..fb9def3a73559 --- /dev/null +++ b/Documentation/admin-guide/mm/damon/reclaim.rst @@ -0,0 +1,235 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +DAMON-based Reclamation +======================= + +DAMON-based Reclamation (DAMON_RECLAIM) is a static kernel module that is aimed +to be used for proactive and lightweight reclamation under light memory +pressure. It doesn't aim to replace the LRU-list based page-granularity +reclamation, but to be selectively used for different levels of memory pressure +and requirements. + +Where Proactive Reclamation is Required? +======================================== + +On general memory over-committed systems, proactively reclaiming cold pages +helps to save memory and reduce latency spikes that are incurred by the direct +reclaim of the process or CPU consumption of kswapd, while incurring only +minimal performance degradation [1]_ [2]_ . + +Free Pages Reporting [3]_ based memory over-commit virtualization systems are a +good example of such cases. In such systems, the guest VMs report their free +memory to the host, and the host reallocates the reported memory to other +guests. As a result, the memory of the systems is fully utilized. However, +the guests might not be so memory-frugal, mainly because some kernel subsystems +and user-space applications are designed to use as much memory as available. +Then, guests could report only a small amount of memory as free to the host, +resulting in a drop of the systems' memory utilization. Running the proactive +reclamation in guests could mitigate this problem. + +How It Works? +============= + +DAMON_RECLAIM finds memory regions that haven't been accessed for a specific +time duration and pages them out. To avoid it consuming too much CPU for the +paging out operation, a speed limit can be configured. Under the speed limit, +it pages out memory regions that haven't been accessed for a longer time first. +System administrators can also configure under what situation this scheme +should be automatically activated and deactivated with three memory pressure +watermarks. + +Interface: Module Parameters +============================ + +To use this feature, you should first ensure your system is running on a kernel +that is built with ``CONFIG_DAMON_RECLAIM=y``. + +To let sysadmins enable or disable it and tune for the given system, +DAMON_RECLAIM utilizes module parameters. That is, you can put +``damon_reclaim.<parameter>=<value>`` on the kernel boot command line or write +proper values to ``/sys/module/damon_reclaim/parameters/<parameter>`` files. + +Note that the parameter values except ``enabled`` are applied only when +DAMON_RECLAIM starts. Therefore, if you want to apply new parameter values at +runtime and DAMON_RECLAIM is already enabled, you should disable and re-enable +it via the ``enabled`` parameter file. Writing the new values to the parameter +files should be done before the re-enablement. + +Below are the descriptions of each parameter. + +enabled +------- + +Enable or disable DAMON_RECLAIM. + +You can enable DAMON_RECLAIM by setting the value of this parameter as ``Y``. +Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could do +no real monitoring and reclamation due to the watermarks-based activation +condition. Refer to the descriptions of the watermark parameters below for +this. 
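+ +For example, assuming the default module parameter file location, DAMON_RECLAIM +can be toggled at runtime like below (a usage sketch):: + + # echo Y > /sys/module/damon_reclaim/parameters/enabled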
+ +min_age +------- + +Time threshold for cold memory regions identification in microseconds. + +If a memory region is not accessed for this or longer time, DAMON_RECLAIM +identifies the region as cold, and reclaims it. + +120 seconds by default. + +quota_ms +-------- + +Limit of time for the reclamation in milliseconds. + +DAMON_RECLAIM tries to use only up to this time within a time window +(quota_reset_interval_ms) for trying reclamation of cold pages. This can be +used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, the +limit is disabled. + +10 ms by default. + +quota_sz +-------- + +Limit of size of memory for the reclamation in bytes. + +DAMON_RECLAIM charges amount of memory which it tried to reclaim within a time +window (quota_reset_interval_ms) and makes no more than this limit is tried. +This can be used for limiting consumption of CPU and IO. If this value is +zero, the limit is disabled. + +128 MiB by default. + +quota_reset_interval_ms +----------------------- + +The time/size quota charge reset interval in milliseconds. + +The charget reset interval for the quota of time (quota_ms) and size +(quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than +quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms +milliseconds. + +1 second by default. + +wmarks_interval +--------------- + +Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is +enabled but inactive due to its watermarks rule. + +wmarks_high +----------- + +Free memory rate (per thousand) for the high watermark. + +If free memory of the system in bytes per thousand bytes is higher than this, +DAMON_RECLAIM becomes inactive, so it does nothing but only periodically checks +the watermarks. + +wmarks_mid +---------- + +Free memory rate (per thousand) for the middle watermark. + +If free memory of the system in bytes per thousand bytes is between this and +the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring and +the reclaiming. + +wmarks_low +---------- + +Free memory rate (per thousand) for the low watermark. + +If free memory of the system in bytes per thousand bytes is lower than this, +DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks the +watermarks. In the case, the system falls back to the LRU-list based page +granularity reclamation logic. + +sample_interval +--------------- + +Sampling interval for the monitoring in microseconds. + +The sampling interval of DAMON for the cold memory monitoring. Please refer to +the DAMON documentation (:doc:`usage`) for more detail. + +aggr_interval +------------- + +Aggregation interval for the monitoring in microseconds. + +The aggregation interval of DAMON for the cold memory monitoring. Please +refer to the DAMON documentation (:doc:`usage`) for more detail. + +min_nr_regions +-------------- + +Minimum number of monitoring regions. + +The minimal number of monitoring regions of DAMON for the cold memory +monitoring. This can be used to set lower-bound of the monitoring quality. +But, setting this too high could result in increased monitoring overhead. +Please refer to the DAMON documentation (:doc:`usage`) for more detail. + +max_nr_regions +-------------- + +Maximum number of monitoring regions. + +The maximum number of monitoring regions of DAMON for the cold memory +monitoring. This can be used to set upper-bound of the monitoring overhead. +However, setting this too low could result in bad monitoring quality. 
Please +refer to the DAMON documentation (:doc:`usage`) for more detail. + +monitor_region_start +-------------------- + +Start of the target memory region in physical address. + +The start physical address of the memory region that DAMON_RECLAIM will do work +against. That is, DAMON_RECLAIM will find cold memory regions in this region +and reclaim them. By default, the biggest System RAM is used as the region. + +monitor_region_end +------------------ + +End of the target memory region in physical address. + +The end physical address of the memory region that DAMON_RECLAIM will do work +against. That is, DAMON_RECLAIM will find cold memory regions in this region +and reclaim them. By default, the biggest System RAM is used as the region. + +kdamond_pid +----------- + +PID of the DAMON thread. + +If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. Else, +-1. + +Example +======= + +The example commands below make DAMON_RECLAIM find memory regions that have not +been accessed for 30 seconds or more and page them out. The reclamation is +limited to be done only up to 1 GiB per second to avoid DAMON_RECLAIM consuming +too much CPU time for the paging out operation. It also asks DAMON_RECLAIM to +do nothing if the system's free memory rate is more than 50%, but start the +real work if it becomes lower than 40%. If DAMON_RECLAIM doesn't make progress +and therefore the free memory rate becomes lower than 20%, it asks +DAMON_RECLAIM to do nothing again, so that we can fall back to the LRU-list +based page granularity reclamation. :: + + # cd /sys/module/damon_reclaim/parameters + # echo 30000000 > min_age + # echo $((1 * 1024 * 1024 * 1024)) > quota_sz + # echo 1000 > quota_reset_interval_ms + # echo 500 > wmarks_high + # echo 400 > wmarks_mid + # echo 200 > wmarks_low + # echo Y > enabled + +.. [1] https://research.google/pubs/pub48551/ +.. [2] https://lwn.net/Articles/787611/ +.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html From 0d8bc0ccb741289236615897694479796400729c Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Thu, 21 Oct 2021 15:09:28 +1100 Subject: [PATCH 0888/1202] mm/damon: remove unnecessary variable initialization Patch series "mm/damon: Fix some small bugs", v4. This patch (of 2): In 'damon_va_apply_three_regions', there is no need to initialize the variable 'i' to 0. Link: https://lkml.kernel.org/r/b7df8d3dad0943a37e01f60c441b1968b2b20354.1634720326.git.xhao@linux.alibaba.com Link: https://lkml.kernel.org/r/cover.1634720326.git.xhao@linux.alibaba.com Signed-off-by: Xin Hao Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/damon/vaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 675cd8c7df9b8..35fe49080ee99 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -306,7 +306,7 @@ static void damon_va_apply_three_regions(struct damon_target *t, struct damon_addr_range bregions[3]) { struct damon_region *r, *next; - unsigned int i = 0; + unsigned int i; /* Remove regions which are not in the three big regions now */ damon_for_each_region_safe(r, next, t) { From 503617b2538d2878c9fed5eef1466897813b6864 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Thu, 21 Oct 2021 15:09:28 +1100 Subject: [PATCH 0889/1202] mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on When the ctx->adaptive_targets list is empty, I did some tests on the monitor_on interface like this. 
# cat /sys/kernel/debug/damon/target_ids # # echo on > /sys/kernel/debug/damon/monitor_on # damon: kdamond (5390) starts Though the ctx->adaptive_targets list is empty, kthread_run() is still called and the kdamond.x thread is still created, which is meaningless. So this adds a check in 'dbgfs_monitor_on_write': if the ctx->adaptive_targets list is empty, return -EINVAL. Link: https://lkml.kernel.org/r/0a60a6e8ec9d71989e0848a4dc3311996ca3b5d4.1634720326.git.xhao@linux.alibaba.com Signed-off-by: Xin Hao Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/damon.h | 1 + mm/damon/core.c | 5 +++++ mm/damon/dbgfs.c | 15 ++++++++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 239681d657a8e..a14b3cc54cab3 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -440,6 +440,7 @@ void damon_destroy_scheme(struct damos *s); struct damon_target *damon_new_target(unsigned long id); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); +bool damon_targets_empty(struct damon_ctx *ctx); void damon_free_target(struct damon_target *t); void damon_destroy_target(struct damon_target *t); unsigned int damon_nr_regions(struct damon_target *t); diff --git a/mm/damon/core.c b/mm/damon/core.c index 6993c60ae31c4..46a6afea3030c 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -180,6 +180,11 @@ void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) list_add_tail(&t->list, &ctx->adaptive_targets); } +bool damon_targets_empty(struct damon_ctx *ctx) +{ + return list_empty(&ctx->adaptive_targets); +} + static void damon_del_target(struct damon_target *t) { list_del(&t->list); diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 6828e463348b0..befb27a29aabd 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -878,12 +878,21 @@ static ssize_t dbgfs_monitor_on_write(struct file *file, return -EINVAL; } - if (!strncmp(kbuf, "on", count)) + if (!strncmp(kbuf, "on", count)) { + int i; + + for (i = 0; i < dbgfs_nr_ctxs; i++) { + if (damon_targets_empty(dbgfs_ctxs[i])) { + kfree(kbuf); + return -EINVAL; + } + } ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); - else if (!strncmp(kbuf, "off", count)) + } else if (!strncmp(kbuf, "off", count)) { ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); - else + } else { ret = -EINVAL; + } if (!ret) ret = count; From aa36fbb0f499dde6d8aadbb4822f1d505b56fc95 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 21 Oct 2021 15:09:29 +1100 Subject: [PATCH 0890/1202] fs/buffer.c: add debug print for __getblk_gfp() stall problem Among syzbot's unresolved hung task reports, 18 out of 65 reports contain the __getblk_gfp() line in the backtrace. Since there is a comment block that says that __getblk_gfp() will lock up the machine if the try_to_free_buffers() attempt from grow_dev_page() is failing, let's start by checking whether syzbot is hitting that case. This change will be removed after the bug is fixed. 
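With CONFIG_DEBUG_AID_FOR_SYZBOT=y, a stall would then be reported in the kernel log roughly like below (the values are hypothetical; only the format comes from the printk this patch adds):

    syz-executor(1234): getblk(): executed=82 bh_count=1 bh_state=20029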
Link: http://lkml.kernel.org/r/9b9fcdda-c347-53ee-fdbb-8a7d11cf430e@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Cc: Dmitry Vyukov Cc: Al Viro Cc: Mel Gorman Cc: Michal Hocko Cc: Andi Kleen Cc: Jan Kara Cc: Jeff Layton Cc: Cc: Tim Chen Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/buffer.c | 50 +++++++++++++++++++++++++++++++++++++++++-- include/linux/sched.h | 7 ++++++ lib/Kconfig.debug | 6 ++++++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index c615387aedcae..9190343db779b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -956,10 +956,20 @@ grow_dev_page(struct block_device *bdev, sector_t block, end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits, size); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x01; +#endif goto done; } - if (!try_to_free_buffers(page)) + if (!try_to_free_buffers(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x02; +#endif goto failed; + } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x04; +#endif } /* @@ -979,6 +989,9 @@ grow_dev_page(struct block_device *bdev, sector_t block, spin_unlock(&inode->i_mapping->private_lock); done: ret = (block < end_block) ? 1 : -ENXIO; +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x08; +#endif failed: unlock_page(page); put_page(page); @@ -1030,6 +1043,12 @@ __getblk_slow(struct block_device *bdev, sector_t block, return NULL; } +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_stamp = jiffies; + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; +#endif for (;;) { struct buffer_head *bh; int ret; @@ -1041,6 +1060,18 @@ __getblk_slow(struct block_device *bdev, sector_t block, ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; + +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + if (!time_after(jiffies, current->getblk_stamp + 3 * HZ)) + continue; + printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n", + current->comm, current->pid, current->getblk_executed, + current->getblk_bh_count, current->getblk_bh_state); + current->getblk_executed = 0; + current->getblk_bh_count = 0; + current->getblk_bh_state = 0; + current->getblk_stamp = jiffies; +#endif } } @@ -3191,6 +3222,11 @@ EXPORT_SYMBOL(sync_dirty_buffer); */ static inline int buffer_busy(struct buffer_head *bh) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x80; + current->getblk_bh_count = atomic_read(&bh->b_count); + current->getblk_bh_state = bh->b_state; +#endif return atomic_read(&bh->b_count) | (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); } @@ -3229,11 +3265,18 @@ int try_to_free_buffers(struct page *page) int ret = 0; BUG_ON(!PageLocked(page)); - if (PageWriteback(page)) + if (PageWriteback(page)) { +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x10; +#endif return 0; + } if (mapping == NULL) { /* can this still happen? 
*/ ret = drop_buffers(page, &buffers_to_free); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x20; +#endif goto out; } @@ -3257,6 +3300,9 @@ int try_to_free_buffers(struct page *page) if (ret) cancel_dirty_page(page); spin_unlock(&mapping->private_lock); +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + current->getblk_executed |= 0x40; +#endif out: if (buffers_to_free) { struct buffer_head *bh = buffers_to_free; diff --git a/include/linux/sched.h b/include/linux/sched.h index c1a927ddec646..817f8faf62e51 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1488,6 +1488,13 @@ struct task_struct { struct callback_head l1d_flush_kill; #endif +#ifdef CONFIG_DEBUG_AID_FOR_SYZBOT + unsigned long getblk_stamp; + unsigned int getblk_executed; + unsigned int getblk_bh_count; + unsigned long getblk_bh_state; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 669fee1d26b83..8d698288c6723 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1782,6 +1782,12 @@ config IO_STRICT_DEVMEM menu "$(SRCARCH) Debugging" +config DEBUG_AID_FOR_SYZBOT + bool "Additional debug code for syzbot" + default n + help + This option is intended for testing by syzbot. + source "arch/$(SRCARCH)/Kconfig.debug" endmenu From 0599ac76da9f84d2a464e8dba2058dd44252dc9c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 21 Oct 2021 15:09:29 +1100 Subject: [PATCH 0891/1202] fs/buffer.c: dump more info for __getblk_gfp() stall problem We need to dump more variables on top of "fs/buffer.c: add debug print for __getblk_gfp() stall problem". Link: http://lkml.kernel.org/r/12239545-7d8a-820f-48ba-952e2e98a05c@i-love.sakura.ne.jp Signed-off-by: Tetsuo Handa Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/buffer.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 9190343db779b..17d5f40197d1d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1064,9 +1064,15 @@ __getblk_slow(struct block_device *bdev, sector_t block, #ifdef CONFIG_DEBUG_AID_FOR_SYZBOT if (!time_after(jiffies, current->getblk_stamp + 3 * HZ)) continue; - printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx\n", + printk(KERN_ERR "%s(%u): getblk(): executed=%x bh_count=%d bh_state=%lx bdev_super_blocksize=%ld size=%u bdev_super_blocksize_bits=%d bdev_inode_blkbits=%d\n", current->comm, current->pid, current->getblk_executed, - current->getblk_bh_count, current->getblk_bh_state); + current->getblk_bh_count, current->getblk_bh_state, + IS_ERR_OR_NULL(bdev->bd_super) ? -1L : + bdev->bd_super->s_blocksize, size, + IS_ERR_OR_NULL(bdev->bd_super) ? -1 : + bdev->bd_super->s_blocksize_bits, + IS_ERR_OR_NULL(bdev->bd_inode) ? -1 : + bdev->bd_inode->i_blkbits); current->getblk_executed = 0; current->getblk_bh_count = 0; current->getblk_bh_state = 0; From 189face3b5ed0a22dcbc31c1f0f9a394ea6f688c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 21 Oct 2021 15:09:30 +1100 Subject: [PATCH 0892/1202] kernel/hung_task.c: Monitor killed tasks. syzbot's current top report is "no output from test machine" where the userspace process failed to spawn a new test process for 300 seconds for some reason. One of reasons which can result in this report is that an already spawned test process was unable to terminate (e.g. trapped at an unkillable retry loop due to some bug) after SIGKILL was sent to that process. 
Therefore, reporting when a thread is failing to terminate despite a fatal signal is pending would give us more useful information. In the context of syzbot's testing where there are only 2 CPUs in the target VM (which means only a small number of threads and not so much memory) and threads get SIGKILL after 5 seconds from fork(), being unable to reach do_exit() within 10 seconds is likely a sign that something went wrong. Therefore, I would like to try this patch in linux-next.git for feasibility testing of whether this patch helps finding more bugs and reproducers for such bugs, by bringing "unable to terminate threads" reports out of "no output from test machine" reports. A potential bad effect of this patch will be that kernel code becomes killable without addressing the root cause of being unable to terminate, for use of a killable wait will bypass both the TASK_UNINTERRUPTIBLE stall test and the SIGKILL-after-5-seconds behavior, which will result in failing to detect such problems on real systems, where SIGKILL won't be sent after 5 seconds when something goes wrong. This version shares the existing sysctl settings (e.g. check interval, timeout, whether to panic) used for detecting TASK_UNINTERRUPTIBLE threads. We will likely want to use different sysctl settings for monitoring killed threads. But let's start as a linux-next.git patch without introducing new sysctl settings. We can add sysctl settings before sending to linux.git. Link: http://lkml.kernel.org/r/60d1d7f6-b201-3dcb-a51b-76a31bcfa919@i-love.sakura.ne.jp Signed-off-by: Tetsuo Handa Cc: Dmitry Vyukov Cc: Petr Mladek Cc: Ingo Molnar Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Vitaly Kuznetsov Cc: Liu Chuansheng Cc: Valdis Kletnieks Cc: linux-kernel@vger.kernel.org Cc: Dmitry Vyukov Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/sched.h | 1 + kernel/hung_task.c | 44 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 817f8faf62e51..92570d193996b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,6 +1062,7 @@ struct task_struct { #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; + unsigned long killed_time; #endif /* Filesystem information: */ struct fs_struct *fs; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 9888e2bc8c767..8cc07e7f29aa1 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -145,6 +145,47 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) touch_nmi_watchdog(); } +static void check_killed_task(struct task_struct *t, unsigned long timeout) +{ + unsigned long stamp = t->killed_time; + + /* + * Ensure the task is not frozen. + * Also, skip vfork and any other user process that freezer should skip. + */ + if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) + return; + /* + * Skip threads which are already inside do_exit(), for exit_mm() etc. + * might take many seconds. 
+ */ + if (t->flags & PF_EXITING) + return; + if (!stamp) { + stamp = jiffies; + if (!stamp) + stamp++; + t->killed_time = stamp; + return; + } + if (time_is_after_jiffies(stamp + timeout * HZ)) + return; + trace_sched_process_hang(t); + if (sysctl_hung_task_panic) { + console_verbose(); + hung_task_call_panic = true; + } + /* + * This thread failed to terminate for more than + * sysctl_hung_task_timeout_secs seconds, complain: + */ + pr_err("INFO: task %s:%d can't die for more than %ld seconds.\n", + t->comm, t->pid, (jiffies - stamp) / HZ); + sched_show_task(t); + hung_task_show_lock = true; + touch_nmi_watchdog(); +} + /* * To avoid extending the RCU grace period for an unbounded amount of time, * periodically exit the critical section and enter a new one. @@ -196,6 +237,9 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) goto unlock; last_break = jiffies; } + /* Check threads which are about to terminate. */ + if (unlikely(fatal_signal_pending(t))) + check_killed_task(t, timeout); /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE) check_hung_task(t, timeout); From d2e1d0df84fabd172beb42a50567c04f43d87766 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 21 Oct 2021 15:09:30 +1100 Subject: [PATCH 0893/1202] procfs: do not list TID 0 in /proc//task If a task exits concurrently, task_pid_nr_ns may return 0. Link: https://lkml.kernel.org/r/8735pn5dx7.fsf@oldenburg.str.redhat.com Signed-off-by: Florian Weimer Acked-by: Christian Brauner Reviewed-by: Alexey Dobriyan Cc: Kees Cook Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 533d5836eb9a4..54f29399088f0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3800,6 +3800,9 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) char name[10 + 1]; unsigned int len; tid = task_pid_nr_ns(task, ns); + if (!tid) + /* The task has just exited. */ + continue; len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { From a38a323fc776034c204ad8655044a2235ee90eeb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:09:31 +1100 Subject: [PATCH 0894/1202] procfs-do-not-list-tid-0-in-proc-pid-task-fix coding style tweaks Cc: Florian Weimer Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 54f29399088f0..5541de99809c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3799,10 +3799,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) task = next_tid(task), ctx->pos++) { char name[10 + 1]; unsigned int len; + tid = task_pid_nr_ns(task, ns); if (!tid) - /* The task has just exited. */ - continue; + continue; /* The task has just exited. 
*/ len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { From 11dc751f26d3cbdbf24b639c37d392788a3841e9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Oct 2021 15:09:32 +1100 Subject: [PATCH 0895/1202] proc: test that /proc/*/task doesn't contain "0" add selftest to check for the errant "0" in /proc/*/task Link: https://lkml.kernel.org/r/YV88AnVzHxPafQ9o@localhost.localdomain Signed-off-by: Alexey Dobriyan Cc: Christian Brauner Cc: Alexey Dobriyan Cc: Kees Cook Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 2 + tools/testing/selftests/proc/proc-tid0.c | 81 ++++++++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 tools/testing/selftests/proc/proc-tid0.c diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 8f3e72e626fa7..c4e6a34f9657b 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -11,6 +11,7 @@ /proc-self-syscall /proc-self-wchan /proc-subset-pid +/proc-tid0 /proc-uptime-001 /proc-uptime-002 /read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1054e40a499ae..219fc61138473 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += -D_GNU_SOURCE +LDFLAGS += -pthread TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup @@ -13,6 +14,7 @@ TEST_GEN_PROGS += proc-self-map-files-002 TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan TEST_GEN_PROGS += proc-subset-pid +TEST_GEN_PROGS += proc-tid0 TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read diff --git a/tools/testing/selftests/proc/proc-tid0.c b/tools/testing/selftests/proc/proc-tid0.c new file mode 100644 index 0000000000000..58c1d7c90a8e2 --- /dev/null +++ b/tools/testing/selftests/proc/proc-tid0.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2021 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that /proc/*/task never contains "0". 
+#include <dirent.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +static pid_t pid = -1; + +static void atexit_hook(void) +{ + if (pid > 0) { + kill(pid, SIGKILL); + } +} + +static void *f(void *_) +{ + return NULL; +} + +static void sigalrm(int _) +{ + exit(0); +} + +int main(void) +{ + pid = fork(); + if (pid == 0) { + /* child */ + while (1) { + pthread_t pth; + pthread_create(&pth, NULL, f, NULL); + pthread_join(pth, NULL); + } + } else if (pid > 0) { + /* parent */ + atexit(atexit_hook); + + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/task", pid); + + signal(SIGALRM, sigalrm); + alarm(1); + + while (1) { + DIR *d = opendir(buf); + struct dirent *de; + while ((de = readdir(d))) { + if (strcmp(de->d_name, "0") == 0) { + exit(1); + } + } + closedir(d); + } + + return 0; + } else { + perror("fork"); + return 1; + } +} From 0d3a616a3b86642021dcfd000b83ce383d912ca3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:32 +1100 Subject: [PATCH 0896/1202] x86/xen: update xen_oldmem_pfn_is_ram() documentation After removing /dev/kmem, sanitizing /proc/kcore and handling /dev/mem, this series tackles the last sane way a VM could accidentally access logically unplugged memory managed by a virtio-mem device: /proc/vmcore When dumping memory via "makedumpfile", PG_offline pages, used by virtio-mem to flag logically unplugged memory, are already properly excluded; however, especially when accessing/copying /proc/vmcore "the usual way", we can still end up reading logically unplugged memory that is part of a virtio-mem device. Patches #1-#3 are cleanups. Patch #4 extends the existing oldmem_pfn_is_ram mechanism. Patches #5-#7 are virtio-mem refactorings for patch #8, which implements the virtio-mem logic to query the state of device blocks. Patch #8: " Although virtio-mem currently supports reading unplugged memory in the hypervisor, this will change in the future, indicated to the device via a new feature flag. We similarly sanitized /proc/kcore access recently. [...] Distributions that support virtio-mem+kdump have to make sure that the virtio_mem module will be part of the kdump kernel or the kdump initrd; dracut was recently [2] extended to include virtio-mem in the generated initrd. As long as no special kdump kernels are used, this will automatically make sure that virtio-mem will be around in the kdump initrd and sanitize /proc/vmcore access -- with dracut. " This is the last remaining bit to support VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE [3] in the Linux implementation of virtio-mem. Note: this is best-effort. We'll never be able to control what runs inside the second kernel, really, but we also don't have to care: we only care about sane setups where we don't want our VM getting zapped once we touch the wrong memory location while dumping. While we usually expect sane setups to use "makedumpfile", nothing really speaks against just copying /proc/vmcore, especially in environments where HW poisoning isn't typically expected. Also, we really don't want to put all our trust in the memmap, so sanitizing also makes sense when just using "makedumpfile". [1] https://lkml.kernel.org/r/20210526093041.8800-1-david@redhat.com [2] https://github.com/dracutdevs/dracut/pull/1157 [3] https://lists.oasis-open.org/archives/virtio-comment/202109/msg00021.html This patch (of 9): The callback is only used for the vmcore nowadays.
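For orientation, the mechanism being reworked can be summarized with a short sketch. This is illustrative C in the shape of the pre-series fs/proc/vmcore.c logic (names mirror the diffs below; it is not the verbatim kernel source):

#include <linux/mm.h>		/* PAGE_SIZE */
#include <linux/string.h>	/* memset() */

/* One hypervisor-registered callback decides whether an oldmem pfn is RAM. */
static int (*oldmem_pfn_is_ram)(unsigned long pfn);

static bool sketch_pfn_is_ram(unsigned long pfn)
{
	int (*fn)(unsigned long pfn) = oldmem_pfn_is_ram;

	/*
	 * Only an explicit 0 ("not RAM") suppresses the read; errors (< 0)
	 * are treated like RAM, which patch #4 of this series cleans up.
	 */
	return !fn || fn(pfn) != 0;
}

/* Readers substitute zeroes for pages the callback rejects: */
static void sketch_read_oldmem_page(unsigned long pfn, char *buf)
{
	if (!sketch_pfn_is_ram(pfn))
		memset(buf, 0, PAGE_SIZE);	/* sparse dump file */
	/* else: copy the page via copy_oldmem_page() */
}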
Link: https://lkml.kernel.org/r/20211005121430.30136-1-david@redhat.com Link: https://lkml.kernel.org/r/20211005121430.30136-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Boris Ostrovsky Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Juergen Gross Cc: Stefano Stabellini Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Michal Hocko Cc: Oscar Salvador Cc: Mike Rapoport Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/xen/mmu_hvm.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index 57409373750f0..b242d1f4b4269 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -9,12 +9,9 @@ #ifdef CONFIG_PROC_VMCORE /* - * This function is used in two contexts: - * - the kdump kernel has to check whether a pfn of the crashed kernel - * was a ballooned page. vmcore is using this function to decide - * whether to access a pfn of the crashed kernel. - * - the kexec kernel has to check whether a pfn was ballooned by the - * previous kernel. If the pfn is ballooned, handle it properly. + * The kdump kernel has to check whether a pfn of the crashed kernel + * was a ballooned page. vmcore is using this function to decide + * whether to access a pfn of the crashed kernel. * Returns 0 if the pfn is not backed by a RAM page, the caller may * handle the pfn special in this case. */ From 8bcce441290b3e534f87f1968235b900ad72c8af Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:33 +1100 Subject: [PATCH 0897/1202] x86/xen: simplify xen_oldmem_pfn_is_ram() Let's simplify return handling. Link: https://lkml.kernel.org/r/20211005121430.30136-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/xen/mmu_hvm.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index b242d1f4b4269..d1b38c77352ba 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -21,23 +21,10 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn) .domid = DOMID_SELF, .pfn = pfn, }; - int ram; if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) return -ENXIO; - - switch (a.mem_type) { - case HVMMEM_mmio_dm: - ram = 0; - break; - case HVMMEM_ram_rw: - case HVMMEM_ram_ro: - default: - ram = 1; - break; - } - - return ram; + return a.mem_type != HVMMEM_mmio_dm; } #endif From 8054c2d8c4a006cebf3da2273857135be53504df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:33 +1100 Subject: [PATCH 0898/1202] x86/xen: print a warning when HVMOP_get_mem_type fails HVMOP_get_mem_type is not expected to fail, "This call failing is indication of something going quite wrong and it would be good to know about this." [1] Let's add a pr_warn_once(). [1] https://lkml.kernel.org/r/3b935aa0-6d85-0bcd-100e-15098add3c4c@oracle.com Link: https://lkml.kernel.org/r/20211005121430.30136-4-david@redhat.com Signed-off-by: David Hildenbrand Suggested-by: Boris Ostrovsky Cc: Baoquan He Cc: Borislav Petkov Cc: Dave Young Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/xen/mmu_hvm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index d1b38c77352ba..6ba8826dcdcc0 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -22,8 +22,10 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn) .pfn = pfn, }; - if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) + if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) { + pr_warn_once("Unexpected HVMOP_get_mem_type failure\n"); return -ENXIO; + } return a.mem_type != HVMMEM_mmio_dm; } #endif From f17748e580fe29b773ca84763c82298a84ba36a5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:34 +1100 Subject: [PATCH 0899/1202] proc/vmcore: let pfn_is_ram() return a bool The callback should deal with errors internally, it doesn't make sense to expose these via pfn_is_ram(). We'll rework the callbacks next. Right now we consider errors as if "it's RAM"; no functional change. Link: https://lkml.kernel.org/r/20211005121430.30136-5-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/vmcore.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9a15334da2086..a9bd80ab670ec 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -84,11 +84,11 @@ void unregister_oldmem_pfn_is_ram(void) } EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); -static int pfn_is_ram(unsigned long pfn) +static bool pfn_is_ram(unsigned long pfn) { int (*fn)(unsigned long pfn); /* pfn is ram unless fn() checks pagetype */ - int ret = 1; + bool ret = true; /* * Ask hypervisor if the pfn is really ram. @@ -97,7 +97,7 @@ static int pfn_is_ram(unsigned long pfn) */ fn = oldmem_pfn_is_ram; if (fn) - ret = fn(pfn); + ret = !!fn(pfn); return ret; } @@ -124,7 +124,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, nr_bytes = count; /* If pfn is not ram, return zeros for sparse dump files */ - if (pfn_is_ram(pfn) == 0) + if (!pfn_is_ram(pfn)) memset(buf, 0, nr_bytes); else { if (encrypted) From b232b5ae6acfe08c6fddbaa4b622083c7d33f572 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:34 +1100 Subject: [PATCH 0900/1202] proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks Let's support multiple registered callbacks, making sure that registering vmcore callbacks cannot fail. Make the callback return a bool instead of an int, handling how to deal with errors internally. Drop unused HAVE_OLDMEM_PFN_IS_RAM. We soon want to make use of this infrastructure from other drivers: virtio-mem, registering one callback for each virtio-mem device, to prevent reading unplugged virtio-mem memory. 
Handle it via a generic vmcore_cb structure, prepared for future extensions: for example, once we support virtio-mem on s390x where the vmcore is completely constructed in the second kernel, we want to detect and add plugged virtio-mem memory ranges to the vmcore in order for them to get dumped properly. Handle corner cases that are unexpected and shouldn't happen in sane setups: registering a callback after the vmcore has already been opened (warn only) and unregistering a callback after the vmcore has already been opened (warn and essentially read only zeroes from that point on). Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/kernel/aperture_64.c | 13 ++++- arch/x86/xen/mmu_hvm.c | 11 ++-- fs/proc/vmcore.c | 99 ++++++++++++++++++++++++----------- include/linux/crash_dump.h | 26 +++++++-- 4 files changed, 111 insertions(+), 38 deletions(-) diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 10562885f5fc6..af3ba08b684b5 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn) (pfn >= aperture_pfn_start + aperture_page_count)); } +#ifdef CONFIG_PROC_VMCORE +static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) +{ + return !!gart_mem_pfn_is_ram(pfn); +} + +static struct vmcore_cb gart_vmcore_cb = { + .pfn_is_ram = gart_oldmem_pfn_is_ram, +}; +#endif + static void __init exclude_from_core(u64 aper_base, u32 aper_order) { aperture_pfn_start = aper_base >> PAGE_SHIFT; aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; #ifdef CONFIG_PROC_VMCORE - WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); + register_vmcore_cb(&gart_vmcore_cb); #endif #ifdef CONFIG_PROC_KCORE WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index 6ba8826dcdcc0..509bdee3ab907 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -12,10 +12,10 @@ * The kdump kernel has to check whether a pfn of the crashed kernel * was a ballooned page. vmcore is using this function to decide * whether to access a pfn of the crashed kernel. - * Returns 0 if the pfn is not backed by a RAM page, the caller may + * Returns "false" if the pfn is not backed by a RAM page, the caller may * handle the pfn special in this case. 
*/ -static int xen_oldmem_pfn_is_ram(unsigned long pfn) +static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn) { struct xen_hvm_get_mem_type a = { .domid = DOMID_SELF, @@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn) if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) { pr_warn_once("Unexpected HVMOP_get_mem_type failure\n"); - return -ENXIO; + return true; } return a.mem_type != HVMMEM_mmio_dm; } +static struct vmcore_cb xen_vmcore_cb = { + .pfn_is_ram = xen_vmcore_pfn_is_ram, +}; #endif static void xen_hvm_exit_mmap(struct mm_struct *mm) @@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void) if (is_pagetable_dying_supported()) pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap; #ifdef CONFIG_PROC_VMCORE - WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); + register_vmcore_cb(&xen_vmcore_cb); #endif } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a9bd80ab670ec..7a04b2eca287d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0); /* Device Dump Size */ static size_t vmcoredd_orig_sz; -/* - * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error - * The called function has to take care of module refcounting. - */ -static int (*oldmem_pfn_is_ram)(unsigned long pfn); - -int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)) +static DECLARE_RWSEM(vmcore_cb_rwsem); +/* List of registered vmcore callbacks. */ +static LIST_HEAD(vmcore_cb_list); +/* Whether we had a surprise unregistration of a callback. */ +static bool vmcore_cb_unstable; +/* Whether the vmcore has been opened once. */ +static bool vmcore_opened; + +void register_vmcore_cb(struct vmcore_cb *cb) { - if (oldmem_pfn_is_ram) - return -EBUSY; - oldmem_pfn_is_ram = fn; - return 0; + down_write(&vmcore_cb_rwsem); + INIT_LIST_HEAD(&cb->next); + list_add_tail(&cb->next, &vmcore_cb_list); + /* + * Registering a vmcore callback after the vmcore was opened is + * very unusual (e.g., manual driver loading). + */ + if (vmcore_opened) + pr_warn_once("Unexpected vmcore callback registration\n"); + up_write(&vmcore_cb_rwsem); } -EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram); +EXPORT_SYMBOL_GPL(register_vmcore_cb); -void unregister_oldmem_pfn_is_ram(void) +void unregister_vmcore_cb(struct vmcore_cb *cb) { - oldmem_pfn_is_ram = NULL; - wmb(); + down_write(&vmcore_cb_rwsem); + list_del(&cb->next); + /* + * Unregistering a vmcore callback after the vmcore was opened is + * very unusual (e.g., forced driver removal), but we cannot stop + * unregistering. + */ + if (vmcore_opened) { + pr_warn_once("Unexpected vmcore callback unregistration\n"); + vmcore_cb_unstable = true; + } + up_write(&vmcore_cb_rwsem); } -EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram); +EXPORT_SYMBOL_GPL(unregister_vmcore_cb); static bool pfn_is_ram(unsigned long pfn) { - int (*fn)(unsigned long pfn); - /* pfn is ram unless fn() checks pagetype */ + struct vmcore_cb *cb; bool ret = true; - /* - * Ask hypervisor if the pfn is really ram. - * A ballooned page contains no data and reading from such a page - * will cause high load in the hypervisor. 
- */ - fn = oldmem_pfn_is_ram; - if (fn) - ret = !!fn(pfn); + lockdep_assert_held_read(&vmcore_cb_rwsem); + if (unlikely(vmcore_cb_unstable)) + return false; + + list_for_each_entry(cb, &vmcore_cb_list, next) { + if (unlikely(!cb->pfn_is_ram)) + continue; + ret = cb->pfn_is_ram(cb, pfn); + if (!ret) + break; + } return ret; } +static int open_vmcore(struct inode *inode, struct file *file) +{ + down_read(&vmcore_cb_rwsem); + vmcore_opened = true; + up_read(&vmcore_cb_rwsem); + + return 0; +} + /* Reads a page from the oldmem device from given offset. */ ssize_t read_from_oldmem(char *buf, size_t count, u64 *ppos, int userbuf, @@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = (unsigned long)(*ppos % PAGE_SIZE); pfn = (unsigned long)(*ppos / PAGE_SIZE); + down_read(&vmcore_cb_rwsem); do { if (count > (PAGE_SIZE - offset)) nr_bytes = PAGE_SIZE - offset; @@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); - if (tmp < 0) + if (tmp < 0) { + up_read(&vmcore_cb_rwsem); return tmp; + } } *ppos += nr_bytes; count -= nr_bytes; @@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count, offset = 0; } while (count); + up_read(&vmcore_cb_rwsem); return read; } @@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) { + int ret; + /* * Check if oldmem_pfn_is_ram was registered to avoid * looping over all pages without a reason. */ - if (oldmem_pfn_is_ram) - return remap_oldmem_pfn_checked(vma, from, pfn, size, prot); + down_read(&vmcore_cb_rwsem); + if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable) + ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot); else - return remap_oldmem_pfn_range(vma, from, pfn, size, prot); + ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot); + up_read(&vmcore_cb_rwsem); + return ret; } static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) @@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) #endif static const struct proc_ops vmcore_proc_ops = { + .proc_open = open_vmcore, .proc_read = read_vmcore, .proc_lseek = default_llseek, .proc_mmap = mmap_vmcore, diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2618577a4d6da..0c547d866f1e5 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -91,9 +91,29 @@ static inline void vmcore_unusable(void) elfcorehdr_addr = ELFCORE_ADDR_ERR; } -#define HAVE_OLDMEM_PFN_IS_RAM 1 -extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn)); -extern void unregister_oldmem_pfn_is_ram(void); +/** + * struct vmcore_cb - driver callbacks for /proc/vmcore handling + * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when + * reading the vmcore. Will return "true" if it is RAM or if the + * callback cannot tell. If any callback returns "false", it's not + * RAM and the page must not be accessed; zeroes should be + * indicated in the vmcore instead. For example, a ballooned page + * contains no data and reading from such a page will cause high + * load in the hypervisor. + * @next: List head to manage registered callbacks internally; initialized by + * register_vmcore_cb(). 
+ * + * vmcore callbacks allow drivers managing physical memory ranges to + * coordinate with vmcore handling code, for example, to prevent accessing + * physical memory ranges that should not be accessed when reading the vmcore, + * although included in the vmcore header as memory ranges to dump. + */ +struct vmcore_cb { + bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); + struct list_head next; +}; +extern void register_vmcore_cb(struct vmcore_cb *cb); +extern void unregister_vmcore_cb(struct vmcore_cb *cb); #else /* !CONFIG_CRASH_DUMP */ static inline bool is_kdump_kernel(void) { return 0; } From 4e813ddace67a117e55496b7501d2c9a57f7119b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:35 +1100 Subject: [PATCH 0901/1202] virtio-mem: factor out hotplug specifics from virtio_mem_init() into virtio_mem_init_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/virtio/virtio_mem.c | 81 ++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index bef8ad6bf4661..2ba7e8d6ba8d4 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2392,41 +2392,10 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) return 0; } -static int virtio_mem_init(struct virtio_mem *vm) +static int virtio_mem_init_hotplug(struct virtio_mem *vm) { const struct range pluggable_range = mhp_get_pluggable_range(true); uint64_t sb_size, addr; - uint16_t node_id; - - if (!vm->vdev->config->get) { - dev_err(&vm->vdev->dev, "config access disabled\n"); - return -EINVAL; - } - - /* - * We don't want to (un)plug or reuse any memory when in kdump. The - * memory is still accessible (but not mapped). - */ - if (is_kdump_kernel()) { - dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); - return -EBUSY; - } - - /* Fetch all properties that can't change. */ - virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, - &vm->plugged_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, - &vm->device_block_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, - &node_id); - vm->nid = virtio_mem_translate_node_id(vm, node_id); - virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); - virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, - &vm->region_size); - - /* Determine the nid for the device based on the lowest address. 
*/ - if (vm->nid == NUMA_NO_NODE) - vm->nid = memory_add_physaddr_to_nid(vm->addr); /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -2496,10 +2465,6 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->offline_threshold); } - dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); - dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); - dev_info(&vm->vdev->dev, "device block size: 0x%llx", - (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); if (vm->in_sbm) @@ -2508,10 +2473,52 @@ static int virtio_mem_init(struct virtio_mem *vm) else dev_info(&vm->vdev->dev, "big block size: 0x%llx", (unsigned long long)vm->bbm.bb_size); + + return 0; +} + +static int virtio_mem_init(struct virtio_mem *vm) +{ + uint16_t node_id; + + if (!vm->vdev->config->get) { + dev_err(&vm->vdev->dev, "config access disabled\n"); + return -EINVAL; + } + + /* + * We don't want to (un)plug or reuse any memory when in kdump. The + * memory is still accessible (but not mapped). + */ + if (is_kdump_kernel()) { + dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); + return -EBUSY; + } + + /* Fetch all properties that can't change. */ + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &vm->plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, + &vm->device_block_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, + &node_id); + vm->nid = virtio_mem_translate_node_id(vm, node_id); + virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); + virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, + &vm->region_size); + + /* Determine the nid for the device based on the lowest address. */ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); + + dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); + dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); + dev_info(&vm->vdev->dev, "device block size: 0x%llx", + (unsigned long long)vm->device_block_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); - return 0; + return virtio_mem_init_hotplug(vm); } static int virtio_mem_create_resource(struct virtio_mem *vm) From c9847e48ea5ebabcd5f7d7e8cf89c332e2b049df Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:36 +1100 Subject: [PATCH 0902/1202] virtio-mem: factor out hotplug specifics from virtio_mem_probe() into virtio_mem_init_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. 
Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/virtio/virtio_mem.c | 87 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2ba7e8d6ba8d4..1be3ee7f684d5 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -260,6 +260,8 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn, static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, unsigned long nr_pages); static void virtio_mem_retry(struct virtio_mem *vm); +static int virtio_mem_create_resource(struct virtio_mem *vm); +static void virtio_mem_delete_resource(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -2395,7 +2397,8 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) static int virtio_mem_init_hotplug(struct virtio_mem *vm) { const struct range pluggable_range = mhp_get_pluggable_range(true); - uint64_t sb_size, addr; + uint64_t unit_pages, sb_size, addr; + int rc; /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -2474,7 +2477,48 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) dev_info(&vm->vdev->dev, "big block size: 0x%llx", (unsigned long long)vm->bbm.bb_size); + /* create the parent resource for all memory */ + rc = virtio_mem_create_resource(vm); + if (rc) + return rc; + + /* use a single dynamic memory group to cover the whole memory device */ + if (vm->in_sbm) + unit_pages = PHYS_PFN(memory_block_size_bytes()); + else + unit_pages = PHYS_PFN(vm->bbm.bb_size); + rc = memory_group_register_dynamic(vm->nid, unit_pages); + if (rc < 0) + goto out_del_resource; + vm->mgid = rc; + + /* + * If we still have memory plugged, we have to unplug all memory first. + * Registering our parent resource makes sure that this memory isn't + * actually in use (e.g., trying to reload the driver). + */ + if (vm->plugged_size) { + vm->unplug_all_required = true; + dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); + } + + /* register callbacks */ + vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; + rc = register_memory_notifier(&vm->memory_notifier); + if (rc) + goto out_unreg_group; + rc = register_virtio_mem_device(vm); + if (rc) + goto out_unreg_mem; + return 0; +out_unreg_mem: + unregister_memory_notifier(&vm->memory_notifier); +out_unreg_group: + memory_group_unregister(vm->mgid); +out_del_resource: + virtio_mem_delete_resource(vm); + return rc; } static int virtio_mem_init(struct virtio_mem *vm) @@ -2578,7 +2622,6 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm) static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; - uint64_t unit_pages; int rc; BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); @@ -2608,40 +2651,6 @@ static int virtio_mem_probe(struct virtio_device *vdev) if (rc) goto out_del_vq; - /* create the parent resource for all memory */ - rc = virtio_mem_create_resource(vm); - if (rc) - goto out_del_vq; - - /* use a single dynamic memory group to cover the whole memory device */ - if (vm->in_sbm) - unit_pages = PHYS_PFN(memory_block_size_bytes()); - else - unit_pages = PHYS_PFN(vm->bbm.bb_size); - rc = memory_group_register_dynamic(vm->nid, unit_pages); - if (rc < 0) - goto out_del_resource; - vm->mgid = rc; - - /* - * If we still have memory plugged, we have to unplug all memory first. 
- * Registering our parent resource makes sure that this memory isn't - * actually in use (e.g., trying to reload the driver). - */ - if (vm->plugged_size) { - vm->unplug_all_required = true; - dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); - } - - /* register callbacks */ - vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; - rc = register_memory_notifier(&vm->memory_notifier); - if (rc) - goto out_unreg_group; - rc = register_virtio_mem_device(vm); - if (rc) - goto out_unreg_mem; - virtio_device_ready(vdev); /* trigger a config update to start processing the requested_size */ @@ -2649,12 +2658,6 @@ static int virtio_mem_probe(struct virtio_device *vdev) queue_work(system_freezable_wq, &vm->wq); return 0; -out_unreg_mem: - unregister_memory_notifier(&vm->memory_notifier); -out_unreg_group: - memory_group_unregister(vm->mgid); -out_del_resource: - virtio_mem_delete_resource(vm); out_del_vq: vdev->config->del_vqs(vdev); out_free_vm: From 79d319b3d41bda8d90343ca0abb984463c1445b8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:36 +1100 Subject: [PATCH 0903/1202] virtio-mem: factor out hotplug specifics from virtio_mem_remove() into virtio_mem_deinit_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-9-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/virtio/virtio_mem.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 1be3ee7f684d5..76d8aef3cfd29 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2667,9 +2667,8 @@ static int virtio_mem_probe(struct virtio_device *vdev) return rc; } -static void virtio_mem_remove(struct virtio_device *vdev) +static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) { - struct virtio_mem *vm = vdev->priv; unsigned long mb_id; int rc; @@ -2716,7 +2715,8 @@ static void virtio_mem_remove(struct virtio_device *vdev) * away. Warn at least. */ if (virtio_mem_has_memory_added(vm)) { - dev_warn(&vdev->dev, "device still has system memory added\n"); + dev_warn(&vm->vdev->dev, + "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); kfree_const(vm->resource_name); @@ -2730,6 +2730,13 @@ static void virtio_mem_remove(struct virtio_device *vdev) } else { vfree(vm->bbm.bb_states); } +} + +static void virtio_mem_remove(struct virtio_device *vdev) +{ + struct virtio_mem *vm = vdev->priv; + + virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); From 6955b2797e08e9830240371088d64b673dc60308 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:09:37 +1100 Subject: [PATCH 0904/1202] virtio-mem: kdump mode to sanitize /proc/vmcore access Although virtio-mem currently supports reading unplugged memory in the hypervisor, this will change in the future, indicated to the device via a new feature flag. We similarly sanitized /proc/kcore access recently. 
[1] Let's register a vmcore callback, to allow vmcore code to check if a PFN belonging to a virtio-mem device is either currently plugged and should be dumped or is currently unplugged and should not be accessed, instead mapping the shared zeropage or returning zeroes when reading. This is important when not capturing /proc/vmcore via tools like "makedumpfile" that can identify logically unplugged virtio-mem memory via PG_offline in the memmap, but simply by e.g., copying the file. Distributions that support virtio-mem+kdump have to make sure that the virtio_mem module will be part of the kdump kernel or the kdump initrd; dracut was recently [2] extended to include virtio-mem in the generated initrd. As long as no special kdump kernels are used, this will automatically make sure that virtio-mem will be around in the kdump initrd and sanitize /proc/vmcore access -- with dracut. With this series, we'll send one virtio-mem state request for every ~2 MiB chunk of virtio-mem memory indicated in the vmcore that we intend to read/map. In the future, we might want to allow building virtio-mem for kdump mode only, even without CONFIG_MEMORY_HOTPLUG and friends: this way, we could support special stripped-down kdump kernels that have many other config options disabled; we'll tackle that once required. Further, we might want to try sensing bigger blocks (e.g., memory sections) first before falling back to device blocks on demand. Tested with Fedora rawhide, which contains a recent kexec-tools version (considering "System RAM (virtio_mem)" when creating the vmcore header) and a recent dracut version (including the virtio_mem module in the kdump initrd). [1] https://lkml.kernel.org/r/20210526093041.8800-1-david@redhat.com [2] https://github.com/dracutdevs/dracut/pull/1157 Link: https://lkml.kernel.org/r/20211005121430.30136-10-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/virtio/virtio_mem.c | 136 ++++++++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 76d8aef3cfd29..3573b78f6b050 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -223,6 +223,9 @@ struct virtio_mem { * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get * plugged/unplugged. + * + * In kdump mode, used to serialize requests, last_block_addr and + * last_block_plugged. */ struct mutex hotplug_mutex; bool hotplug_active; @@ -230,6 +233,9 @@ struct virtio_mem { /* An error occurred we cannot handle - stop processing requests. */ bool broken; + /* Cached valued of is_kdump_kernel() when the device was probed. */ + bool in_kdump; + /* The driver is being removed. */ spinlock_t removal_lock; bool removing; @@ -243,6 +249,13 @@ struct virtio_mem { /* Memory notifier (online/offline events). 
*/ struct notifier_block memory_notifier; +#ifdef CONFIG_PROC_VMCORE + /* vmcore callback for /proc/vmcore handling in kdump mode */ + struct vmcore_cb vmcore_cb; + uint64_t last_block_addr; + bool last_block_plugged; +#endif /* CONFIG_PROC_VMCORE */ + /* Next device in the list of virtio-mem devices. */ struct list_head next; }; @@ -2293,6 +2306,12 @@ static void virtio_mem_run_wq(struct work_struct *work) uint64_t diff; int rc; + if (unlikely(vm->in_kdump)) { + dev_warn_once(&vm->vdev->dev, + "unexpected workqueue run in kdump kernel\n"); + return; + } + hrtimer_cancel(&vm->retry_timer); if (vm->broken) @@ -2521,6 +2540,86 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) return rc; } +#ifdef CONFIG_PROC_VMCORE +static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, + uint64_t size) +{ + const uint64_t nb_vm_blocks = size / vm->device_block_size; + const struct virtio_mem_req req = { + .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), + .u.state.addr = cpu_to_virtio64(vm->vdev, addr), + .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), + }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + + switch (virtio_mem_send_request(vm, &req)) { + case VIRTIO_MEM_RESP_ACK: + return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); + case VIRTIO_MEM_RESP_ERROR: + rc = -EINVAL; + break; + default: + break; + } + + dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); + return rc; +} + +static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, + unsigned long pfn) +{ + struct virtio_mem *vm = container_of(cb, struct virtio_mem, + vmcore_cb); + uint64_t addr = PFN_PHYS(pfn); + bool is_ram; + int rc; + + if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) + return true; + if (!vm->plugged_size) + return false; + + /* + * We have to serialize device requests and access to the information + * about the block queried last. + */ + mutex_lock(&vm->hotplug_mutex); + + addr = ALIGN_DOWN(addr, vm->device_block_size); + if (addr != vm->last_block_addr) { + rc = virtio_mem_send_state_request(vm, addr, + vm->device_block_size); + /* On any kind of error, we're going to signal !ram. */ + if (rc == VIRTIO_MEM_STATE_PLUGGED) + vm->last_block_plugged = true; + else + vm->last_block_plugged = false; + vm->last_block_addr = addr; + } + + is_ram = vm->last_block_plugged; + mutex_unlock(&vm->hotplug_mutex); + return is_ram; +} +#endif /* CONFIG_PROC_VMCORE */ + +static int virtio_mem_init_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); + vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; + register_vmcore_cb(&vm->vmcore_cb); + return 0; +#else /* CONFIG_PROC_VMCORE */ + dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); + return -EBUSY; +#endif /* CONFIG_PROC_VMCORE */ +} + static int virtio_mem_init(struct virtio_mem *vm) { uint16_t node_id; @@ -2530,15 +2629,6 @@ static int virtio_mem_init(struct virtio_mem *vm) return -EINVAL; } - /* - * We don't want to (un)plug or reuse any memory when in kdump. The - * memory is still accessible (but not mapped). - */ - if (is_kdump_kernel()) { - dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); - return -EBUSY; - } - /* Fetch all properties that can't change. 
*/ virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, &vm->plugged_size); virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, &vm->device_block_size); virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, &node_id); vm->nid = virtio_mem_translate_node_id(vm, node_id); virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, &vm->region_size); @@ -2562,6 +2652,12 @@ static int virtio_mem_init(struct virtio_mem *vm) if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); + /* + * We don't want to (un)plug or reuse any memory when in kdump. The + * memory is still accessible (but not exposed to Linux). + */ + if (vm->in_kdump) + return virtio_mem_init_kdump(vm); return virtio_mem_init_hotplug(vm); } @@ -2640,6 +2736,7 @@ static int virtio_mem_probe(struct virtio_device *vdev) hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vm->retry_timer.function = virtio_mem_timer_expired; vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; + vm->in_kdump = is_kdump_kernel(); /* register the virtqueue */ rc = virtio_mem_init_vq(vm); @@ -2654,8 +2751,10 @@ static int virtio_mem_probe(struct virtio_device *vdev) virtio_device_ready(vdev); /* trigger a config update to start processing the requested_size */ - atomic_set(&vm->config_changed, 1); - queue_work(system_freezable_wq, &vm->wq); + if (!vm->in_kdump) { + atomic_set(&vm->config_changed, 1); + queue_work(system_freezable_wq, &vm->wq); + } return 0; out_del_vq: @@ -2732,11 +2831,21 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) } } +static void virtio_mem_deinit_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + unregister_vmcore_cb(&vm->vmcore_cb); +#endif /* CONFIG_PROC_VMCORE */ +} + static void virtio_mem_remove(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; - virtio_mem_deinit_hotplug(vm); + if (vm->in_kdump) + virtio_mem_deinit_kdump(vm); + else + virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); @@ -2750,6 +2859,9 @@ static void virtio_mem_config_changed(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; + if (unlikely(vm->in_kdump)) + return; + atomic_set(&vm->config_changed, 1); virtio_mem_retry(vm); } From 9aa27c1c02f1e60ab7d7d67f610f462c34b28355 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 21 Oct 2021 15:09:38 +1100 Subject: [PATCH 0905/1202] proc: allow pid_revalidate() during LOOKUP_RCU Problem Description: When running ~128 parallel instances of "TZ=/etc/localtime ps -fe >/dev/null" on a 128-CPU machine, the %sys utilization reaches 97%, and perf shows the following code path as being responsible for heavy contention on the d_lockref spinlock: walk_component() lookup_fast() d_revalidate() pid_revalidate() // returns -ECHILD unlazy_child() lockref_get_not_dead(&nd->path.dentry->d_lockref) <-- contention The reason is that pid_revalidate() is triggering a drop from RCU to ref path walk mode. All concurrent path lookups thus try to grab a reference to the dentry for /proc/$pid, before re-executing pid_revalidate() and then stepping into the /proc/$pid directory. Thus there is huge spinlock contention. This patch allows pid_revalidate() to execute in RCU mode, meaning that the path lookup can successfully enter the /proc/$pid directory while still in RCU mode. Later on, the path lookup may still drop into ref mode, but the contention will be much reduced at this point. By applying this patch, %sys utilization falls to around 85% under the same workload, and the number of ps processes executed per unit time increases by 3x-4x. A rough userspace sketch of this kind of load is shown below.
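This sketch is a hedged stand-in only, not the reporter's exact harness; NPROC, the iteration count, and the target path are arbitrary choices:

#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>

#define NPROC 128	/* parallel walkers, mirroring the 128 ps instances */

int main(void)
{
	for (int i = 0; i < NPROC; i++) {
		if (fork() == 0) {
			char buf[256];

			for (int j = 0; j < 100000; j++) {
				/* each lookup revalidates /proc/1 on the way down */
				int fd = open("/proc/1/stat", O_RDONLY);

				if (fd >= 0) {
					(void)read(fd, buf, sizeof(buf));
					close(fd);
				}
			}
			_exit(0);
		}
	}
	while (wait(NULL) > 0)
		;	/* reap all children */
	return 0;
}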
Although this particular workload is a bit contrived, we have seen some large collections of eager monitoring scripts which produced similarly high %sys time due to contention in the /proc directory. As a result of this patch, Al noted that several procfs methods which were only called in ref-walk mode could now be called from RCU mode. To ensure that this patch is safe, I audited all the inode get_link and permission() implementations, as well as dentry d_revalidate() implementations, in fs/proc. The purpose here is to ensure that they either are safe to call in RCU (i.e. don't sleep) or correctly bail out of RCU mode if they don't support it. My analysis shows that all at-risk procfs methods are safe to call under RCU, and thus this patch is safe. Procfs RCU-walk Analysis: This analysis is up-to-date with 5.15-rc3. When called under RCU mode, these functions have arguments as follows: * get_link() receives a NULL dentry pointer when called in RCU mode. * permission() receives MAY_NOT_BLOCK in the mode parameter when called from RCU. * d_revalidate() receives LOOKUP_RCU in flags. For the following functions, either they are trivially RCU safe, or they explicitly bail at the beginning of the function when run in RCU mode: proc_ns_get_link (bails out) proc_get_link (RCU safe) proc_pid_get_link (bails out) map_files_d_revalidate (bails out) map_misc_d_revalidate (bails out) proc_net_d_revalidate (RCU safe) proc_sys_revalidate (bails out, also not under /proc/$pid) tid_fd_revalidate (bails out) proc_sys_permission (not under /proc/$pid) The remainder of the functions require a bit more detail: * proc_fd_permission: RCU safe. All of the body of this function is under rcu_read_lock(), except generic_permission() which declares itself RCU safe in its documentation string. * proc_self_get_link uses GFP_ATOMIC in the RCU case, so it is RCU aware and otherwise looks safe. The same is true of proc_thread_self_get_link. * proc_map_files_get_link: calls ns_capable, which calls capable(), and thus calls into the audit code (see note #1 below). The remainder is just a call to the trivially safe proc_pid_get_link(). * proc_pid_permission: calls ptrace_may_access(), which appears RCU safe, although it does call into the "security_ptrace_access_check()" hook, which looks safe under smack and selinux. Just the audit code is of concern. Also uses get_task_struct() and put_task_struct(), see note #2 below. * proc_tid_comm_permission: Appears safe, though calls put_task_struct (see note #2 below). Note #1: Most of the concern of RCU safety has centered around the audit code. However, since b17ec22fb339 ("selinux: slow_avc_audit has become non-blocking"), it's safe to call this code under RCU. So all of the above are safe by my estimation. Note #2: get_task_struct() and put_task_struct(): The majority of get_task_struct() is under RCU read lock, and in any case it is a simple increment. But put_task_struct() is complex, given that it could at some point free the task struct, and this process has many steps which I couldn't manually verify. However, several other places call put_task_struct() under RCU, so it appears safe to use here too (see kernel/hung_task.c:165 or rcu/tree-stall.h:296). Patch description: pid_revalidate() drops from RCU into REF lookup mode. When many threads are resolving paths within /proc in parallel, this can result in heavy spinlock contention on d_lockref as each thread tries to grab a reference to the /proc dentry (and drop it shortly thereafter).
Investigation indicates that it is not necessary to drop RCU in pid_revalidate(), as no RCU data is modified and the function never sleeps. So, remove the LOOKUP_RCU check. Link: https://lkml.kernel.org/r/20211004175629.292270-2-stephen.s.brennan@oracle.com Signed-off-by: Stephen Brennan Cc: Konrad Wilk Cc: Alexander Viro Cc: Matthew Wilcox Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/proc/base.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 5541de99809c2..264509e584e3e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1979,19 +1979,21 @@ static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; + int ret = 0; - if (flags & LOOKUP_RCU) - return -ECHILD; - - inode = d_inode(dentry); - task = get_proc_task(inode); + rcu_read_lock(); + inode = d_inode_rcu(dentry); + if (!inode) + goto out; + task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { pid_update_inode(task, inode); - put_task_struct(task); - return 1; + ret = 1; } - return 0; +out: + rcu_read_unlock(); + return ret; } static inline bool proc_inode_is_dead(struct inode *inode) From 8aea76688c6b849049d56219ca1c4f1797fb5d42 Mon Sep 17 00:00:00 2001 From: Julius Hemanth Pitti Date: Thu, 21 Oct 2021 15:09:38 +1100 Subject: [PATCH 0906/1202] proc/sysctl: make protected_* world readable protected_* files have 600 permissions, which prevents non-superusers from reading them. Containers like "AWS Greengrass" refuse to launch unless protected_hardlinks and protected_symlinks are set. When such containers run with "userns-remap" or "--user" mapping the container's root to a non-superuser on the host, they fail to run due to denied read access to these files. As these protections are hardly a secret and reading them poses no security risk, make them world readable. Though the Greengrass use case above needs read access only to the protected_hardlinks and protected_symlinks files, set all the other protected_* files to 644 as well for consistency.
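As a quick illustration of the user-visible effect, an unprivileged process can now read the setting directly. A hedged sketch (the sysctl path is real; the comment notes the failure mode the old 0600 mode caused):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/fs/protected_symlinks", "r");
	int val;

	if (!f) {
		perror("fopen");	/* EACCES is what the old 0600 mode caused for non-root */
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("fs.protected_symlinks = %d\n", val);
	fclose(f);
	return 0;
}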
Link: http://lkml.kernel.org/r/20200709235115.56954-1-jpitti@cisco.com Fixes: 800179c9b8a1 ("fs: add link restrictions") Signed-off-by: Julius Hemanth Pitti Acked-by: Kees Cook Cc: Iurii Zaikin Cc: Luis Chamberlain Cc: Ingo Molnar Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 083be6af29d70..857c1ccad9e86 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -3316,7 +3316,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, @@ -3325,7 +3325,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_hardlinks", .data = &sysctl_protected_hardlinks, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, @@ -3334,7 +3334,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_fifos", .data = &sysctl_protected_fifos, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, @@ -3343,7 +3343,7 @@ static struct ctl_table fs_table[] = { .procname = "protected_regular", .data = &sysctl_protected_regular, .maxlen = sizeof(int), - .mode = 0600, + .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &two, From 109e2e00f3cdd905ed133440967f1ec4b8d0bcf0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:38 +1100 Subject: [PATCH 0907/1202] kernel.h: drop unneeded inclusion from other headers Patch series "kernel.h further split", v5. kernel.h is a collection of unrelated things that often gets pulled into otherwise unrelated compilation units, especially when drivers need only one or two macro definitions from it. This patch (of 7): There is no evidence we need the kernel.h inclusion in certain headers. Drop the unneeded inclusion from those headers.
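The guiding pattern in this sub-series is "include what you use". A hypothetical before/after sketch follows; foo.h is illustrative and not one of the headers touched here:

/* foo.h -- hypothetical header following the pattern of this series. */
#ifndef _LINUX_FOO_H
#define _LINUX_FOO_H

/*
 * Before: #include <linux/kernel.h> dragged in dozens of headers just
 * for container_of() and a couple of types. After: name only what is
 * actually used.
 */
#include <linux/container_of.h>	/* container_of() */
#include <linux/list.h>		/* struct list_head */
#include <linux/types.h>	/* u32 */

struct foo {
	u32 id;
	struct list_head node;
};

#endif /* _LINUX_FOO_H */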
Link: https://lkml.kernel.org/r/20211013170417.87909-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20211013170417.87909-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Thomas Gleixner Cc: Brendan Higgins Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Sakari Ailus Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Jonathan Cameron Cc: Rasmus Villemoes Cc: Thorsten Leemhuis Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/rwsem.h | 1 - include/linux/smp.h | 1 - include/linux/spinlock.h | 1 - 3 files changed, 3 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 352c6127cb90f..f9348769e5588 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/include/linux/smp.h b/include/linux/smp.h index 510519e8a1eb7..a80ab58ae3f1d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -108,7 +108,6 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, #ifdef CONFIG_SMP #include -#include #include #include #include diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 45310ea1b1d78..22d672f81705b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include From 89f0d96a2d80c445c1d63b014297d5078b6c9446 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 21 Oct 2021 15:09:39 +1100 Subject: [PATCH 0908/1202] bottom_half.h needs kernel.h for _THIS_IP_ on arm at least Link: https://lkml.kernel.org/r/20211015202908.1c417ae2@canb.auug.org.au Signed-off-by: Stephen Rothwell Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/bottom_half.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index eed86eb0a1dea..11d107d88d038 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,6 +2,7 @@ #ifndef _LINUX_BH_H #define _LINUX_BH_H +#include #include #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) From fc90017d743e2ce244f0b678ba2613b623d350a3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:39 +1100 Subject: [PATCH 0909/1202] kernel.h: split out container_of() and typeof_member() macros kernel.h has been used as a dumping ground for all kinds of stuff for a long time. Here is an attempt to clean it up by splitting out the container_of() and typeof_member() macros. For the time being, include the new header back into kernel.h to avoid twisted indirect includes for existing users. Note, there are _a lot_ of headers and modules that include kernel.h solely for one of these macros; this unburdens the compiler from the twisted inclusion paths and makes new code cleaner in the future.
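To make the motivation concrete, here is a hedged usage sketch of the two macros being split out; struct sensor and sensor_from_node() are illustrative names, not kernel API:

#include <linux/container_of.h>
#include <linux/list.h>

struct sensor {
	int reading;
	struct list_head node;		/* linked into some driver list */
};

static struct sensor *sensor_from_node(struct list_head *node)
{
	/* Recover the enclosing object; the member type is checked at compile time. */
	return container_of(node, struct sensor, node);
}

/* typeof_member() names a member's type without needing an instance: */
static typeof_member(struct sensor, reading) last_reading;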
Link: https://lkml.kernel.org/r/20211013170417.87909-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sakari Ailus Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/container_of.h | 40 ++++++++++++++++++++++++++++++++++++ include/linux/kernel.h | 33 +---------------------------- 2 files changed, 41 insertions(+), 32 deletions(-) create mode 100644 include/linux/container_of.h diff --git a/include/linux/container_of.h b/include/linux/container_of.h new file mode 100644 index 0000000000000..dd56019838c67 --- /dev/null +++ b/include/linux/container_of.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CONTAINER_OF_H +#define _LINUX_CONTAINER_OF_H + +#include +#include + +#define typeof_member(T, m) typeof(((T*)0)->m) + +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + ((type *)(__mptr - offsetof(type, member))); }) + +/** + * container_of_safe - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. + */ +#define container_of_safe(ptr, type, member) ({ \ + void *__mptr = (void *)(ptr); \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ + !__same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ + IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) : \ + ((type *)(__mptr - offsetof(type, member))); }) + +#endif /* _LINUX_CONTAINER_OF_H */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 471bc05936796..ed4465757cec2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -52,8 +53,6 @@ } \ ) -#define typeof_member(T, m) typeof(((T*)0)->m) - #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) @@ -484,36 +483,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define __CONCAT(a, b) a ## b #define CONCATENATE(a, b) __CONCAT(a, b) -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - ((type *)(__mptr - offsetof(type, member))); }) - -/** - * container_of_safe - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. 
- * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged. - */ -#define container_of_safe(ptr, type, member) ({ \ - void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ - IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) : \ - ((type *)(__mptr - offsetof(type, member))); }) - /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD From 418bb3b040752eeeb6da8760f0539679b3ccb5af Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:40 +1100 Subject: [PATCH 0910/1202] include/kunit/test.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when circular dependencies are involved. Replace the kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20211013170417.87909-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sakari Ailus Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/kunit/test.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/kunit/test.h b/include/kunit/test.h index 018e776a34b9a..b26400731c021 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -11,11 +11,20 @@ #include #include -#include + +#include +#include +#include +#include +#include +#include #include #include +#include +#include #include -#include + +#include struct kunit_resource; From 4c7c2b0c93d6f16d7a24e00f7b6ccc0f089044df Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:40 +1100 Subject: [PATCH 0911/1202] include/linux/list.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when circular dependencies are involved. Replace the kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20211013170417.87909-5-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sakari Ailus Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/list.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/list.h b/include/linux/list.h index f2af4b4aa4e9a..6636fc07f918f 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -2,11 +2,13 @@ #ifndef _LINUX_LIST_H #define _LINUX_LIST_H +#include #include #include #include #include -#include + +#include /* * Circular doubly linked list implementation.
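[Editor's note: the comment this hunk ends on is the point of the file — list.h implements an intrusive, circular doubly linked list, and its list_entry() accessor is container_of() again, which is why container_of.h plus a few basic headers is all it really needs. A hedged userspace mirror of the idea, with illustrative names (the kernel's real API is richer):

    #include <stddef.h>

    struct list_node {
            struct list_node *next, *prev;  /* node embedded in the payload */
    };

    struct task {
            int id;
            struct list_node link;
    };

    /* The kernel's list_entry() is exactly this container_of() pattern. */
    #define node_entry(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static struct task *task_from_node(struct list_node *n)
    {
            return node_entry(n, struct task, link);
    }

]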
From 65dbf7a272d585ee0107f8eaee7c7618245302e7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:41 +1100 Subject: [PATCH 0912/1202] include/linux/llist.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when circular dependencies are involved. Replace the kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20211013170417.87909-6-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sakari Ailus Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/llist.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/llist.h b/include/linux/llist.h index 24f207b0190b5..85bda2d02d65b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -49,7 +49,9 @@ */ #include -#include +#include +#include +#include struct llist_head { struct llist_node *first; From 3f2ce341b4430d8080c833a4cc87f6f984b831f9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:42 +1100 Subject: [PATCH 0913/1202] include/linux/plist.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when circular dependencies are involved. Replace the kernel.h inclusion with the list of what is really being used. Link: https://lkml.kernel.org/r/20211013170417.87909-7-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Sakari Ailus Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/plist.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/plist.h b/include/linux/plist.h index 66bab1bca35c5..0f352c1d3c805 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -73,8 +73,11 @@ #ifndef _LINUX_PLIST_H_ #define _LINUX_PLIST_H_ -#include +#include #include +#include + +#include struct plist_head { struct list_head node_list; From 59727052943d68ab6732f9aad716b53d51bb93fc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:09:42 +1100 Subject: [PATCH 0914/1202] include/media/media-entity.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when circular dependencies are involved. Replace the kernel.h inclusion with the list of what is really being used.
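[Editor's note: the pattern applied across patches 0910-0914, as a hypothetical header; "widget" and its fields are invented for illustration. Each header names exactly what it uses instead of pulling in the kernel.h catch-all:

    #ifndef _EXAMPLE_WIDGET_H
    #define _EXAMPLE_WIDGET_H

    #include <linux/bits.h>   /* BIT() */
    #include <linux/list.h>   /* struct list_head */
    #include <linux/types.h>  /* u32 */

    struct widget {
            struct list_head entries;
            u32 flags;
    #define WIDGET_ACTIVE BIT(0)
    };

    #endif /* _EXAMPLE_WIDGET_H */

The payoff is fewer and cheaper transitive includes, and no accidental dependency cycles through kernel.h.]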
Link: https://lkml.kernel.org/r/20211013170417.87909-8-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Acked-by: Sakari Ailus Cc: Boqun Feng Cc: Brendan Higgins Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Miguel Ojeda Cc: Peter Zijlstra Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: Thorsten Leemhuis Cc: Waiman Long Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/media/media-entity.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/media/media-entity.h b/include/media/media-entity.h index 09737b47881fd..fea489f03d57a 100644 --- a/include/media/media-entity.h +++ b/include/media/media-entity.h @@ -13,10 +13,11 @@ #include #include +#include #include -#include #include #include +#include /* Enums used internally at the media controller to represent graphs */ From 102a01d53c2241a504c39f48c84eabed83a1e8ce Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 21 Oct 2021 15:09:43 +1100 Subject: [PATCH 0915/1202] linux/container_of.h: switch to static_assert _Static_assert() is evaluated already in the compiler's frontend, and gives a somewhat more to-the-point error, compared to the BUILD_BUG_ON macro, which only fires after the optimizer has had a chance to eliminate calls to functions marked with __attribute__((error)). In theory, this might make builds a tiny bit faster. There's also a little less gunk in the error message emitted: lib/sort.c: In function `foo': ./include/linux/build_bug.h:78:41: error: static assertion failed: "pointer type mismatch in container_of()" 78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) compared to lib/sort.c: In function `foo': ././include/linux/compiler_types.h:322:38: error: call to `__compiletime_assert_2' declared with attribute error: pointer type mismatch in container_of() 322 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) While at it, fix the copy-pasto in container_of_safe(). Link: https://lkml.kernel.org/r/20211015090530.2774079-1-linux@rasmusvillemoes.dk Link: https://lore.kernel.org/lkml/20211014132331.GA4811@kernel.org/T/ Signed-off-by: Rasmus Villemoes Reviewed-by: Miguel Ojeda Acked-by: Andy Shevchenko Reviewed-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/container_of.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/container_of.h b/include/linux/container_of.h index dd56019838c67..2f4944b791b81 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -16,9 +16,9 @@ */ #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ + static_assert(__same_type(*(ptr), ((type *)0)->member) || \ + __same_type(*(ptr), void), \ + "pointer type mismatch in container_of()"); \ ((type *)(__mptr - offsetof(type, member))); }) /** @@ -31,9 +31,9 @@ */ #define container_of_safe(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ - !__same_type(*(ptr), void), \ - "pointer type mismatch in container_of()"); \ + static_assert(__same_type(*(ptr), ((type *)0)->member) || \ + __same_type(*(ptr), void), \ + "pointer type mismatch in container_of_safe()"); \ IS_ERR_OR_NULL(__mptr) ? 
ERR_CAST(__mptr) : \ ((type *)(__mptr - offsetof(type, member))); }) From d2d87ddc0ad240028a680e46d1a8216085a2b19e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:09:43 +1100 Subject: [PATCH 0916/1202] MAINTAINERS: add "exec & binfmt" section with myself and Eric I'd like more continuity of review for the exec and binfmt (and ELF) stuff. Eric and I have been the most active lately, so list us as reviewers. Link: https://lkml.kernel.org/r/20211006180200.1178142-1-keescook@chromium.org Signed-off-by: Kees Cook Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- MAINTAINERS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 430f2ca3f68ff..a43bae649f9b1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7025,6 +7025,20 @@ F: include/trace/events/mdio.h F: include/uapi/linux/mdio.h F: include/uapi/linux/mii.h +EXEC & BINFMT API +R: Eric Biederman +R: Kees Cook +F: arch/alpha/kernel/binfmt_loader.c +F: arch/x86/ia32/ia32_aout.c +F: fs/*binfmt_*.c +F: fs/exec.c +F: include/linux/binfmts.h +F: include/linux/elf.h +F: include/uapi/linux/binfmts.h +F: tools/testing/selftests/exec/ +N: asm/elf.h +N: binfmt + EXFAT FILE SYSTEM M: Namjae Jeon M: Sungjong Seo From 1f60c48392f1a32872681619ce06bb8a8de3e735 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 21 Oct 2021 15:09:44 +1100 Subject: [PATCH 0917/1202] lib, stackdepot: check stackdepot handle before accessing slabs Patch series "lib, stackdepot: check stackdepot handle before accessing slabs", v2. PATCH-1: Checks validity of a stackdepot handle before proceeding to access stackdepot slab/objects. PATCH-2: Adds a helper in stackdepot, to allow users to print stack entries just by specifying the stackdepot handle. It also changes such users to use this new interface. PATCH-3: Adds a helper in stackdepot, to allow users to print stack entries into buffers just by specifying the stackdepot handle and destination buffer. It also changes such users to use this new interface. This patch (of 3): stack_depot_save() allocates slabs that will be used for storing objects in the future. If this slab allocation fails, we may get to a situation where space allocation for a new stack_record fails, causing stack_depot_save() to return 0 as the handle. If a user of this handle ends up invoking stack_depot_fetch() with this handle value, the current implementation of stack_depot_fetch() will end up using a slab from the wrong index. To avoid this, check the handle value at the beginning.
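[Editor's note: a condensed sketch of the failure mode this patch closes, not the kernel source. A zero handle means stack_depot_save() failed, so a fetch must bail out before decoding the handle into a slab index and offset:

    typedef unsigned int depot_stack_handle_t;

    unsigned int fetch_sketch(depot_stack_handle_t handle,
                              unsigned long **entries)
    {
            *entries = NULL;
            /* The guard being added: handle 0 means "save failed",
             * not "slab 0 at offset 0". */
            if (!handle)
                    return 0;
            /* ... decode handle into slab index + offset, look up the
             * stack_record and return its number of entries ... */
            return 0;
    }

Without the guard, a zero handle decodes to slab index 0 at offset 0 and the fetch reads whatever happens to live there.]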
Link: https://lkml.kernel.org/r/20210915175321.3472770-1-imran.f.khan@oracle.com Link: https://lkml.kernel.org/r/20210915014806.3206938-1-imran.f.khan@oracle.com Link: https://lkml.kernel.org/r/20210915014806.3206938-2-imran.f.khan@oracle.com Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Cc: Geert Uytterhoeven Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/stackdepot.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 09485dc5bd128..f034f095c627e 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -231,6 +231,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, struct stack_record *stack; *entries = NULL; + if (!handle) + return 0; + if (parts.slabindex > depot_index) { WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", parts.slabindex, depot_index, handle); From db1bc08151e693a066e13cdbaf5a1c1f27a834af Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 21 Oct 2021 15:09:44 +1100 Subject: [PATCH 0918/1202] lib, stackdepot: add helper to print stack entries To print stack entries, users of stackdepot first use stack_depot_fetch() to get a list of stack entries and then use stack_trace_print() to print this list. Provide a helper in stackdepot to print stack entries based on a stackdepot handle. Also change the above-mentioned users to use this helper. Link: https://lkml.kernel.org/r/20210915014806.3206938-3-imran.f.khan@oracle.com Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Reviewed-by: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Daniel Vetter Cc: David Airlie Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/stackdepot.h | 2 ++ lib/stackdepot.c | 18 ++++++++++++++++++ mm/kasan/report.c | 15 +++------------ mm/page_owner.c | 13 ++++--------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index d29860966bc91..8ab37500fcba8 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -25,6 +25,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +void stack_depot_print(depot_stack_handle_t stack); + #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); #else diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f034f095c627e..4e1f2982d0fa7 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -213,6 +213,24 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } +/** + * stack_depot_print - print stack entries from a depot + * + * @stack: Stack depot handle which was returned from + * stack_depot_save(). 
+ * + */ +void stack_depot_print(depot_stack_handle_t stack) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(stack, &entries); + if (nr_entries > 0) + stack_trace_print(entries, nr_entries, 0); +} +EXPORT_SYMBOL_GPL(stack_depot_print); + /** * stack_depot_fetch - Fetch stack entries from a depot * diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 884a950c70265..3239fd8f87475 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -132,20 +132,11 @@ static void end_report(unsigned long *flags, unsigned long addr) kasan_enable_current(); } -static void print_stack(depot_stack_handle_t stack) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - stack_trace_print(entries, nr_entries, 0); -} - static void print_track(struct kasan_track *track, const char *prefix) { pr_err("%s by task %u:\n", prefix, track->pid); if (track->stack) { - print_stack(track->stack); + stack_depot_print(track->stack); } else { pr_err("(stack is not available)\n"); } @@ -214,12 +205,12 @@ static void describe_object_stacks(struct kmem_cache *cache, void *object, return; if (alloc_meta->aux_stack[0]) { pr_err("Last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[0]); + stack_depot_print(alloc_meta->aux_stack[0]); pr_err("\n"); } if (alloc_meta->aux_stack[1]) { pr_err("Second to last potentially related work creation:\n"); - print_stack(alloc_meta->aux_stack[1]); + stack_depot_print(alloc_meta->aux_stack[1]); pr_err("\n"); } #endif diff --git a/mm/page_owner.c b/mm/page_owner.c index 8eddd2225e330..b6d25eb8e0131 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -394,8 +394,6 @@ void __dump_page_owner(const struct page *page) struct page_ext *page_ext = lookup_page_ext(page); struct page_owner *page_owner; depot_stack_handle_t handle; - unsigned long *entries; - unsigned int nr_entries; gfp_t gfp_mask; int mt; @@ -423,20 +421,17 @@ void __dump_page_owner(const struct page *page) page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec); handle = READ_ONCE(page_owner->handle); - if (!handle) { + if (!handle) pr_alert("page_owner allocation stack trace missing\n"); - } else { - nr_entries = stack_depot_fetch(handle, &entries); - stack_trace_print(entries, nr_entries, 0); - } + else + stack_depot_print(handle); handle = READ_ONCE(page_owner->free_handle); if (!handle) { pr_alert("page_owner free stack trace missing\n"); } else { - nr_entries = stack_depot_fetch(handle, &entries); pr_alert("page last free stack trace:\n"); - stack_depot_print(handle); } if (page_owner->last_migrate_reason != -1) From af5786e111ec4cca426e644bcb2c77280b841aa4 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 21 Oct 2021 15:09:45 +1100 Subject: [PATCH 0919/1202] lib, stackdepot: add helper to print stack entries into buffer To print stack entries into a buffer, users of stackdepot first get a list of stack entries using stack_depot_fetch() and then print this list into a buffer using stack_trace_snprint(). Provide a helper in stackdepot for this purpose. Also change the above-mentioned users to use this helper.
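[Editor's note: the caller-side effect of the two helpers, condensed from the actual conversions in these patches; handle, buf and size are illustrative names.

    /* before: every caller open-codes the fetch + print pair */
    nr_entries = stack_depot_fetch(handle, &entries);
    stack_trace_snprint(buf, size, entries, nr_entries, 0);

    /* after: one call, no local entries/nr_entries bookkeeping */
    stack_depot_snprint(handle, buf, size, 0);

The same shape applies to stack_depot_print() versus the fetch + stack_trace_print() pair.]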
Link: https://lkml.kernel.org/r/20210915014806.3206938-4-imran.f.khan@oracle.com Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Acked-by: Jani Nikula [i915] Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Daniel Vetter Cc: David Airlie Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/gpu/drm/drm_dp_mst_topology.c | 5 +---- drivers/gpu/drm/drm_mm.c | 5 +---- drivers/gpu/drm/i915/i915_vma.c | 5 +---- drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +++++--------------- include/linux/stackdepot.h | 3 +++ lib/stackdepot.c | 24 ++++++++++++++++++++++++ mm/page_owner.c | 5 +---- 7 files changed, 36 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 86d13d6bc4631..2d1adab9e3600 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history, for (i = 0; i < history->len; i++) { const struct drm_dp_mst_topology_ref_entry *entry = &history->entries[i]; - ulong *entries; - uint nr_entries; u64 ts_nsec = entry->ts_nsec; u32 rem_nsec = do_div(ts_nsec, 1000000000); - nr_entries = stack_depot_fetch(entry->backtrace, &entries); - stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4); + stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4); drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s", entry->count, diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 93d48a6f04abe..ca04d7f6f7b50 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node) static void show_leaks(struct drm_mm *mm) { struct drm_mm_node *node; - unsigned long *entries; - unsigned int nr_entries; char *buf; buf = kmalloc(BUFSZ, GFP_KERNEL); @@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm) continue; } - nr_entries = stack_depot_fetch(node->stack, &entries); - stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0); + stack_depot_snprint(node->stack, buf, BUFSZ); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4b7fc4647e460..f2d9ed3751093 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma) static void vma_print_allocator(struct i915_vma *vma, const char *reason) { - unsigned long *entries; - unsigned int nr_entries; char buf[512]; if (!vma->node.stack) { @@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) return; } - nr_entries = stack_depot_fetch(vma->node.stack, &entries); - stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0); + stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0); DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", vma->node.start, vma->node.size, reason, buf); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index eaf7688f517d0..cc312f0a05ebc 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void) return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); } 
-static void __print_depot_stack(depot_stack_handle_t stack, - char *buf, int sz, int indent) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - stack_trace_snprint(buf, sz, entries, nr_entries, indent); -} - static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); @@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, if (!buf) return; - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); stack = READ_ONCE(rpm->debug.last_release); if (stack) { - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); } @@ -183,12 +173,12 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p, return; if (dbg->last_acquire) { - __print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2); + stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref last acquired:\n%s", buf); } if (dbg->last_release) { - __print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2); + stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref last released:\n%s", buf); } @@ -203,7 +193,7 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p, rep = 1; while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) rep++, i++; - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_print(stack, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); } diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 8ab37500fcba8..c34b55a6e5540 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -25,6 +25,9 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces); + void stack_depot_print(depot_stack_handle_t stack); #ifdef CONFIG_STACKDEPOT diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4e1f2982d0fa7..96ee83aad00f6 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -213,6 +213,30 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } +/** + * stack_depot_snprint - print stack entries from a depot into a buffer + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @buf: Pointer to the print buffer + * + * @size: Size of the print buffer + * + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed. + */ +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(handle, &entries); + return nr_entries ? 
stack_trace_snprint(buf, size, entries, nr_entries, + spaces) : 0; +} + /** * stack_depot_print - print stack entries from a depot * diff --git a/mm/page_owner.c b/mm/page_owner.c index b6d25eb8e0131..11160dbf7f94a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -329,8 +329,6 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; - unsigned long *entries; - unsigned int nr_entries; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); @@ -361,8 +359,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, if (ret >= count) goto err; - nr_entries = stack_depot_fetch(handle, &entries); - ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0); + ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); if (ret >= count) goto err; From bb0f15831bd6642d94d33c0aac336d691a60f81d Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 21 Oct 2021 15:09:45 +1100 Subject: [PATCH 0920/1202] lib-stackdepot-add-helper-to-print-stack-entries-into-buffer-v2 fix build error Link: https://lkml.kernel.org/r/20210915175321.3472770-4-imran.f.khan@oracle.com Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Daniel Vetter Cc: David Airlie Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/gpu/drm/drm_mm.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index ca04d7f6f7b50..7d1c578388d33 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -131,7 +131,7 @@ static void show_leaks(struct drm_mm *mm) continue; } - stack_depot_snprint(node->stack, buf, BUFSZ); + stack_depot_snprint(node->stack, buf, BUFSZ, 0); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index cc312f0a05ebc..0d85f3c5c5266 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -193,7 +193,7 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p, rep = 1; while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) rep++, i++; - stack_depot_print(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); } From 31b5a352db4f9b8dd069dd1fd1d955b1579f38f1 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Thu, 21 Oct 2021 15:09:46 +1100 Subject: [PATCH 0921/1202] lib-stackdepot-add-helper-to-print-stack-entries-into-buffer-v3 export stack_depot_snprint() to modules Link: https://lkml.kernel.org/r/20210916133535.3592491-4-imran.f.khan@oracle.com Signed-off-by: Imran Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/stackdepot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 96ee83aad00f6..b437ae79aca14 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -236,6 +236,7 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, return nr_entries ? 
stack_trace_snprint(buf, size, entries, nr_entries, spaces) : 0; } +EXPORT_SYMBOL_GPL(stack_depot_snprint); /** * stack_depot_print - print stack entries from a depot From 44fa8dcdee00d79fa35cee0db330c9c9f0ddf366 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 21 Oct 2021 15:09:46 +1100 Subject: [PATCH 0922/1202] include/linux/string_helpers.h: add linux/string.h for strlen() linux/string_helpers.h uses strlen(), so include the corresponding header. Otherwise we get a compilation error if it's not also included by whoever included the helper. Link: https://lkml.kernel.org/r/20211005212634.3223113-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/string_helpers.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 68189c4a2eb11..4ba39e1403b25 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -4,6 +4,7 @@ #include #include +#include #include struct file; From dcbfcac7455f6ca158a7a9a204576198ba6c5eba Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Oct 2021 15:09:47 +1100 Subject: [PATCH 0923/1202] lib: uninline simple_strntoull() as well Codegen became bloated again after the introduction of simple_strntoull():
add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-224 (-224)
Function old new delta
simple_strtoul 5 2 -3
simple_strtol 23 20 -3
simple_strtoull 119 15 -104
simple_strtoll 155 41 -114
Link: https://lkml.kernel.org/r/YVmlB9yY4lvbNKYt@localhost.localdomain Signed-off-by: Alexey Dobriyan Cc: Richard Fitzgerald Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/vsprintf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7ad44f2c8f57..25d79ed532263 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -53,8 +53,7 @@ #include #include "kstrtox.h" -static unsigned long long simple_strntoull(const char *startp, size_t max_chars, - char **endp, unsigned int base) +static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base) { const char *cp; unsigned long long result = 0ULL; From 7d0d54dd463e8da49f7c5d5e16dea953177aa092 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Oct 2021 15:09:48 +1100 Subject: [PATCH 0924/1202] lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() Currently, enabling CONFIG_STACKDEPOT means its stack_table will be allocated from memblock, even if stack depot ends up not actually used. The default size of stack_table is 4MB on 32-bit, 8MB on 64-bit. This is fine for use-cases such as KASAN which is also a config option and has overhead on its own. But it's an issue for functionality that has to be actually enabled on boot (page_owner) or depends on hardware (GPU drivers) and thus the memory might be wasted. This was raised as an issue [1] when attempting to add stackdepot support for SLUB's debug object tracking functionality. It's common to build kernels with CONFIG_SLUB_DEBUG and enable slub_debug on boot only when needed, or create only specific kmem caches with debugging for testing purposes. It would thus be more efficient if stackdepot's table was allocated only when actually going to be used. This patch thus makes the allocation (and whole stack_depot_init() call) optional: - Add a CONFIG_STACKDEPOT_ALWAYS_INIT flag to keep using the current well-defined point of allocation as part of mem_init(). 
Make CONFIG_KASAN select this flag. - Other users have to call stack_depot_init() as part of their own init when it's determined that stack depot will actually be used. This may depend on both config and runtime conditions. Convert current users which are page_owner and several in the DRM subsystem. Same will be done for SLUB later. - Because the init might now be called after the boot-time memblock allocation has given all memory to the buddy allocator, change stack_depot_init() to allocate stack_table with kvmalloc() when memblock is no longer available. Also handle allocation failure by disabling stackdepot (could have theoretically happened even with memblock allocation previously), and don't unnecessarily align the memblock allocation to its own size anymore. [1] https://lore.kernel.org/all/CAMuHMdW=eoVzM1Re5FVoEN87nKfiLmM2+Ah7eNu2KXEhCvbZyA@mail.gmail.com/ Link: https://lkml.kernel.org/r/20211013073005.11351-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Dmitry Vyukov Reviewed-by: Marco Elver # stackdepot Cc: Marco Elver Cc: Vijayanand Jitta Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Oliver Glitta Cc: Imran Khan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + drivers/gpu/drm/drm_mm.c | 4 +++ drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +++ include/linux/stackdepot.h | 25 ++++++++++++------- init/main.c | 2 +- lib/Kconfig | 4 +++ lib/Kconfig.kasan | 2 +- lib/stackdepot.c | 33 +++++++++++++++++++++---- mm/page_owner.c | 2 ++ 9 files changed, 60 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 2d1adab9e3600..bbe972d59dae9 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -5490,6 +5490,7 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->probe_lock); #if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) mutex_init(&mgr->topology_ref_history_lock); + stack_depot_init(); #endif INIT_LIST_HEAD(&mgr->tx_msg_downq); INIT_LIST_HEAD(&mgr->destroy_port_list); diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 7d1c578388d33..8257f9d4f6190 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -980,6 +980,10 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size) add_hole(&mm->head_node); mm->scan_active = 0; + +#ifdef CONFIG_DRM_DEBUG_MM + stack_depot_init(); +#endif } EXPORT_SYMBOL(drm_mm_init); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0d85f3c5c5266..806c32ab410bb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -68,6 +68,9 @@ static noinline depot_stack_handle_t __save_depot_stack(void) static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); + + if (rpm->available) + stack_depot_init(); } static noinline depot_stack_handle_t diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c34b55a6e5540..17f992fe6355b 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/* + * Every user of stack depot has to call this during its 
own init when it's + * decided that it will be calling stack_depot_save() later. + * + * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot + * enabled as part of mm_init(), for subsystems where it's known at compile time + * that stack depot will be used. + */ +int stack_depot_init(void); + +#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT +static inline int stack_depot_early_init(void) { return stack_depot_init(); } +#else +static inline int stack_depot_early_init(void) { return 0; } +#endif + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); @@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, void stack_depot_print(depot_stack_handle_t stack); -#ifdef CONFIG_STACKDEPOT -int stack_depot_init(void); -#else -static inline int stack_depot_init(void) -{ - return 0; -} -#endif /* CONFIG_STACKDEPOT */ - #endif diff --git a/init/main.c b/init/main.c index f0001af8ebb90..329e2f51ea8ae 100644 --- a/init/main.c +++ b/init/main.c @@ -843,7 +843,7 @@ static void __init mm_init(void) init_mem_debugging_and_hardening(); kfence_alloc_pool(); report_meminit(); - stack_depot_init(); + stack_depot_early_init(); mem_init(); mem_init_print_info(); /* page_owner must be initialized after buddy is ready */ diff --git a/lib/Kconfig b/lib/Kconfig index 5e7165e6a346c..9d05690841523 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -671,6 +671,10 @@ config STACKDEPOT bool select STACKTRACE +config STACKDEPOT_ALWAYS_INIT + bool + select STACKDEPOT + config STACK_HASH_ORDER int "stack depot hash size (12 => 4KB, 20 => 1024KB)" range 12 20 diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index cdc842d090db3..879757b6dd149 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -38,7 +38,7 @@ menuconfig KASAN CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ HAVE_ARCH_KASAN_HW_TAGS depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) - select STACKDEPOT + select STACKDEPOT_ALWAYS_INIT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. 
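[Editor's note: the usage pattern the parent patch establishes, sketched; my_feature_enabled is an illustrative runtime switch, not real kernel code. Subsystems call stack_depot_init() from their own init once they know stacks will actually be saved; per the implementation below, the call is serialized by a mutex and idempotent, so multiple callers are fine.

    static int __init my_subsys_init(void)
    {
            /* Only pay for the hash table if the feature is in use. */
            if (my_feature_enabled)
                    stack_depot_init();
            return 0;
    }

]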
diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b437ae79aca14..8894a5171b25b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -161,18 +162,40 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -int __init stack_depot_init(void) +/* + * __ref because of memblock_alloc(), which will not be actually called after + * the __init code is gone, because at that point slab_is_available() is true + */ +__ref int stack_depot_init(void) { - if (!stack_depot_disable) { + static DEFINE_MUTEX(stack_depot_init_mutex); + + mutex_lock(&stack_depot_init_mutex); + if (!stack_depot_disable && stack_table == NULL) { size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); int i; - stack_table = memblock_alloc(size, size); - for (i = 0; i < STACK_HASH_SIZE; i++) - stack_table[i] = NULL; + if (slab_is_available()) { + pr_info("Stack Depot allocating hash table with kvmalloc\n"); + stack_table = kvmalloc(size, GFP_KERNEL); + } else { + pr_info("Stack Depot allocating hash table with memblock_alloc\n"); + stack_table = memblock_alloc(size, SMP_CACHE_BYTES); + } + if (stack_table) { + for (i = 0; i < STACK_HASH_SIZE; i++) + stack_table[i] = NULL; + } else { + pr_err("Stack Depot failed hash table allocationg, disabling\n"); + stack_depot_disable = true; + mutex_unlock(&stack_depot_init_mutex); + return -ENOMEM; + } } + mutex_unlock(&stack_depot_init_mutex); return 0; } +EXPORT_SYMBOL_GPL(stack_depot_init); /* Calculate hash for a stack */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) diff --git a/mm/page_owner.c b/mm/page_owner.c index 11160dbf7f94a..2dc86d3680bf2 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -80,6 +80,8 @@ static void init_page_owner(void) if (!page_owner_enabled) return; + stack_depot_init(); + register_dummy_stack(); register_failure_stack(); register_early_stack(); From b0347e2247fa7c4a407ce7f6f93ddd70528636d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Oct 2021 15:09:48 +1100 Subject: [PATCH 0925/1202] lib/stackdepot: fix spelling mistake and grammar in pr_err message There is a spelling mistake in the word "allocation", so fix this and re-phrase the message to make it easier to read. Link: https://lkml.kernel.org/r/20211015104159.11282-1-colin.king@canonical.com Signed-off-by: Colin Ian King Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/stackdepot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8894a5171b25b..8b6c415852035 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -186,7 +186,7 @@ __ref int stack_depot_init(void) for (i = 0; i < STACK_HASH_SIZE; i++) stack_table[i] = NULL; } else { - pr_err("Stack Depot failed hash table allocationg, disabling\n"); + pr_err("Stack Depot hash table allocation failed, disabling\n"); stack_depot_disable = true; mutex_unlock(&stack_depot_init_mutex); return -ENOMEM; From 3e84f65c032a4bec27052f4f2794895f2fcfe8d9 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Oct 2021 15:09:49 +1100 Subject: [PATCH 0926/1202] lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup On FLATMEM, we call page_ext_init_flatmem_late() just before kmem_cache_init() which means stack_depot_init() (called by page owner init) will not properly recognize that it should use kvmalloc() and not memblock_alloc(). 
memblock_alloc() will also not issue a warning and will return a block of memory that can be invalid and cause a kernel page fault when saving stacks, as reported by the kernel test robot [1]. Fix this by moving page_ext_init_flatmem_late() below kmem_cache_init() so that slab_is_available() is true during stack_depot_init(). SPARSEMEM doesn't have this issue, as it doesn't do page_ext_init_flatmem_late(), but a different page_ext_init() even later in the boot process. Thanks to Mike Rapoport for pointing out the FLATMEM init ordering issue. While at it, also actually resolve a checkpatch warning in stack_depot_init() from DRM CI, which was supposed to be in the original patch already. [1] https://lore.kernel.org/all/20211014085450.GC18719@xsang-OptiPlex-9020/ Link: https://lkml.kernel.org/r/6abd9213-19a9-6d58-cedc-2414386d2d81@suse.cz Signed-off-by: Vlastimil Babka Reported-by: kernel test robot Cc: Mike Rapoport Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- init/main.c | 7 +++++-- lib/stackdepot.c | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/init/main.c b/init/main.c index 329e2f51ea8ae..0dccf937f49ce 100644 --- a/init/main.c +++ b/init/main.c @@ -846,9 +846,12 @@ static void __init mm_init(void) stack_depot_early_init(); mem_init(); mem_init_print_info(); - /* page_owner must be initialized after buddy is ready */ - page_ext_init_flatmem_late(); kmem_cache_init(); + /* + * page_owner must be initialized after buddy is ready, and also after + * slab is ready so that stack_depot_init() works properly + */ + page_ext_init_flatmem_late(); kmemleak_init(); pgtable_init(); debug_objects_mem_init(); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8b6c415852035..00ccb106f1a8a 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -171,7 +171,7 @@ __ref int stack_depot_init(void) static DEFINE_MUTEX(stack_depot_init_mutex); mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disable && stack_table == NULL) { + if (!stack_depot_disable && !stack_table) { size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); int i; From 43633492736a2149cee2b2a2faf607de3f2234e7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Oct 2021 15:09:49 +1100 Subject: [PATCH 0927/1202] mm/scatterlist: replace the !preemptible warning in sg_miter_stop() sg_miter_stop() checks for disabled preemption before unmapping a page via kunmap_atomic(). The kernel doc mentions under context that preemption must be disabled if SG_MITER_ATOMIC is set. There is no active requirement for the caller to have preemption disabled before invoking sg_miter_stop(). The sg_miter_*() implementation itself has no such requirement. In fact, preemption is disabled by kmap_atomic() as part of sg_miter_next() and remains disabled as long as there is an active SG_MITER_ATOMIC mapping. This is a consequence of kmap_atomic() and not a requirement for sg_miter_*() itself. The user chooses SG_MITER_ATOMIC because it uses the API in a context where blocking is not possible, or where blocking is possible but they choose a lower-weight mapping, which is not available on all CPUs and so might need less setup overhead, at the price that preemption will now be disabled. The kmap_atomic() implementation on PREEMPT_RT does not disable preemption. It simply disables CPU migration to ensure that the task remains on the same CPU while the caller remains preemptible. This in turn triggers the warning in sg_miter_stop() because preemption is allowed. 
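[Editor's note: a hedged sketch of the usage pattern under discussion, based on the documented sg_miter API; sgl, nents, dst and len are assumed to exist in the caller. The mapping set up by sg_miter_next() stays live until sg_miter_stop(), which is where the warning being replaced fires.

    struct sg_mapping_iter miter;
    size_t copied = 0;

    sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
    while (copied < len && sg_miter_next(&miter)) {
            size_t n = min(miter.length, len - copied);

            /* page is mapped via kmap_atomic() while we are in here */
            memcpy(dst + copied, miter.addr, n);
            copied += n;
    }
    sg_miter_stop(&miter);  /* kunmap_atomic() happens here */

]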
The PREEMPT_RT and !PREEMPT_RT implementations of kmap_atomic() disable pagefaults as a requirement. It is sufficient to check for this instead of disabled preemption. Check for a disabled pagefault handler in the SG_MITER_ATOMIC case. Remove the "preemption disabled" part from the kernel doc as the sg_miter_*() implementation does not care. [bigeasy@linutronix.de: commit description] Link: https://lkml.kernel.org/r/20211015211409.cqopacv3pxdwn2ty@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- lib/scatterlist.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index abb3432ed7441..d5e82e4a57ad0 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * stops @miter. * * Context: - * Don't care if @miter is stopped, or not proceeded yet. - * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set. + * Don't care. * * Returns: * true if @miter contains the valid mapping. false if end of sg @@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip); * @miter->addr and @miter->length point to the current mapping. * * Context: - * Preemption disabled if SG_MITER_ATOMIC. Preemption must stay disabled - * till @miter is stopped. May sleep if !SG_MITER_ATOMIC. + * May sleep if !SG_MITER_ATOMIC. * * Returns: * true if @miter contains the next mapping. false if end of sg @@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next); * need to be released during iteration. * * Context: - * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care - * otherwise. + * Don't care otherwise. */ void sg_miter_stop(struct sg_mapping_iter *miter) { @@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) flush_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { - WARN_ON_ONCE(preemptible()); + WARN_ON_ONCE(!pagefault_disabled()); kunmap_atomic(miter->addr); } else kunmap(miter->page); From 6d23695af52ce9c04d254b4f9dbed5e7571526a1 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Thu, 21 Oct 2021 15:09:50 +1100 Subject: [PATCH 0928/1202] const_structs.checkpatch: add a few sound ops structs Add a couple of commonly used (>50 instances) sound ops structs that are typically const. Link: https://lkml.kernel.org/r/20210922211042.38017-1-rikard.falkeborn@gmail.com Signed-off-by: Rikard Falkeborn Cc: Joe Perches Cc: Liam Girdwood Cc: Mark Brown Cc: Jaroslav Kysela Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/const_structs.checkpatch | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/const_structs.checkpatch b/scripts/const_structs.checkpatch index 1aae4f4fdacce..3980985205a06 100644 --- a/scripts/const_structs.checkpatch +++ b/scripts/const_structs.checkpatch @@ -54,7 +54,11 @@ sd_desc seq_operations sirfsoc_padmux snd_ac97_build_ops +snd_pcm_ops +snd_rawmidi_ops snd_soc_component_driver +snd_soc_dai_ops +snd_soc_ops soc_pcmcia_socket_ops stacktrace_ops sysfs_ops From 18d56b989a1a456e52d2ae3426375ec05f361626 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 21 Oct 2021 15:09:51 +1100 Subject: [PATCH 0929/1202] checkpatch: improve EXPORT_SYMBOL test for EXPORT_SYMBOL_NS uses The EXPORT_SYMBOL test expects a single argument but definitions of EXPORT_SYMBOL_NS have multiple arguments. Update the test to extract only the first argument from any EXPORT_SYMBOL-related definition. 
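[Editor's note: the two definition shapes the test must now handle, with illustrative symbol and namespace names:

    int dummy_foo(void)
    {
            return 0;
    }
    EXPORT_SYMBOL(dummy_foo);               /* one argument */
    EXPORT_SYMBOL_NS(dummy_foo, DUMMY_NS);  /* symbol plus namespace */

Before this change, the extra ", DUMMY_NS" argument could confuse the symbol-name comparison; the added substitution keeps only the first identifier.]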
Link: https://lkml.kernel.org/r/9e8f251b42e405f460f26a23ba9b33ef45a94adc.camel@perches.com Signed-off-by: Joe Perches Reported-by: Ian Pilcher Cc: Dwaipayan Ray Cc: Lukas Bulwahn Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 88cb294dc4472..91798b07c6cb5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4449,6 +4449,7 @@ sub process { # XXX(foo); # EXPORT_SYMBOL(something_foo); my $name = $1; + $name =~ s/^\s*($Ident).*/$1/; if ($stat =~ /^(?:.\s*}\s*\n)?.([A-Z_]+)\s*\(\s*($Ident)/ && $name =~ /^${Ident}_$2/) { #print "FOO C name<$name>\n"; From aba2e7cf63c04d234e3f615b0fd3508e2589d342 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 21 Oct 2021 15:09:52 +1100 Subject: [PATCH 0930/1202] checkpatch: get default codespell dictionary path from package location The standard location of dictionary.txt is under codespell's package, on my machine atm (codespell 2.1, Artix Linux): /usr/lib/python3.9/site-packages/codespell_lib/data/dictionary.txt Since we enable the codespell by default for SOF I have constant: No codespell typos will be found - \ file '/usr/share/codespell/dictionary.txt': No such file or directory The patch proposes to try to fix up the path following the recommendation found here: https://github.com/codespell-project/codespell/issues/1540 Link: https://lkml.kernel.org/r/29e25d1364c8ad7f7657cc0660f60c568074d438.camel@perches.com Signed-off-by: Peter Ujfalusi Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- scripts/checkpatch.pl | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 91798b07c6cb5..1784921c645da 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -63,6 +63,7 @@ my $spelling_file = "$D/spelling.txt"; my $codespell = 0; my $codespellfile = "/usr/share/codespell/dictionary.txt"; +my $user_codespellfile = ""; my $conststructsfile = "$D/const_structs.checkpatch"; my $docsfile = "$D/../Documentation/dev-tools/checkpatch.rst"; my $typedefsfile; @@ -130,7 +131,7 @@ sub help { --ignore-perl-version override checking of perl version. expect runtime errors. --codespell Use the codespell dictionary for spelling/typos - (default:/usr/share/codespell/dictionary.txt) + (default:$codespellfile) --codespellfile Use this codespell dictionary --typedefsfile Read additional types from this file --color[=WHEN] Use colors 'always', 'never', or only when output @@ -317,7 +318,7 @@ sub load_docs { 'debug=s' => \%debug, 'test-only=s' => \$tst_only, 'codespell!' 
=> \$codespell, - 'codespellfile=s' => \$codespellfile, + 'codespellfile=s' => \$user_codespellfile, 'typedefsfile=s' => \$typedefsfile, 'color=s' => \$color, 'no-color' => \$color, #keep old behaviors of -nocolor @@ -325,9 +326,32 @@ sub load_docs { 'kconfig-prefix=s' => \${CONFIG_}, 'h|help' => \$help, 'version' => \$help -) or help(1); +) or $help = 2; + +if ($user_codespellfile) { + # Use the user provided codespell file unconditionally + $codespellfile = $user_codespellfile; +} elsif (!(-f $codespellfile)) { + # If /usr/share/codespell/dictionary.txt is not present, try to find it + # under codespell's install directory: /data/dictionary.txt + if (($codespell || $help) && which("codespell") ne "" && which("python") ne "") { + my $python_codespell_dict = << "EOF"; + +import os.path as op +import codespell_lib +codespell_dir = op.dirname(codespell_lib.__file__) +codespell_file = op.join(codespell_dir, 'data', 'dictionary.txt') +print(codespell_file, end='') +EOF + + my $codespell_dict = `python -c "$python_codespell_dict" 2> /dev/null`; + $codespellfile = $codespell_dict if (-f $codespell_dict); + } +} -help(0) if ($help); +# $help is 1 if either -h, --help or --version is passed as option - exitcode: 0 +# $help is 2 if invalid option is passed - exitcode: 1 +help($help - 1) if ($help); die "$P: --git cannot be used with --file or --fix\n" if ($git && ($file || $fix)); die "$P: --verbose cannot be used with --terse\n" if ($verbose && $terse); From 17cb99046638fe222155711093024d2ae589790e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Oct 2021 15:09:52 +1100 Subject: [PATCH 0931/1202] binfmt_elf: reintroduce using MAP_FIXED_NOREPLACE Commit b212921b13bd ("elf: don't use MAP_FIXED_NOREPLACE for elf executable mappings") reverted back to using MAP_FIXED to map ELF LOAD segments because it was found that the segments in some binaries overlap and can cause MAP_FIXED_NOREPLACE to fail. The original intent of MAP_FIXED_NOREPLACE in the ELF loader was to prevent the silent clobbering of an existing mapping (e.g. stack) by the ELF image, which could lead to exploitable conditions. Quoting commit 4ed28639519c ("fs, elf: drop MAP_FIXED usage from elf_map"), which originally introduced the use of MAP_FIXED_NOREPLACE in the loader: Both load_elf_interp and load_elf_binary rely on elf_map to map segments [to a specific] address and they use MAP_FIXED to enforce that. This is however [a] dangerous thing prone to silent data corruption which can be even exploitable. ... Let's take CVE-2017-1000253 as an example ... we could end up mapping [the executable] over the existing stack ... The [stack layout] issue has been fixed since then ... So we should be safe and any [similar] attack should be impractical. On the other hand this is just too subtle [an] assumption ... it can break quite easily and [be] hard to spot. ... Address this [weakness] by changing MAP_FIXED to the newly added MAP_FIXED_NOREPLACE. This will mean that mmap will fail if there is an existing mapping clashing with the requested one [instead of silently] clobbering it. When processing ET_DYN binaries, the loader already calculates a total size for the image when the first segment is mapped, maps the entire image, and then unmaps the remainder before the remaining segments are then individually mapped. To avoid the earlier problems (legitimate overlapping LOAD segments specified in the ELF), apply the same logic to ET_EXEC binaries as well. 
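[Editor's note: a userspace sketch of the mmap() semantics the loader relies on here. MAP_FIXED silently replaces whatever is already mapped at the address; MAP_FIXED_NOREPLACE (Linux 4.17+, needs _GNU_SOURCE) fails with EEXIST instead.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            long sz = 4096;
            void *a = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            /* Ask for the same address again, without permission to clobber. */
            void *b = mmap(a, sz, PROT_READ,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                           -1, 0);
            if (b == MAP_FAILED)
                    perror("mmap");  /* expected: "File exists" (EEXIST) */
            return 0;
    }

]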
For both ET_EXEC and ET_DYN+INTERP use MAP_FIXED_NOREPLACE for the initial total size mapping and then use MAP_FIXED to build the final (possibly legitimately overlapping) mappings. For ET_DYN w/out INTERP, continue to map at a system-selected address in the mmap region. Link: https://lkml.kernel.org/r/20210916215947.3993776-1-keescook@chromium.org Link: https://lore.kernel.org/lkml/1595869887-23307-2-git-send-email-anthony.yznaga@oracle.com Co-developed-by: Anthony Yznaga Signed-off-by: Anthony Yznaga Signed-off-by: Kees Cook Cc: Russell King Cc: Michal Hocko Cc: Eric Biederman Cc: Chen Jingwen Cc: Alexander Viro Cc: Andrei Vagin Cc: Khalid Aziz Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/binfmt_elf.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a813b70f594e6..7537a19f04845 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1074,20 +1074,26 @@ static int load_elf_binary(struct linux_binprm *bprm) vaddr = elf_ppnt->p_vaddr; /* - * If we are loading ET_EXEC or we have already performed - * the ET_DYN load_addr calculations, proceed normally. + * The first time through the loop, load_addr_set is false: + * layout will be calculated. Once set, use MAP_FIXED since + * we know we've already safely mapped the entire region with + * MAP_FIXED_NOREPLACE in the once-per-binary logic following. */ - if (elf_ex->e_type == ET_EXEC || load_addr_set) { + if (load_addr_set) { elf_flags |= MAP_FIXED; + } else if (elf_ex->e_type == ET_EXEC) { + /* + * This logic is run once for the first LOAD Program + * Header for ET_EXEC binaries. No special handling + * is needed. + */ + elf_flags |= MAP_FIXED_NOREPLACE; } else if (elf_ex->e_type == ET_DYN) { /* * This logic is run once for the first LOAD Program * Header for ET_DYN binaries to calculate the * randomization (load_bias) for all the LOAD - * Program Headers, and to calculate the entire - * size of the ELF mapping (total_size). (Note that - * load_addr_set is set to true later once the - * initial mapping is performed.) + * Program Headers. * * There are effectively two types of ET_DYN * binaries: programs (i.e. PIE: ET_DYN with INTERP) @@ -1108,7 +1114,7 @@ static int load_elf_binary(struct linux_binprm *bprm) * Therefore, programs are loaded offset from * ELF_ET_DYN_BASE and loaders are loaded into the * independently randomized mmap region (0 load_bias - * without MAP_FIXED). + * without MAP_FIXED nor MAP_FIXED_NOREPLACE). */ if (interpreter) { load_bias = ELF_ET_DYN_BASE; @@ -1117,7 +1123,7 @@ static int load_elf_binary(struct linux_binprm *bprm) alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); if (alignment) load_bias &= ~(alignment - 1); - elf_flags |= MAP_FIXED; + elf_flags |= MAP_FIXED_NOREPLACE; } else load_bias = 0; @@ -1129,7 +1135,14 @@ static int load_elf_binary(struct linux_binprm *bprm) * is then page aligned. */ load_bias = ELF_PAGESTART(load_bias - vaddr); + } + /* + * Calculate the entire size of the ELF mapping (total_size). + * (Note that load_addr_set is set to true later once the + * initial mapping is performed.) 
+ */ + if (!load_addr_set) { total_size = total_mapping_size(elf_phdata, elf_ex->e_phnum); if (!total_size) { From 966a70ec2ccacb14a39e393ad6b045844711926d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Oct 2021 15:09:53 +1100 Subject: [PATCH 0932/1202] ELF: fix overflow in total mapping size calculation Kernel assumes that ELF program headers are ordered by mapping address, but doesn't enforce it. It is possible to make mapping size extremely huge by simply shuffling first and last PT_LOAD segments. As long as PT_LOAD segments do not overlap, it is silly to require sorting by v_addr anyway because mmap() doesn't care. Don't assume PT_LOAD segments are sorted and calculate min and max addresses correctly. Link: https://lkml.kernel.org/r/YVmd7D0M6G/DcP4O@localhost.localdomain Signed-off-by: Alexey Dobriyan Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/binfmt_elf.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7537a19f04845..3e7c35d217c75 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -93,7 +93,7 @@ static int elf_core_dump(struct coredump_params *cprm); #define ELF_CORE_EFLAGS 0 #endif -#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) +#define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1)) #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) @@ -399,22 +399,21 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, return(map_addr); } -static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr) +static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr) { - int i, first_idx = -1, last_idx = -1; + elf_addr_t min_addr = -1; + elf_addr_t max_addr = 0; + bool pt_load = false; + int i; for (i = 0; i < nr; i++) { - if (cmds[i].p_type == PT_LOAD) { - last_idx = i; - if (first_idx == -1) - first_idx = i; + if (phdr[i].p_type == PT_LOAD) { + min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr)); + max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz); + pt_load = true; } } - if (first_idx == -1) - return 0; - - return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz - - ELF_PAGESTART(cmds[first_idx].p_vaddr); + return pt_load ? (max_addr - min_addr) : 0; } static int elf_read(struct file *file, void *buf, size_t len, loff_t pos) From bf87e4b39cc3d704110c95f3d85acd52edb70199 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Oct 2021 15:09:53 +1100 Subject: [PATCH 0933/1202] ELF: simplify STACK_ALLOC macro "A -= B; A" is equivalent to "A -= B". 
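The equivalence holds because a C assignment expression evaluates to the value assigned, so the GNU statement expression buys nothing here; an editor-added illustration (not part of the patch):

	/* Both macros update sp and evaluate to the decremented value. */
	#define STACK_ALLOC_OLD(sp, len) ({ sp -= len; sp; })
	#define STACK_ALLOC_NEW(sp, len) (sp -= len)

	/*
	 * For any unsigned long sp and len, these leave identical values
	 * in both p and sp:
	 *
	 *   unsigned long p = STACK_ALLOC_OLD(sp, len);
	 *   unsigned long p = STACK_ALLOC_NEW(sp, len);
	 */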
Link: https://lkml.kernel.org/r/YVmcP256fRMqCwgK@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3e7c35d217c75..5e48ff4f0d3a2 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -156,7 +156,7 @@ static int padzero(unsigned long elf_bss) #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items)) #define STACK_ROUND(sp, items) \ (((unsigned long) (sp - items)) &~ 15UL) -#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) +#define STACK_ALLOC(sp, len) (sp -= len) #endif #ifndef ELF_BASE_PLATFORM From ea18d266601f34b68277f1c6c79760d91322a406 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:54 +1100 Subject: [PATCH 0934/1202] kallsyms: remove arch specific text and data check Patch series "sections: Unify kernel sections range check and use", v4. There are three header files (kallsyms.h, kernel.h and sections.h) which include the kernel sections range check; let's do some cleanup and unify them. 1. clean up the arch-specific text/data checks and fix the address boundary check in kallsyms.h 2. move all the basic/core kernel range check functions into sections.h 3. update all the callers, and use the helpers in sections.h to simplify the code After this series, we have 5 APIs about kernel sections range check in sections.h * is_kernel_rodata() --- already in sections.h * is_kernel_core_data() --- comes from core_kernel_data() in kernel.h * is_kernel_inittext() --- comes from kernel.h and kallsyms.h * __is_kernel_text() --- newly added internal helper * __is_kernel() --- newly added internal helper Note: the last two helpers should not be used directly; consider using the corresponding functions in kallsyms.h. This patch (of 11): Remove the arch-specific text and data checks; after commit 4ba66a976072 ("arch: remove blackfin port") there is no longer any need for them. Link: https://lkml.kernel.org/r/20210930071143.63410-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20210930071143.63410-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Arnd Bergmann Cc: Steven Rostedt Cc: Ingo Molnar Cc: David S. Miller Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Christophe Leroy Cc: Kefeng Wang Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michal Simek Cc: Petr Mladek Cc: Richard Henderson Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/asm-generic/sections.h | 16 ---------------- include/linux/kallsyms.h | 3 +-- kernel/locking/lockdep.c | 3 --- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 596ab20922892..614fc809de34c 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -64,22 +64,6 @@ extern __visible const void __nosave_begin, __nosave_end; #define dereference_kernel_function_descriptor(p) ((void *)(p)) #endif -/* random extra sections (if any). 
Override - * in asm/sections.h */ -#ifndef arch_is_kernel_text -static inline int arch_is_kernel_text(unsigned long addr) -{ - return 0; -} -#endif - -#ifndef arch_is_kernel_data -static inline int arch_is_kernel_data(unsigned long addr) -{ - return 0; -} -#endif - /** * memory_contains - checks if an object is contained within a memory region * @begin: virtual address of the beginning of the memory region diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index a1d6fc82d7f06..bac3409726708 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -34,8 +34,7 @@ static inline int is_kernel_inittext(unsigned long addr) static inline int is_kernel_text(unsigned long addr) { - if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || - arch_is_kernel_text(addr)) + if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)) return 1; return in_gate_area_no_mm(addr); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8e118caf835e0..f2d3bf4960f46 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -818,9 +818,6 @@ static int static_obj(const void *obj) if ((addr >= start) && (addr < end)) return 1; - if (arch_is_kernel_data(addr)) - return 1; - /* * in-kernel percpu var? */ From 572bf379d66e59d8ecdfeb84822d96c12afee6bd Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:55 +1100 Subject: [PATCH 0935/1202] kallsyms: fix address-checks for kernel related range The is_kernel_inittext/is_kernel_text/is_kernel functions should not include the end address (the labels _einittext, _etext and _end) when checking the address range; the issue has existed since Linux v2.6.12. Link: https://lkml.kernel.org/r/20210930071143.63410-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Petr Mladek Reviewed-by: Steven Rostedt (VMware) Acked-by: Sergey Senozhatsky Cc: Arnd Bergmann Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. 
Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Richard Henderson Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/kallsyms.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index bac3409726708..62ce22b27ea82 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -27,21 +27,21 @@ struct module; static inline int is_kernel_inittext(unsigned long addr) { if (addr >= (unsigned long)_sinittext - && addr <= (unsigned long)_einittext) + && addr < (unsigned long)_einittext) return 1; return 0; } static inline int is_kernel_text(unsigned long addr) { - if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)) + if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) + if (addr >= (unsigned long)_stext && addr < (unsigned long)_end) return 1; return in_gate_area_no_mm(addr); } From 8db4810aaca0964a1cf7a1123b87cd46bf577050 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:56 +1100 Subject: [PATCH 0936/1202] sections: move and rename core_kernel_data() to is_kernel_core_data() Move core_kernel_data() into sections.h and rename it to is_kernel_core_data(), also make it return bool value, then update all the callers. Link: https://lkml.kernel.org/r/20210930071143.63410-4-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Arnd Bergmann Cc: Steven Rostedt Cc: Ingo Molnar Cc: "David S. Miller" Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: Dmitry Vyukov Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/asm-generic/sections.h | 16 ++++++++++++++++ include/linux/kernel.h | 1 - kernel/extable.c | 18 ------------------ kernel/trace/ftrace.c | 2 +- net/sysctl_net.c | 2 +- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 614fc809de34c..7cba8ff10d3a6 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -128,6 +128,22 @@ static inline bool init_section_intersects(void *virt, size_t size) return memory_intersects(__init_begin, __init_end, virt, size); } +/** + * is_kernel_core_data - checks if the pointer address is located in the + * .data section + * + * @addr: address to check + * + * Returns: true if the address is located in .data, false otherwise. + * Note: On some archs it may return true for core RODATA, and false + * for others. But will always be true for core RW data. 
+ */ +static inline bool is_kernel_core_data(unsigned long addr) +{ + return addr >= (unsigned long)_sdata && + addr < (unsigned long)_edata; +} + /** * is_kernel_rodata - checks if the pointer address is located in the * .rodata section diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ed4465757cec2..13ca0ff21a34d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -229,7 +229,6 @@ extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); extern int init_kernel_text(unsigned long addr); -extern int core_kernel_data(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); diff --git a/kernel/extable.c b/kernel/extable.c index 290661f68e6b3..0e3412b48bbaf 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -82,24 +82,6 @@ int notrace core_kernel_text(unsigned long addr) return 0; } -/** - * core_kernel_data - tell if addr points to kernel data - * @addr: address to test - * - * Returns true if @addr passed in is from the core kernel data - * section. - * - * Note: On some archs it may return true for core RODATA, and false - * for others. But will always be true for core RW data. - */ -int core_kernel_data(unsigned long addr) -{ - if (addr >= (unsigned long)_sdata && - addr < (unsigned long)_edata) - return 1; - return 0; -} - int __kernel_text_address(unsigned long addr) { if (kernel_text_address(addr)) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 635fbdc9d589b..66d6ad0c3f135 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -323,7 +323,7 @@ int __register_ftrace_function(struct ftrace_ops *ops) if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) return -EBUSY; - if (!core_kernel_data((unsigned long)ops)) + if (!is_kernel_core_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; add_ftrace_ops(&ftrace_ops_list, ops); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index f6cb0d4d114cd..4b45ed631eb8b 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -144,7 +144,7 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path, addr = (unsigned long)ent->data; if (is_module_address(addr)) where = "module"; - else if (core_kernel_data(addr)) + else if (is_kernel_core_data(addr)) where = "kernel"; else continue; From 5b0e3a64017d37c3ac3434d5feaf905475831857 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:56 +1100 Subject: [PATCH 0937/1202] sections: move is_kernel_inittext() into sections.h is_kernel_inittext() and init_kernel_text() have the same functionality; let's just keep is_kernel_inittext(), move it into sections.h, and update all the callers. Link: https://lkml.kernel.org/r/20210930071143.63410-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. 
Miller" Cc: Dmitry Vyukov Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/kernel/unwind_orc.c | 2 +- include/asm-generic/sections.h | 14 ++++++++++++++ include/linux/kallsyms.h | 8 -------- include/linux/kernel.h | 1 - kernel/extable.c | 12 ++---------- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index a1202536fc57c..d92ec2ced0595 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -175,7 +175,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (init_kernel_text(ip)) + if (is_kernel_inittext(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 7cba8ff10d3a6..f82806ca87567 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -158,4 +158,18 @@ static inline bool is_kernel_rodata(unsigned long addr) addr < (unsigned long)__end_rodata; } +/** + * is_kernel_inittext - checks if the pointer address is located in the + * .init.text section + * + * @addr: address to check + * + * Returns: true if the address is located in .init.text, false otherwise. + */ +static inline bool is_kernel_inittext(unsigned long addr) +{ + return addr >= (unsigned long)_sinittext && + addr < (unsigned long)_einittext; +} + #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 62ce22b27ea82..bb5d9ec78e13b 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -24,14 +24,6 @@ struct cred; struct module; -static inline int is_kernel_inittext(unsigned long addr) -{ - if (addr >= (unsigned long)_sinittext - && addr < (unsigned long)_einittext) - return 1; - return 0; -} - static inline int is_kernel_text(unsigned long addr) { if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext)) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 13ca0ff21a34d..973c61ff2ef92 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -228,7 +228,6 @@ extern bool parse_option_str(const char *str, const char *option); extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); -extern int init_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); extern int func_ptr_is_kernel_text(void *ptr); diff --git a/kernel/extable.c b/kernel/extable.c index 0e3412b48bbaf..6505207aa7e6a 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -62,14 +62,6 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -int init_kernel_text(unsigned long addr) -{ - if (addr >= (unsigned long)_sinittext && - addr < (unsigned long)_einittext) - return 1; - return 0; -} - int notrace core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && @@ -77,7 +69,7 @@ int notrace core_kernel_text(unsigned long addr) return 1; if (system_state < SYSTEM_FREEING_INITMEM && - init_kernel_text(addr)) + is_kernel_inittext(addr)) return 1; return 0; } @@ -94,7 +86,7 @@ int __kernel_text_address(unsigned long addr) * Since we are after the module-symbols check, there's * no danger of address 
overlap: */ - if (init_kernel_text(addr)) + if (is_kernel_inittext(addr)) return 1; return 0; } From 1045585bca33911f507d49d9636cc10f92f7c5d6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:57 +1100 Subject: [PATCH 0938/1202] x86: mm: rename __is_kernel_text() to is_x86_32_kernel_text() Commit b56cd05c55a1 ("x86/mm: Rename is_kernel_text to __is_kernel_text") added the '__' prefix so as not to conflict with the existing is_kernel_text() in <linux/kallsyms.h>. We will add __is_kernel_text() for the basic kernel text range check in the next patch, so use the private is_x86_32_kernel_text() naming for the x86-specific check. Link: https://lkml.kernel.org/r/20210930071143.63410-6-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Ingo Molnar Cc: Borislav Petkov Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Christophe Leroy Cc: "David S. Miller" Cc: Dmitry Vyukov Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/x86/mm/init_32.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5cd7ea6d645cf..d4e2648a1dfb2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -238,11 +238,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -/* - * The <linux/kallsyms.h> already defines is_kernel_text, - * using '__' prefix not to get in conflict. - */ -static inline int __is_kernel_text(unsigned long addr) +static inline int is_x86_32_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; @@ -333,8 +329,8 @@ kernel_physical_mapping_init(unsigned long start, addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (__is_kernel_text(addr) || - __is_kernel_text(addr2)) + if (is_x86_32_kernel_text(addr) || + is_x86_32_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -359,7 +355,7 @@ kernel_physical_mapping_init(unsigned long start, */ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - if (__is_kernel_text(addr)) + if (is_x86_32_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; pages_4k++; @@ -789,7 +785,7 @@ static void mark_nxdata_nx(void) */ unsigned long start = PFN_ALIGN(_etext); /* - * This comes from __is_kernel_text upper limit. Also HPAGE where used: + * This comes from is_x86_32_kernel_text upper limit. Also HPAGE where used: */ unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; From 3b36657ce8f7482908a480d81aa4f70b4572605c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:58 +1100 Subject: [PATCH 0939/1202] sections: provide internal __is_kernel() and __is_kernel_text() helper Add an internal __is_kernel() helper which only checks the kernel address range, and an internal __is_kernel_text() helper which only checks the text section range. Link: https://lkml.kernel.org/r/20210930071143.63410-7-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. 
Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/asm-generic/sections.h | 29 +++++++++++++++++++++++++++++ include/linux/kallsyms.h | 4 ++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index f82806ca87567..1dfadb2e878db 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -172,4 +172,33 @@ static inline bool is_kernel_inittext(unsigned long addr) addr < (unsigned long)_einittext; } +/** + * __is_kernel_text - checks if the pointer address is located in the + * .text section + * + * @addr: address to check + * + * Returns: true if the address is located in .text, false otherwise. + * Note: an internal helper, only check the range of _stext to _etext. + */ +static inline bool __is_kernel_text(unsigned long addr) +{ + return addr >= (unsigned long)_stext && + addr < (unsigned long)_etext; +} + +/** + * __is_kernel - checks if the pointer address is located in the kernel range + * + * @addr: address to check + * + * Returns: true if the address is located in the kernel range, false otherwise. + * Note: an internal helper, only check the range of _stext to _end. + */ +static inline bool __is_kernel(unsigned long addr) +{ + return addr >= (unsigned long)_stext && + addr < (unsigned long)_end; +} + #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index bb5d9ec78e13b..4176c7eca7b5a 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -26,14 +26,14 @@ struct module; static inline int is_kernel_text(unsigned long addr) { - if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext)) + if (__is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr < (unsigned long)_end) + if (__is_kernel(addr)) return 1; return in_gate_area_no_mm(addr); } From 95df698c87e973e09c6ea699021be1071d0ad696 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:09:59 +1100 Subject: [PATCH 0940/1202] mm: kasan: use is_kernel() helper Directly use is_kernel() helper in kernel_or_module_addr(). Link: https://lkml.kernel.org/r/20210930071143.63410-8-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Alexander Potapenko Reviewed-by: Andrey Konovalov Reviewed-by: Sergey Senozhatsky Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Alexei Starovoitov Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. 
Miller" Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/kasan/report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3239fd8f87475..1c955e1c98d50 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -226,7 +226,7 @@ static void describe_object(struct kmem_cache *cache, void *object, static inline bool kernel_or_module_addr(const void *addr) { - if (addr >= (void *)_stext && addr < (void *)_end) + if (is_kernel((unsigned long)addr)) return true; if (is_module_address((unsigned long)addr)) return true; From f62add8e4eb7a01caf83cdd089db6ee81d87cda0 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:10:00 +1100 Subject: [PATCH 0941/1202] extable: use is_kernel_text() helper The core_kernel_text() should check the gate area, as it is part of kernel text range, use is_kernel_text() in core_kernel_text(). Link: https://lkml.kernel.org/r/20210930071143.63410-9-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Steven Rostedt Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/extable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/extable.c b/kernel/extable.c index 6505207aa7e6a..b6f330f0fe749 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -64,8 +64,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) int notrace core_kernel_text(unsigned long addr) { - if (addr >= (unsigned long)_stext && - addr < (unsigned long)_etext) + if (is_kernel_text(addr)) return 1; if (system_state < SYSTEM_FREEING_INITMEM && From 6b882a759e455db5a9bba20623b38a7b07ee4b7b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:10:01 +1100 Subject: [PATCH 0942/1202] powerpc/mm: use core_kernel_text() helper Use core_kernel_text() helper to simplify code, also drop etext, _stext, _sinittext, _einittext declaration which already declared in section.h. Link: https://lkml.kernel.org/r/20210930071143.63410-10-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Christophe Leroy Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Borislav Petkov Cc: "David S. 
Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michal Simek Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/powerpc/mm/pgtable_32.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index dcf5ecca19d99..079abbf45a33c 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -33,8 +33,6 @@ #include -extern char etext[], _stext[], _sinittext[], _einittext[]; - static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data; notrace void __init early_ioremap_init(void) @@ -104,14 +102,13 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) { unsigned long v, s; phys_addr_t p; - int ktext; + bool ktext; s = offset; v = PAGE_OFFSET + s; p = memstart_addr + s; for (; s < top; s += PAGE_SIZE) { - ktext = ((char *)v >= _stext && (char *)v < etext) || - ((char *)v >= _sinittext && (char *)v < _einittext); + ktext = core_kernel_text(v); map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); v += PAGE_SIZE; p += PAGE_SIZE; From 51bc0902cc7266d9eae1966f4eec5830aad883da Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:10:02 +1100 Subject: [PATCH 0943/1202] microblaze: use is_kernel_text() helper Use is_kernel_text() helper to simplify code. Link: https://lkml.kernel.org/r/20210930071143.63410-11-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Michal Simek Reviewed-by: Sergey Senozhatsky Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Ellerman Cc: Paul Mackerras Cc: Petr Mladek Cc: Richard Henderson Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/microblaze/mm/pgtable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index c1833b159d3be..9f73265aad4e8 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -171,7 +172,7 @@ void __init mapin_ram(void) for (s = 0; s < lowmem_size; s += PAGE_SIZE) { f = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SHARED | _PAGE_HWEXEC; - if ((char *) v < _stext || (char *) v >= _etext) + if (!is_kernel_text(v)) f |= _PAGE_WRENABLE; else /* On the MicroBlaze, no user access From 04681852fa5fdab7330e0dc2c0ee607b8aca91eb Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 21 Oct 2021 15:10:03 +1100 Subject: [PATCH 0944/1202] alpha: use is_kernel_text() helper Use is_kernel_text() helper to simplify code. Link: https://lkml.kernel.org/r/20210930071143.63410-12-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Sergey Senozhatsky Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Christophe Leroy Cc: "David S. 
Miller" Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Michael Ellerman Cc: Michal Simek Cc: Paul Mackerras Cc: Petr Mladek Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/alpha/kernel/traps.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index e805106409f76..2ae34702456cd 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -129,9 +129,7 @@ dik_show_trace(unsigned long *sp, const char *loglvl) extern char _stext[], _etext[]; unsigned long tmp = *sp; sp++; - if (tmp < (unsigned long) &_stext) - continue; - if (tmp >= (unsigned long) &_etext) + if (!is_kernel_text(tmp)) continue; printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); if (i > 40) { From 1a5abcd9c36b52a9e08b250a6cb393513f983cf2 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Thu, 21 Oct 2021 15:10:04 +1100 Subject: [PATCH 0945/1202] ramfs: fix mount source show for ramfs ramfs_parse_param does not parse key "source", and will convert -ENOPARAM to 0. This will skip vfs_parse_fs_param_source in vfs_parse_fs_param, which lead always "none" mount source for ramfs. Fix it by parse "source" in ramfs_parse_param like cgroup1_parse_param has do. Link: https://lkml.kernel.org/r/20210924091756.1906118-1-yangerkun@huawei.com Signed-off-by: yangerkun Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/ramfs/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 65e7e56005b8f..df4515d39cd46 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -203,17 +203,20 @@ static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param) int opt; opt = fs_parse(fc, ramfs_fs_parameters, param, &result); - if (opt < 0) { + if (opt == -ENOPARAM) { + opt = vfs_parse_fs_param_source(fc, param); + if (opt != -ENOPARAM) + return opt; /* * We might like to report bad mount options here; * but traditionally ramfs has ignored all mount options, * and as it is used as a !CONFIG_SHMEM simple substitute * for tmpfs, better continue to ignore other mount options. */ - if (opt == -ENOPARAM) - opt = 0; - return opt; + return 0; } + if (opt < 0) + return opt; switch (opt) { case Opt_mode: From 742b99f5f8cb26232b4e7958b651cbfd61bdde2d Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Thu, 21 Oct 2021 15:10:05 +1100 Subject: [PATCH 0946/1202] init: make unknown command line param message clearer The prior message is confusing users, which is the exact opposite of the goal. If the message is being seen, one of the following situations is happening: 1. the param is misspelled 2. the param is not valid due to the kernel configuration 3. the param is intended for init but isn't after the '--' delineator on the command line To make that more clear to the user, explicitly mention "kernel command line" and also note that the params are still passed to user space to avoid causing any alarm over params intended for init. 
Link: https://lkml.kernel.org/r/20211013223502.96756-1-ahalaney@redhat.com Fixes: 86d1919a4fb0 ("init: print out unknown kernel parameters") Signed-off-by: Andrew Halaney Suggested-by: Steven Rostedt (VMware) Acked-by: Randy Dunlap Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- init/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 0dccf937f49ce..3476c832ebe0b 100644 --- a/init/main.c +++ b/init/main.c @@ -927,7 +927,9 @@ static void __init print_unknown_bootoptions(void) for (p = &envp_init[2]; *p; p++) end += sprintf(end, " %s", *p); - pr_notice("Unknown command line parameters:%s\n", unknown_options); + /* Start at unknown_options[1] to skip the initial space */ + pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n", + &unknown_options[1]); memblock_free(unknown_options, len); } From 31e87bd69543d3d25c019d32e3747a121b76fa6d Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Thu, 21 Oct 2021 15:10:06 +1100 Subject: [PATCH 0947/1202] init/main.c: silence some -Wunused-parameter warnings There are a bunch of callbacks with unused arguments, go ahead and silence those so "make KCFLAGS=-W init/main.o" is a little quieter. Here's a little sample: init/main.c:182:43: warning: unused parameter 'str' [-Wunused-parameter] static int __init set_reset_devices(char *str) Link: https://lkml.kernel.org/r/20210519162341.1275452-1-ahalaney@redhat.com Signed-off-by: Andrew Halaney Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- init/main.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/init/main.c b/init/main.c index 3476c832ebe0b..e24671f079928 100644 --- a/init/main.c +++ b/init/main.c @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(static_key_initialized); unsigned int reset_devices; EXPORT_SYMBOL(reset_devices); -static int __init set_reset_devices(char *str) +static int __init set_reset_devices(char *str __always_unused) { reset_devices = 1; return 1; @@ -231,13 +231,13 @@ static bool __init obsolete_checksetup(char *line) unsigned long loops_per_jiffy = (1<<12); EXPORT_SYMBOL(loops_per_jiffy); -static int __init debug_kernel(char *str) +static int __init debug_kernel(char *str __always_unused) { console_loglevel = CONSOLE_LOGLEVEL_DEBUG; return 0; } -static int __init quiet_kernel(char *str) +static int __init quiet_kernel(char *str __always_unused) { console_loglevel = CONSOLE_LOGLEVEL_QUIET; return 0; @@ -482,7 +482,7 @@ static void __init setup_boot_config(void) get_boot_config_from_initrd(NULL, NULL); } -static int __init warn_bootconfig(char *str) +static int __init warn_bootconfig(char *str __always_unused) { pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n"); return 0; @@ -511,7 +511,8 @@ static void __init repair_env_string(char *param, char *val) /* Anything after -- gets handed straight to init. */ static int __init set_init_arg(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { unsigned int i; @@ -536,7 +537,8 @@ static int __init set_init_arg(char *param, char *val, * unused parameters (modprobe will find them in /proc/cmdline). 
*/ static int __init unknown_bootoption(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { size_t len = strlen(param); @@ -736,7 +738,8 @@ noinline void __ref rest_init(void) /* Check for early params. */ static int __init do_early_param(char *param, char *val, - const char *unused, void *arg) + const char *unused __always_unused, + void *arg __always_unused) { const struct obs_kernel_param *p; @@ -1361,8 +1364,10 @@ static const char *initcall_level_names[] __initdata = { "late", }; -static int __init ignore_unknown_bootoption(char *param, char *val, - const char *unused, void *arg) +static int __init ignore_unknown_bootoption(char *param __always_unused, + char *val __always_unused, + const char *unused __always_unused, + void *arg __always_unused) { return 0; } @@ -1499,7 +1504,7 @@ void __weak free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -static int __ref kernel_init(void *unused) +static int __ref kernel_init(void *unused __always_unused) { int ret; From d68d7396c2f9bbe5f6bf9607f2183684ff2c2631 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:07 +1100 Subject: [PATCH 0948/1202] coda: avoid NULL pointer dereference from a bad inode Patch series "Coda updates for -next". The following patch series contains some fixes for the Coda kernel module that I've had sitting around and that were tested extensively in a development version of the Coda kernel module that lives outside of the main kernel. This patch (of 9): Avoid accessing coda_inode_info from a dentry with a bad inode. Link: https://lkml.kernel.org/r/20210908140308.18491-1-jaharkes@cs.cmu.edu Link: https://lkml.kernel.org/r/20210908140308.18491-2-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/dir.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index d69989c1bac37..3fd085009f26d 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -499,15 +499,20 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) */ static int coda_dentry_delete(const struct dentry * dentry) { - int flags; + struct inode *inode; + struct coda_inode_info *cii; if (d_really_is_negative(dentry)) return 0; - flags = (ITOC(d_inode(dentry))->c_flags) & C_PURGE; - if (is_bad_inode(d_inode(dentry)) || flags) { + inode = d_inode(dentry); + if (!inode || is_bad_inode(inode)) return 1; - } + + cii = ITOC(inode); + if (cii->c_flags & C_PURGE) + return 1; + return 0; } From a6567850089ee29b55d8e97fafde30b3a41f0561 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:08 +1100 Subject: [PATCH 0949/1202] coda: check for async upcall request using local state Originally flagged by Smatch because the code implicitly assumed outSize is not NULL for non-async upcalls, based on a flag that was (not) set in req->uc_flags. However, the req->uc_flags field is shared state, and although the current code will not allow it to be changed before the async request check, the code is more robust when it tests against the local outSize variable.
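The general pattern, in an editor-added sketch with hypothetical names (not actual Coda code): key the decision off a value the function owns rather than shared state another context may rewrite.

	struct request {
		unsigned int flags;	/* shared with another context */
	};

	static int handle(struct request *req, int *out_size)
	{
		/*
		 * Testing "req->flags & REQ_ASYNC" here would read shared
		 * state that the other side may rewrite once the request
		 * is queued.  Testing the local parameter cannot race:
		 * the caller passed NULL exactly for async requests.
		 */
		if (!out_size)
			return 0;	/* asynchronous: no reply to wait for */

		*out_size = 0;		/* ... fill in the reply size ... */
		return 1;
	}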
Link: https://lkml.kernel.org/r/20210908140308.18491-3-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/upcall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index eb3b1898da462..59f6cfd06f96a 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -744,7 +744,8 @@ static int coda_upcall(struct venus_comm *vcp, list_add_tail(&req->uc_chain, &vcp->vc_pending); wake_up_interruptible(&vcp->vc_waitq); - if (req->uc_flags & CODA_REQ_ASYNC) { + /* We can return early on asynchronous requests */ + if (outSize == NULL) { mutex_unlock(&vcp->vc_mutex); return 0; } From ba65851e88d97c25e0b159add2b69e97623fc831 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 21 Oct 2021 15:10:09 +1100 Subject: [PATCH 0950/1202] coda: remove err which no one cares about No one cares about 'err' in coda_release(), so remove it. Link: https://lkml.kernel.org/r/20210908140308.18491-4-jaharkes@cs.cmu.edu Signed-off-by: Alex Shi Signed-off-by: Jan Harkes Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/coda/file.c b/fs/coda/file.c index ef5ca22bfb3ea..52deab7846671 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -238,11 +238,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) struct coda_file_info *cfi; struct coda_inode_info *cii; struct inode *host_inode; - int err; cfi = coda_ftoc(coda_file); - err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), + venus_close(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, coda_file->f_cred->fsuid); host_inode = file_inode(cfi->cfi_container); From 8676ba34f795e2ab0514e6e16a16cfeeac22bd10 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:10 +1100 Subject: [PATCH 0951/1202] coda: avoid flagging NULL inodes Somehow we hit a negative dentry in coda_rename even after checking with d_really_is_positive. Maybe something raced and turned the new_dentry negative while we were fixing up directory link counts. Link: https://lkml.kernel.org/r/20210908140308.18491-5-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/coda_linux.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index e7b27754ce782..3c2947bba5e53 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -83,6 +83,9 @@ static __inline__ void coda_flag_inode(struct inode *inode, int flag) { struct coda_inode_info *cii = ITOC(inode); + if (!inode) + return; + spin_lock(&cii->c_lock); cii->c_flags |= flag; spin_unlock(&cii->c_lock); From c873f6e9d77d93a0fb06bdff9ffdbc316f5da534 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:11 +1100 Subject: [PATCH 0952/1202] coda: avoid hidden code duplication in rename We were actually fixing up the directory mtime in both branches after the negative dentry test; it was just that one branch was only flagging the directory inodes to refresh their attributes while the other branch used the optional optimization to set mtime to the current time and not go back to the Coda client.
Link: https://lkml.kernel.org/r/20210908140308.18491-6-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/dir.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 3fd085009f26d..328d7a684b634 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -317,13 +317,10 @@ static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir, coda_dir_drop_nlink(old_dir); coda_dir_inc_nlink(new_dir); } - coda_dir_update_mtime(old_dir); - coda_dir_update_mtime(new_dir); coda_flag_inode(d_inode(new_dentry), C_VATTR); - } else { - coda_flag_inode(old_dir, C_VATTR); - coda_flag_inode(new_dir, C_VATTR); } + coda_dir_update_mtime(old_dir); + coda_dir_update_mtime(new_dir); } return error; } From 0298bae25d96437dd231620c550c70cc26b53a90 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:12 +1100 Subject: [PATCH 0953/1202] coda: avoid doing bad things on inode type changes during revalidation When Coda discovers an inconsistent object, it turns it into a symlink. However we can't just follow this change in the kernel on an existing file or directory inode that may still have references. This patch removes the inconsistent inode from the inode hash and allocates a new inode for the symlink object. Link: https://lkml.kernel.org/r/20210908140308.18491-7-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/cnode.c | 13 +++++++++---- fs/coda/coda_linux.c | 39 +++++++++++++++++++-------------------- fs/coda/coda_linux.h | 3 ++- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index 06855f6c79020..62a3d2565c261 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c @@ -63,9 +63,10 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, struct inode *inode; struct coda_inode_info *cii; unsigned long hash = coda_f2i(fid); + umode_t inode_type = coda_inode_type(attr); +retry: inode = iget5_locked(sb, hash, coda_test_inode, coda_set_inode, fid); - if (!inode) return ERR_PTR(-ENOMEM); @@ -75,11 +76,15 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, inode->i_ino = hash; /* inode is locked and unique, no need to grab cii->c_lock */ cii->c_mapcount = 0; + coda_fill_inode(inode, attr); unlock_new_inode(inode); + } else if ((inode->i_mode & S_IFMT) != inode_type) { + /* Inode has changed type, mark bad and grab a new one */ + remove_inode_hash(inode); + coda_flag_inode(inode, C_PURGE); + iput(inode); + goto retry; } - - /* always replace the attributes, type might have changed */ - coda_fill_inode(inode, attr); return inode; } diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 2e1a5a192074d..903ca8fa4b9b8 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -87,28 +87,27 @@ static struct coda_timespec timespec64_to_coda(struct timespec64 ts64) } /* utility functions below */ +umode_t coda_inode_type(struct coda_vattr *attr) +{ + switch (attr->va_type) { + case C_VREG: + return S_IFREG; + case C_VDIR: + return S_IFDIR; + case C_VLNK: + return S_IFLNK; + case C_VNON: + default: + return 0; + } +} + void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr) { - int inode_type; - /* inode's i_flags, i_ino are set by iget - XXX: is this all we need 
?? - */ - switch (attr->va_type) { - case C_VNON: - inode_type = 0; - break; - case C_VREG: - inode_type = S_IFREG; - break; - case C_VDIR: - inode_type = S_IFDIR; - break; - case C_VLNK: - inode_type = S_IFLNK; - break; - default: - inode_type = 0; - } + /* inode's i_flags, i_ino are set by iget + * XXX: is this all we need ?? + */ + umode_t inode_type = coda_inode_type(attr); inode->i_mode |= inode_type; if (attr->va_mode != (u_short) -1) diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 3c2947bba5e53..9be281bbcc066 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -53,10 +53,11 @@ int coda_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *); -/* this file: heloers */ +/* this file: helpers */ char *coda_f2s(struct CodaFid *f); int coda_iscontrol(const char *name, size_t length); +umode_t coda_inode_type(struct coda_vattr *attr); void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); void coda_iattr_to_vattr(struct iattr *, struct coda_vattr *); unsigned short coda_flags_to_cflags(unsigned short); From 7bc626c9e205d06d41e6821cf19fb7a503453b49 Mon Sep 17 00:00:00 2001 From: Xiyu Yang Date: Thu, 21 Oct 2021 15:10:13 +1100 Subject: [PATCH 0954/1202] coda: convert from atomic_t to refcount_t on coda_vm_ops->refcnt The refcount_t type and its corresponding API can protect refcounters from accidental underflow and overflow, and from further use-after-free situations. Link: https://lkml.kernel.org/r/20210908140308.18491-8-jaharkes@cs.cmu.edu Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/coda/file.c b/fs/coda/file.c index 52deab7846671..29dd87be2fb86 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -8,6 +8,7 @@ * to the Coda project. Contact Peter Braam . */ +#include #include #include #include @@ -28,7 +29,7 @@ #include "coda_int.h" struct coda_vm_ops { - atomic_t refcnt; + refcount_t refcnt; struct file *coda_file; const struct vm_operations_struct *host_vm_ops; struct vm_operations_struct vm_ops; @@ -98,7 +99,7 @@ coda_vm_open(struct vm_area_struct *vma) struct coda_vm_ops *cvm_ops = container_of(vma->vm_ops, struct coda_vm_ops, vm_ops); - atomic_inc(&cvm_ops->refcnt); + refcount_inc(&cvm_ops->refcnt); if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->open) cvm_ops->host_vm_ops->open(vma); @@ -113,7 +114,7 @@ coda_vm_close(struct vm_area_struct *vma) if (cvm_ops->host_vm_ops && cvm_ops->host_vm_ops->close) cvm_ops->host_vm_ops->close(vma); - if (atomic_dec_and_test(&cvm_ops->refcnt)) { + if (refcount_dec_and_test(&cvm_ops->refcnt)) { vma->vm_ops = cvm_ops->host_vm_ops; fput(cvm_ops->coda_file); kfree(cvm_ops); @@ -189,7 +190,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cvm_ops->vm_ops.open = coda_vm_open; cvm_ops->vm_ops.close = coda_vm_close; cvm_ops->coda_file = coda_file; - atomic_set(&cvm_ops->refcnt, 1); + refcount_set(&cvm_ops->refcnt, 1); vma->vm_ops = &cvm_ops->vm_ops; } From 320a1414af03fbb209de87157f07e8a9351a649e Mon Sep 17 00:00:00 2001 From: Jing Yangyang Date: Thu, 21 Oct 2021 15:10:14 +1100 Subject: [PATCH 0955/1202] coda: use vmemdup_user to replace the open code vmemdup_user is better than duplicating its implementation, so just replace the open code.
./fs/coda/psdev.c:125:10-18:WARNING:opportunity for vmemdup_user The issue is detected with the help of Coccinelle. Link: https://lkml.kernel.org/r/20210908140308.18491-9-jaharkes@cs.cmu.edu Reported-by: Zeal Robot Signed-off-by: Jing Yangyang Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Xin Tan Cc: Xiyu Yang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/psdev.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 240669f51eac3..7e23cb22d3947 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -122,14 +122,10 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, hdr.opcode, hdr.unique); nbytes = size; } - dcbuf = kvmalloc(nbytes, GFP_KERNEL); - if (!dcbuf) { - retval = -ENOMEM; - goto out; - } - if (copy_from_user(dcbuf, buf, nbytes)) { - kvfree(dcbuf); - retval = -EFAULT; + + dcbuf = vmemdup_user(buf, nbytes); + if (IS_ERR(dcbuf)) { + retval = PTR_ERR(dcbuf); goto out; } From dd76ba967a96bc537fb46a5db0e6c2a8a931116b Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Thu, 21 Oct 2021 15:10:15 +1100 Subject: [PATCH 0956/1202] coda: bump module version to 7.2 Helps with tracking which patches have been propagated upstream and if users are running the latest known version. Link: https://lkml.kernel.org/r/20210908140308.18491-10-jaharkes@cs.cmu.edu Signed-off-by: Jan Harkes Cc: Alex Shi Cc: Jing Yangyang Cc: Xin Tan Cc: Xiyu Yang Cc: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/coda/psdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 7e23cb22d3947..b39580ad4ce51 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -384,7 +384,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); -MODULE_VERSION("7.0"); +MODULE_VERSION("7.2"); static int __init init_coda(void) { From 8583455e21734ecdf7c36ac55e60d2b9956fe97b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Oct 2021 15:10:16 +1100 Subject: [PATCH 0957/1202] hfs/hfsplus: use WARN_ON for sanity check gcc warns about a couple of instances in which a sanity check exists but the author wasn't sure how to react to it failing, which makes it look like a possible bug: fs/hfsplus/inode.c: In function 'hfsplus_cat_read_inode': fs/hfsplus/inode.c:503:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 503 | /* panic? */; | ^ fs/hfsplus/inode.c:524:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 524 | /* panic? */; | ^ fs/hfsplus/inode.c: In function 'hfsplus_cat_write_inode': fs/hfsplus/inode.c:582:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 582 | /* panic? */; | ^ fs/hfsplus/inode.c:608:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 608 | /* panic? */; | ^ fs/hfs/inode.c: In function 'hfs_write_inode': fs/hfs/inode.c:464:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 464 | /* panic? */; | ^ fs/hfs/inode.c:485:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body] 485 | /* panic? */; | ^ panic() is probably not the correct choice here, but a WARN_ON seems appropriate and avoids the compile-time warning. 
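For reference, WARN_ON(cond) evaluates to the truth value of cond, so one of these checks could later be promoted to real error handling; an editor-added sketch of a hypothetical follow-up (not part of this patch):

	if (WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)))
		return -EIO;	/* hypothetical: bail out rather than
				 * write a truncated record */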
Link: https://lkml.kernel.org/r/20210927102149.1809384-1-arnd@kernel.org Link: https://lore.kernel.org/all/20210322223249.2632268-1-arnd@kernel.org/ Signed-off-by: Arnd Bergmann Reviewed-by: Christian Brauner Cc: Alexander Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/hfs/inode.c | 6 ++---- fs/hfsplus/inode.c | 12 ++++-------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 4a95a92546a0d..2a51432462820 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -462,8 +462,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) goto out; if (S_ISDIR(main_inode->i_mode)) { - if (fd.entrylength < sizeof(struct hfs_cat_dir)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -483,8 +482,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - if (fd.entrylength < sizeof(struct hfs_cat_file)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 6fef67c2a9f09..d08a8d1d40a4c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -509,8 +509,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); @@ -530,8 +529,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) } else if (type == HFSPLUS_FILE) { struct hfsplus_cat_file *file = &entry.file; - if (fd->entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); @@ -588,8 +586,7 @@ int hfsplus_cat_write_inode(struct inode *inode) if (S_ISDIR(main_inode->i_mode)) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ @@ -614,8 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode) } else { struct hfsplus_cat_file *file = &entry.file; - if (fd.entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); From 462bf71a8c2ed57040260b41f65c2e2bf1fb4fbb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Thu, 21 Oct 2021 15:10:16 +1100 Subject: [PATCH 0958/1202] hfsplus: fix out-of-bounds warnings in __hfsplus_setxattr Fix the following out-of-bounds warnings by enclosing structure members file and finder into new struct info: fs/hfsplus/xattr.c:300:5: warning: 'memcpy' offset [65, 80] from the object at 'entry' is out of the bounds of referenced subobject 'user_info' with type 'struct DInfo' at offset 48 [-Warray-bounds] fs/hfsplus/xattr.c:313:5: warning: 'memcpy' offset [65, 80] from the object at 'entry' is out of the bounds of referenced subobject 'user_info' with type 'struct FInfo' at offset 48 [-Warray-bounds] Refactor the code by making it more "structured." Also, this helps with the ongoing efforts to enable -Warray-bounds and makes the code clearer and avoid confusing the compiler. Matthew said: : The offending line is this: : : - memcpy(&entry.file.user_info, value, : + memcpy(&entry.file.info, value, : file_finderinfo_len); : : what it's trying to do is copy two structs which are adjacent to each : other in a single call to memcpy(). gcc legitimately complains that : the memcpy to this struct overruns the bounds of the struct. What : Gustavo has done here is introduce a new struct that contains the two : structs, and now gcc is happy that the memcpy doesn't overrun the : length of this containing struct. Link: https://github.com/KSPP/linux/issues/109 Link: https://lkml.kernel.org/r/20210330145226.GA207011@embeddedor Signed-off-by: Gustavo A. R. Silva Reported-by: kernel test robot Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/hfsplus/catalog.c | 16 ++++++++-------- fs/hfsplus/dir.c | 4 ++-- fs/hfsplus/hfsplus_raw.h | 12 ++++++++---- fs/hfsplus/xattr.c | 18 ++++++++---------- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 35472cba750e1..9cdc6550b468e 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -124,7 +124,7 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, hfsplus_cat_set_perms(inode, &folder->permissions); if (inode == sbi->hidden_dir) /* invisible and namelocked */ - folder->user_info.frFlags = cpu_to_be16(0x5000); + folder->info.user.frFlags = cpu_to_be16(0x5000); return sizeof(*folder); } else { struct hfsplus_cat_file *file; @@ -142,14 +142,14 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, if (cnid == inode->i_ino) { hfsplus_cat_set_perms(inode, &file->permissions); if (S_ISLNK(inode->i_mode)) { - file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); } else { - file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(sbi->type); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(sbi->creator); } if (HFSPLUS_FLG_IMMUTABLE & @@ -158,11 +158,11 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); } else { - file->user_info.fdType = + file->info.user.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); - file->user_info.fdCreator = + file->info.user.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); - file->user_info.fdFlags = + file->info.user.fdFlags = cpu_to_be16(0x100); file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 84714bbccc123..135279a19b559 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -73,9 +73,9 @@ static struct dentry 
*hfsplus_lookup(struct inode *dir, struct dentry *dentry, goto fail; } cnid = be32_to_cpu(entry.file.id); - if (entry.file.user_info.fdType == + if (entry.file.info.user.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && - entry.file.user_info.fdCreator == + entry.file.info.user.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && HFSPLUS_SB(sb)->hidden_dir && (entry.file.create_date == diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 456e87aec7fd7..005a043bc7eed 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -260,8 +260,10 @@ struct hfsplus_cat_folder { __be32 access_date; __be32 backup_date; struct hfsplus_perm permissions; - struct DInfo user_info; - struct DXInfo finder_info; + struct { + struct DInfo user; + struct DXInfo finder; + } info; __be32 text_encoding; __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ } __packed; @@ -294,8 +296,10 @@ struct hfsplus_cat_file { __be32 access_date; __be32 backup_date; struct hfsplus_perm permissions; - struct FInfo user_info; - struct FXInfo finder_info; + struct { + struct FInfo user; + struct FXInfo finder; + } info; __be32 text_encoding; u32 reserved2; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index e2855ceefd394..b21811baab0f2 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -261,10 +261,8 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, struct hfs_find_data cat_fd; hfsplus_cat_entry entry; u16 cat_entry_flags, cat_entry_type; - u16 folder_finderinfo_len = sizeof(struct DInfo) + - sizeof(struct DXInfo); - u16 file_finderinfo_len = sizeof(struct FInfo) + - sizeof(struct FXInfo); + u16 folder_finderinfo_len = sizeof(entry.folder.info); + u16 file_finderinfo_len = sizeof(entry.file.info); if ((!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) || @@ -296,7 +294,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, sizeof(hfsplus_cat_entry)); if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) { if (size == folder_finderinfo_len) { - memcpy(&entry.folder.user_info, value, + memcpy(&entry.folder.info, value, folder_finderinfo_len); hfs_bnode_write(cat_fd.bnode, &entry, cat_fd.entryoffset, @@ -309,7 +307,7 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, } } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) { if (size == file_finderinfo_len) { - memcpy(&entry.file.user_info, value, + memcpy(&entry.file.info, value, file_finderinfo_len); hfs_bnode_write(cat_fd.bnode, &entry, cat_fd.entryoffset, @@ -462,14 +460,14 @@ static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, if (entry_type == HFSPLUS_FOLDER) { hfs_bnode_read(fd.bnode, folder_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_folder, user_info), + offsetof(struct hfsplus_cat_folder, info.user), folder_rec_len); memcpy(value, folder_finder_info, folder_rec_len); res = folder_rec_len; } else if (entry_type == HFSPLUS_FILE) { hfs_bnode_read(fd.bnode, file_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_file, user_info), + offsetof(struct hfsplus_cat_file, info.user), file_rec_len); memcpy(value, file_finder_info, file_rec_len); res = file_rec_len; @@ -630,14 +628,14 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, len = sizeof(struct DInfo) + sizeof(struct DXInfo); hfs_bnode_read(fd.bnode, folder_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_folder, user_info), + offsetof(struct hfsplus_cat_folder, info.user), len); found_bit = find_first_bit((void *)folder_finder_info, len*8); } else if (entry_type == 
HFSPLUS_FILE) { len = sizeof(struct FInfo) + sizeof(struct FXInfo); hfs_bnode_read(fd.bnode, file_finder_info, fd.entryoffset + - offsetof(struct hfsplus_cat_file, user_info), + offsetof(struct hfsplus_cat_file, info.user), len); found_bit = find_first_bit((void *)file_finder_info, len*8); } else { From 8e99a8a1752d681fd1aedae0ba3c3291956c560b Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Thu, 21 Oct 2021 15:10:18 +1100 Subject: [PATCH 0959/1202] signal: remove duplicate include in signal.h 'linux/string.h' included in 'signal.h' is duplicated. it's also included at line 7. Link: https://lkml.kernel.org/r/20211019024934.973008-1-ye.guojin@zte.com.cn Signed-off-by: Ye Guojin Reported-by: Zeal Robot Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- include/linux/signal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index 3f96a6374e4f1..37f5080c070a5 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -126,7 +126,6 @@ static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2) #define sigmask(sig) (1UL << ((sig) - 1)) #ifndef __HAVE_ARCH_SIG_SETOPS -#include #define _SIG_SET_BINOP(name, op) \ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ From 7b36af638d3d93f8ca722da7e4ab501912f61357 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 21 Oct 2021 15:10:19 +1100 Subject: [PATCH 0960/1202] seq_file: move seq_escape() to a header Move seq_escape() to the header as inliner, for a small kernel text size reduction. Link: https://lkml.kernel.org/r/20211001122917.67228-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/seq_file.c | 16 ---------------- include/linux/seq_file.h | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 4a2cda04d3e29..f8e1f4ee87ffc 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -383,22 +383,6 @@ void seq_escape_mem(struct seq_file *m, const char *src, size_t len, } EXPORT_SYMBOL(seq_escape_mem); -/** - * seq_escape - print string into buffer, escaping some characters - * @m: target buffer - * @s: string - * @esc: set of characters that need escaping - * - * Puts string into buffer, replacing each occurrence of character from - * @esc with usual octal escape. - * Use seq_has_overflowed() to check for errors. - */ -void seq_escape(struct seq_file *m, const char *s, const char *esc) -{ - seq_escape_str(m, s, ESCAPE_OCTAL, esc); -} -EXPORT_SYMBOL(seq_escape); - void seq_vprintf(struct seq_file *m, const char *f, va_list args) { int len; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dd99569595fd3..103776e185554 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -135,7 +136,21 @@ static inline void seq_escape_str(struct seq_file *m, const char *src, seq_escape_mem(m, src, strlen(src), flags, esc); } -void seq_escape(struct seq_file *m, const char *s, const char *esc); +/** + * seq_escape - print string into buffer, escaping some characters + * @m: target buffer + * @s: NULL-terminated string + * @esc: set of characters that need escaping + * + * Puts string into buffer, replacing each occurrence of character from + * @esc with usual octal escape. + * + * Use seq_has_overflowed() to check for errors. 
+ */ +static inline void seq_escape(struct seq_file *m, const char *s, const char *esc) +{ + seq_escape_str(m, s, ESCAPE_OCTAL, esc); +} void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, From fed482a308cc675c90ac31db81ea9c015f393f46 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Thu, 21 Oct 2021 15:10:20 +1100 Subject: [PATCH 0961/1202] kernel/fork.c: unshare(): use swap() to make code cleaner Use swap() instead of reimplementing it. Link: https://lkml.kernel.org/r/20210909022046.8151-1-ran.xiaokai@zte.com.cn Signed-off-by: Ran Xiaokai Cc: Gabriel Krisman Bertazi Cc: Peter Zijlstra Cc: Eric W. Biederman Cc: Jens Axboe Cc: Alexey Gladkov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/fork.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index a7da9b0bc4029..723125f2cbeda 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3078,7 +3078,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, int ksys_unshare(unsigned long unshare_flags) { struct fs_struct *fs, *new_fs = NULL; - struct files_struct *fd, *new_fd = NULL; + struct files_struct *new_fd = NULL; struct cred *new_cred = NULL; struct nsproxy *new_nsproxy = NULL; int do_sysvsem = 0; @@ -3165,11 +3165,8 @@ int ksys_unshare(unsigned long unshare_flags) spin_unlock(&fs->lock); } - if (new_fd) { - fd = current->files; - current->files = new_fd; - new_fd = fd; - } + if (new_fd) + swap(current->files, new_fd); task_unlock(current); From f7fd6b9338a55759eb82731ae8a5cd2874864e3f Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 21 Oct 2021 15:10:21 +1100 Subject: [PATCH 0962/1202] sysv: use BUILD_BUG_ON instead of runtime check There were runtime checks on the sizes of struct v7_super_block and struct sysv_inode. If one of these checks fails, the kernel will panic. Since these values are known at compile time, let's use BUILD_BUG_ON(), because it is the standard mechanism for validation checks at build time. Link: https://lkml.kernel.org/r/20210813123020.22971-1-paskripkin@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pavel Skripkin Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- fs/sysv/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/sysv/super.c b/fs/sysv/super.c index cc8e2ed155c84..d1def0771a408 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -474,10 +474,8 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) struct sysv_sb_info *sbi; struct buffer_head *bh; - if (440 != sizeof (struct v7_super_block)) - panic("V7 FS: bad super-block size"); - if (64 != sizeof (struct sysv_inode)) - panic("sysv fs: bad i-node size"); + BUILD_BUG_ON(sizeof(struct v7_super_block) != 440); + BUILD_BUG_ON(sizeof(struct sysv_inode) != 64); sbi = kzalloc(sizeof(struct sysv_sb_info), GFP_KERNEL); if (!sbi) From f42893c1db5983f486ea406aeb11de241083e62f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:10:22 +1100 Subject: [PATCH 0963/1202] Documentation/kcov: include types.h in the example Patch series "kcov: PREEMPT_RT fixup + misc", v2. The last patch in the series is a follow-up to address the PREEMPT_RT issue in kcov reported by Clark [1]. Patches 1-3 are smaller things that I noticed while staring at it. Patch 4 is a small change which makes the replacement in #5 simpler / more obvious.
[1] https://lkml.kernel.org/r/20210809155909.333073de@theseus.lan This patch (of 5): The first example code has includes at the top; the following two examples share that part. The last example (remote coverage collection) requires the linux/types.h header file due to its __aligned_u64 usage. Add linux/types.h to the topmost example, and a comment that the header files from above are required, as is done in the second example. Link: https://lkml.kernel.org/r/20210923164741.1859522-1-bigeasy@linutronix.de Link: https://lore.kernel.org/r/20210830172627.267989-2-bigeasy@linutronix.de Link: https://lkml.kernel.org/r/20210923164741.1859522-2-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dmitry Vyukov Acked-by: Marco Elver Tested-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Steven Rostedt Cc: Clark Williams Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kcov.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index d2c4c27e1702d..347f3b6de8d40 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -50,6 +50,7 @@ program using kcov: #include #include #include + #include #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) @@ -251,6 +252,8 @@ selectively from different subsystems. .. code-block:: c + /* Same includes and defines as above. */ + struct kcov_remote_arg { __u32 trace_mode; __u32 area_size; From 81b66d03d2902fb06f52e080f56e350c33a9b4a1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:10:23 +1100 Subject: [PATCH 0964/1202] Documentation/kcov: define `ip' in the example The example code uses the variable `ip' but never declares it. Declare `ip' as a 64-bit variable, the same type as the array from which it loads its value. Link: https://lkml.kernel.org/r/20210923164741.1859522-3-bigeasy@linutronix.de Link: https://lore.kernel.org/r/20210830172627.267989-3-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dmitry Vyukov Acked-by: Marco Elver Tested-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Clark Williams Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- Documentation/dev-tools/kcov.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 347f3b6de8d40..d83c9ab494275 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -178,6 +178,8 @@ Comparison operands collection is similar to coverage collection: /* Read number of comparisons collected. */ n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); for (i = 0; i < n; i++) { + uint64_t ip; + type = cover[i * KCOV_WORDS_PER_CMP + 1]; /* arg1 and arg2 - operands of the comparison. */ arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; From 2ec59ed5e2f90b3ed8e88b81a752b3aa0810e8bd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:10:24 +1100 Subject: [PATCH 0965/1202] kcov: allocate per-CPU memory on the relevant node During boot kcov allocates per-CPU memory which is used later if remote/softirq processing is enabled. Allocate the per-CPU memory on the CPU-local node to avoid cross-node memory access.
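The small diff below is the whole fix, but the pattern generalizes to any boot-time per-CPU buffer. A hedged sketch of NUMA-aware per-CPU allocation (BUF_WORDS and my_percpu_data are illustrative placeholders, not names from the patch):

	int cpu;

	for_each_possible_cpu(cpu) {
		/* Allocate from the node backing this CPU rather than
		 * the node the boot CPU happens to be running on. */
		void *area = vmalloc_node(BUF_WORDS * sizeof(unsigned long),
					  cpu_to_node(cpu));

		if (!area)
			return -ENOMEM;
		per_cpu_ptr(&my_percpu_data, cpu)->area = area;
	}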
Link: https://lkml.kernel.org/r/20210923164741.1859522-4-bigeasy@linutronix.de Link: https://lore.kernel.org/r/20210830172627.267989-4-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dmitry Vyukov Acked-by: Marco Elver Tested-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Clark Williams Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 80bfe71bbe13e..4f910231d99a2 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -1034,8 +1034,8 @@ static int __init kcov_init(void) int cpu; for_each_possible_cpu(cpu) { - void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE * - sizeof(unsigned long)); + void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE * + sizeof(unsigned long), cpu_to_node(cpu)); if (!area) return -ENOMEM; per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area; From b7da6d68c9aee8bc4ed999634803bbc9dd1d61f4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:10:25 +1100 Subject: [PATCH 0966/1202] kcov: avoid enable+disable interrupts if !in_task() kcov_remote_start() may need to allocate memory in the in_task() case (otherwise per-CPU memory has been pre-allocated) and therefore requires enabled interrupts. The interrupts are enabled before checking if the allocation is required so if no allocation is required then the interrupts are needlessly enabled and disabled again. Enable interrupts only if memory allocation is performed. Link: https://lkml.kernel.org/r/20210923164741.1859522-5-bigeasy@linutronix.de Link: https://lore.kernel.org/r/20210830172627.267989-5-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dmitry Vyukov Acked-by: Marco Elver Tested-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Clark Williams Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/kcov.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 4f910231d99a2..620dc4ffeb685 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -869,19 +869,19 @@ void kcov_remote_start(u64 handle) size = CONFIG_KCOV_IRQ_AREA_SIZE; area = this_cpu_ptr(&kcov_percpu_data)->irq_area; } - spin_unlock_irqrestore(&kcov_remote_lock, flags); + spin_unlock(&kcov_remote_lock); /* Can only happen when in_task(). */ if (!area) { + local_irqrestore(flags); area = vmalloc(size * sizeof(unsigned long)); if (!area) { kcov_put(kcov); return; } + local_irq_save(flags); } - local_irq_save(flags); - /* Reset coverage size. */ *(u64 *)area = 0; From 2e7375f46897067a0528a098ad0efcd172b2ef1f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 21 Oct 2021 15:10:26 +1100 Subject: [PATCH 0967/1202] kcov: replace local_irq_save() with a local_lock_t The kcov code mixes local_irq_save() and spin_lock() in kcov_remote_{start|end}(). This creates a warning on PREEMPT_RT because local_irq_save() disables interrupts and spin_lock_t is turned into a sleeping lock which can not be acquired in a section with disabled interrupts. The kcov_remote_lock is used to synchronize the access to the hash-list kcov_remote_map. The local_irq_save() block protects access to the per-CPU data kcov_percpu_data. There is no compelling reason to change the lock type to raw_spin_lock_t to make it work with local_irq_save(). Changing it would require to move memory allocation (in kcov_remote_add()) and deallocation outside of the locked section. 
Adding an unlimited amount of entries to the hashlist will increase the IRQ-off time during lookup. It could be argued that this is debug code and the latency does not matter. There is, however, no need to do so, and avoiding it allows this facility to be used in an RT-enabled build. Using a local_lock_t instead of local_irq_save() has the benefit of adding a protection scope within the source which makes it obvious what is protected. On a !PREEMPT_RT && !LOCKDEP build the local_lock_irqsave() maps directly to local_irq_save() so there is no additional overhead at runtime. Replace the local_irq_save() section with a local_lock_t. Link: https://lkml.kernel.org/r/20210923164741.1859522-6-bigeasy@linutronix.de Link: https://lore.kernel.org/r/20210830172627.267989-6-bigeasy@linutronix.de Reported-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior Acked-by: Dmitry Vyukov Acked-by: Marco Elver Tested-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/kcov.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 620dc4ffeb685..36ca640c4f8e7 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -88,6 +88,7 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); struct kcov_percpu_data { void *irq_area; + local_lock_t lock; unsigned int saved_mode; unsigned int saved_size; @@ -96,7 +97,9 @@ struct kcov_percpu_data { int saved_sequence; }; -static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); +static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = { + .lock = INIT_LOCAL_LOCK(lock), +}; /* Must be called with kcov_remote_lock locked. */ static struct kcov_remote *kcov_remote_find(u64 handle) @@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle) if (!in_task() && !in_serving_softirq()) return; - local_irq_save(flags); + local_lock_irqsave(&kcov_percpu_data.lock, flags); /* * Check that kcov_remote_start() is not called twice in background */ mode = READ_ONCE(t->kcov_mode); if (WARN_ON(in_task() && kcov_mode_enabled(mode))) { - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } /* @@ -841,14 +844,15 @@ * happened while collecting coverage from a background thread. */ if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) { - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } spin_lock(&kcov_remote_lock); remote = kcov_remote_find(handle); if (!remote) { - spin_unlock_irqrestore(&kcov_remote_lock, flags); + spin_unlock(&kcov_remote_lock); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } kcov_debug("handle = %llx, context: %s\n", handle, @@ -873,13 +877,13 @@ /* Can only happen when in_task(). */ if (!area) { - local_irqrestore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); area = vmalloc(size * sizeof(unsigned long)); if (!area) { kcov_put(kcov); return; } - local_irq_save(flags); + local_lock_irqsave(&kcov_percpu_data.lock, flags); } /* Reset coverage size.
*/ @@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle) } kcov_start(t, kcov, size, area, mode, sequence); - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); } EXPORT_SYMBOL(kcov_remote_start); @@ -965,12 +969,12 @@ void kcov_remote_stop(void) if (!in_task() && !in_serving_softirq()) return; - local_irq_save(flags); + local_lock_irqsave(&kcov_percpu_data.lock, flags); mode = READ_ONCE(t->kcov_mode); barrier(); if (!kcov_mode_enabled(mode)) { - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } /* @@ -978,12 +982,12 @@ * actually found the remote handle and started collecting coverage. */ if (in_serving_softirq() && !t->kcov_softirq) { - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } /* Make sure that kcov_softirq is only set when in softirq. */ if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); return; } @@ -1013,7 +1017,7 @@ spin_unlock(&kcov_remote_lock); } - local_irq_restore(flags); + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); /* Get in kcov_remote_start(). */ kcov_put(kcov); From 0e9f8126d22df40c845a76e63d2ed6472ef64a7a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:10:27 +1100 Subject: [PATCH 0968/1202] kernel/resource: clean up and optimize iomem_is_exclusive() Patch series "virtio-mem: disallow mapping virtio-mem memory via /dev/mem", v5. Let's add the basic infrastructure to exclude some physical memory regions marked as "IORESOURCE_SYSTEM_RAM" completely from /dev/mem access, even though they are not marked IORESOURCE_BUSY and even though "iomem=relaxed" is set. Reuse IORESOURCE_EXCLUSIVE for that purpose instead of adding new flags to express something similar to "soft-busy" or "not busy yet, but already prepared by a driver and not to be mapped by user space". Use it for virtio-mem, to disallow mapping any virtio-mem memory via /dev/mem to user space after the virtio-mem driver was loaded. This patch (of 3): We end up traversing subtrees of ranges we are not interested in; let's optimize this case, skipping such subtrees, cleaning up the function a bit. For example, in the following configuration (/proc/iomem): 00000000-00000fff : Reserved 00001000-00057fff : System RAM 00058000-00058fff : Reserved 00059000-0009cfff : System RAM 0009d000-000fffff : Reserved 000a0000-000bffff : PCI Bus 0000:00 000c0000-000c3fff : PCI Bus 0000:00 000c4000-000c7fff : PCI Bus 0000:00 000c8000-000cbfff : PCI Bus 0000:00 000cc000-000cffff : PCI Bus 0000:00 000d0000-000d3fff : PCI Bus 0000:00 000d4000-000d7fff : PCI Bus 0000:00 000d8000-000dbfff : PCI Bus 0000:00 000dc000-000dffff : PCI Bus 0000:00 000e0000-000e3fff : PCI Bus 0000:00 000e4000-000e7fff : PCI Bus 0000:00 000e8000-000ebfff : PCI Bus 0000:00 000ec000-000effff : PCI Bus 0000:00 000f0000-000fffff : PCI Bus 0000:00 000f0000-000fffff : System ROM 00100000-3fffffff : System RAM 40000000-403fffff : Reserved 40000000-403fffff : pnp 00:00 40400000-80a79fff : System RAM ... We don't have to look at any children of "0009d000-000fffff : Reserved" if we can just skip these 15 items directly because the parent range is not of interest.
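The traversal trick is generic: the resource tree is a first-child/next-sibling tree, and "skip this subtree" simply means "climb until a sibling exists instead of descending". A self-contained sketch of the two successor functions (struct node is a hypothetical stand-in; the patch applies the same logic to struct resource):

	struct node {
		struct node *parent, *sibling, *child;
	};

	/* depth-first successor: descend into children first */
	static struct node *next_node(struct node *n)
	{
		if (n->child)
			return n->child;
		while (!n->sibling && n->parent)
			n = n->parent;
		return n->sibling;
	}

	/* successor that treats n's whole subtree as uninteresting */
	static struct node *next_node_skip_children(struct node *n)
	{
		while (!n->sibling && n->parent)
			n = n->parent;
		return n->sibling;
	}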
Link: https://lkml.kernel.org/r/20210920142856.17758-1-david@redhat.com Link: https://lkml.kernel.org/r/20210920142856.17758-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: "Rafael J. Wysocki" Cc: Hanjun Guo Cc: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/resource.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index ca9f5198a01ff..2999f57da38c3 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -73,6 +73,18 @@ static struct resource *next_resource(struct resource *p) return p->sibling; } +static struct resource *next_resource_skip_children(struct resource *p) +{ + while (!p->sibling && p->parent) + p = p->parent; + return p->sibling; +} + +#define for_each_resource(_root, _p, _skip_children) \ + for ((_p) = (_root)->child; (_p); \ + (_p) = (_skip_children) ? next_resource_skip_children(_p) : \ + next_resource(_p)) + static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; @@ -1712,10 +1724,9 @@ static int strict_iomem_checks; */ bool iomem_is_exclusive(u64 addr) { - struct resource *p = &iomem_resource; - bool err = false; - loff_t l; + bool skip_children = false, err = false; int size = PAGE_SIZE; + struct resource *p; if (!strict_iomem_checks) return false; @@ -1723,15 +1734,19 @@ bool iomem_is_exclusive(u64 addr) addr = addr & PAGE_MASK; read_lock(&resource_lock); - for (p = p->child; p ; p = r_next(NULL, p, &l)) { + for_each_resource(&iomem_resource, p, skip_children) { /* * We can probably skip the resources without * IORESOURCE_IO attribute? */ if (p->start >= addr + size) break; - if (p->end < addr) + if (p->end < addr) { + skip_children = true; continue; + } + skip_children = false; + /* * A resource is exclusive if IORESOURCE_EXCLUSIVE is set * or CONFIG_IO_STRICT_DEVMEM is enabled and the From 3ef2f8dc6a19ec281dbd8db3f24b8c708d158efa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:10:28 +1100 Subject: [PATCH 0969/1202] kernel/resource: disallow access to exclusive system RAM regions virtio-mem dynamically exposes memory inside a device memory region as system RAM to Linux, coordinating with the hypervisor which parts are actually "plugged" and consequently usable/accessible. On the one hand, the virtio-mem driver adds/removes whole memory blocks, creating/removing busy IORESOURCE_SYSTEM_RAM resources, on the other hand, it logically (un)plugs memory inside added memory blocks, dynamically either exposing them to the buddy or hiding them from the buddy and marking them PG_offline. In contrast to physical devices, like a DIMM, the virtio-mem driver is required to actually make use of any of the device-provided memory, because it performs the handshake with the hypervisor. virtio-mem memory cannot simply be accessed via /dev/mem without a driver. There is no safe way to: a) Access plugged memory blocks via /dev/mem, as they might contain unplugged holes or might get silently unplugged by the virtio-mem driver and consequently turned inaccessible. b) Access unplugged memory blocks via /dev/mem because the virtio-mem driver is required to make them actually accessible first. The virtio-spec states that unplugged memory blocks MUST NOT be written, and only selected unplugged memory blocks MAY be read.
We want to make sure this is the case in sane environments -- where the virtio-mem driver was loaded. We want to make sure that in a sane environment, nobody "accidentally" accesses unplugged memory inside the device managed region. For example, a user might spot a memory region in /proc/iomem and try accessing it via /dev/mem via gdb or dumping it via something else. By the time the mmap() happens, the memory might already have been removed by the virtio-mem driver silently: the mmap() would succeed and user space might accidentally access unplugged memory. So once the driver was loaded and detected the device along the device-managed region, we just want to disallow any access via /dev/mem to it. In an ideal world, we would mark the whole region as busy ("owned by a driver") and exclude it; however, that would be wrong, as we don't really have actual system RAM at these ranges added to Linux ("busy system RAM"). Instead, we want to mark such ranges as "not actual busy system RAM but still soft-reserved and prepared by a driver for future use." Let's teach iomem_is_exclusive() to reject access to any range with "IORESOURCE_SYSTEM_RAM | IORESOURCE_EXCLUSIVE", even if not busy and even if "iomem=relaxed" is set. Introduce EXCLUSIVE_SYSTEM_RAM to make it easier for applicable drivers to depend on this setting in their Kconfig. For now, there are no applicable ranges and we'll modify virtio-mem next to properly set IORESOURCE_EXCLUSIVE on the parent resource container it creates to contain all actual busy system RAM added via add_memory_driver_managed(). Link: https://lkml.kernel.org/r/20210920142856.17758-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Hanjun Guo Cc: Jason Wang Cc: "Michael S. Tsirkin" Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- kernel/resource.c | 29 +++++++++++++++++---------- mm/Kconfig | 7 +++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 2999f57da38c3..5ad3eba619ba7 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1719,26 +1719,23 @@ static int strict_iomem_checks; #endif /* - * check if an address is reserved in the iomem resource tree - * returns true if reserved, false if not reserved. + * Check if an address is exclusive to the kernel and must not be mapped to + * user space, for example, via /dev/mem. + * + * Returns true if exclusive to the kernel, otherwise returns false. */ bool iomem_is_exclusive(u64 addr) { + const unsigned int exclusive_system_ram = IORESOURCE_SYSTEM_RAM | + IORESOURCE_EXCLUSIVE; bool skip_children = false, err = false; int size = PAGE_SIZE; struct resource *p; - if (!strict_iomem_checks) - return false; - addr = addr & PAGE_MASK; read_lock(&resource_lock); for_each_resource(&iomem_resource, p, skip_children) { - /* - * We can probably skip the resources without - * IORESOURCE_IO attribute? - */ if (p->start >= addr + size) break; if (p->end < addr) { @@ -1747,12 +1744,24 @@ bool iomem_is_exclusive(u64 addr) } skip_children = false; + /* + * IORESOURCE_SYSTEM_RAM resources are exclusive if + * IORESOURCE_EXCLUSIVE is set, even if they + * are not busy and even if "iomem=relaxed" is set. The + * responsible driver dynamically adds/removes system RAM within + * such an area and uncontrolled access is dangerous.
+ */ if ((p->flags & exclusive_system_ram) == exclusive_system_ram) { + err = true; + break; + } + /* * A resource is exclusive if IORESOURCE_EXCLUSIVE is set * or CONFIG_IO_STRICT_DEVMEM is enabled and the * resource is busy. */ - if ((p->flags & IORESOURCE_BUSY) == 0) + if (!strict_iomem_checks || !(p->flags & IORESOURCE_BUSY)) continue; if (IS_ENABLED(CONFIG_IO_STRICT_DEVMEM) || p->flags & IORESOURCE_EXCLUSIVE) { diff --git a/mm/Kconfig b/mm/Kconfig index 84387164a741f..c150a0c6fce2c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -109,6 +109,13 @@ config NUMA_KEEP_MEMINFO config MEMORY_ISOLATION bool +# IORESOURCE_SYSTEM_RAM regions in the kernel resource tree that are marked +# IORESOURCE_EXCLUSIVE cannot be mapped to user space, for example, via +# /dev/mem. +config EXCLUSIVE_SYSTEM_RAM + def_bool y + depends on !DEVMEM || STRICT_DEVMEM + # # Only be set on architectures that have completely implemented memory hotplug # feature. If you are not sure, don't touch it. From b5ba7aae2328d97fa736dcea36065a0c347d2451 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Oct 2021 15:10:29 +1100 Subject: [PATCH 0970/1202] virtio-mem: disallow mapping virtio-mem memory via /dev/mem We don't want user space to be able to map virtio-mem device memory directly (e.g., via /dev/mem) in order to have guarantees that in a sane setup we'll never accidentally access unplugged memory within the device-managed region of a virtio-mem device, just as required by the virtio-spec. As soon as the virtio-mem driver is loaded, the device region is visible in /proc/iomem via the parent device region. From that point on user space is aware of the device region and we want to disallow mapping anything inside that region (where we will dynamically (un)plug memory) until the driver has been unloaded cleanly and e.g., another driver might take over. By creating our parent IORESOURCE_SYSTEM_RAM resource with IORESOURCE_EXCLUSIVE, we will disallow any /dev/mem access to our device region until the driver was unloaded cleanly and removed the parent region. This will work even though only some memory blocks are actually currently added to Linux and appear as busy in the resource tree. So access to the region from user space is only possible: a) if we don't load the virtio-mem driver, or b) after unloading the virtio-mem driver cleanly. Don't build virtio-mem if access to /dev/mem cannot be restricted -- if we have CONFIG_DEVMEM=y but CONFIG_STRICT_DEVMEM is not set. Link: https://lkml.kernel.org/r/20210920142856.17758-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michael S. Tsirkin Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Hanjun Guo Cc: Jason Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/virtio/Kconfig | 1 + drivers/virtio/virtio_mem.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 3654def9915c5..44a082639439a 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -101,6 +101,7 @@ config VIRTIO_MEM depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on CONTIG_ALLOC + depends on EXCLUSIVE_SYSTEM_RAM help This driver provides access to virtio-mem paravirtualized memory devices, allowing to hotplug and hotunplug memory.
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 3573b78f6b050..0da0af251c73d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2672,8 +2672,10 @@ static int virtio_mem_create_resource(struct virtio_mem *vm) if (!name) return -ENOMEM; + /* Disallow mapping device memory via /dev/mem completely. */ vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, - name, IORESOURCE_SYSTEM_RAM); + name, IORESOURCE_SYSTEM_RAM | + IORESOURCE_EXCLUSIVE); if (!vm->parent_resource) { kfree(name); dev_warn(&vm->vdev->dev, "could not reserve device region\n"); From b2ebd00e2db35d4f6031fb9db6da97fc71b64658 Mon Sep 17 00:00:00 2001 From: Michal Clapinski Date: Thu, 21 Oct 2021 15:10:30 +1100 Subject: [PATCH 0971/1202] ipc: check checkpoint_restore_ns_capable() to modify C/R proc files This commit removes the requirement to be root to modify sem_next_id, msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable instead. Since those files are specific to the IPC namespace, there is no reason they should require root privileges. This is similar to ns_last_pid, which also only checks checkpoint_restore_ns_capable. Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com Signed-off-by: Michal Clapinski Reviewed-by: Davidlohr Bueso Reviewed-by: Manfred Spraul Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- ipc/ipc_sysctl.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 3f312bf2b1163..f8e27203ca49e 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -104,6 +104,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, return ret; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, + int write, void *buffer, size_t *lenp, loff_t *ppos) +{ + struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; + + if (write && !checkpoint_restore_ns_capable(user_ns)) + return -EPERM; + + return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +#endif + #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL @@ -111,6 +124,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, #define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_auto_msgmni NULL #define proc_ipc_sem_dointvec NULL +#ifdef CONFIG_CHECKPOINT_RESTORE +#define proc_ipc_dointvec_minmax_checkpoint_restore NULL +#endif /* CONFIG_CHECKPOINT_RESTORE */ #endif int ipc_mni = IPCMNI; @@ -198,8 +214,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "sem_next_id", .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -207,8 +223,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "msg_next_id", .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, @@ -216,8 +232,8 @@ static struct ctl_table ipc_kern_table[] = { .procname = "shm_next_id", .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen = 
sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode = 0644, - .proc_handler = proc_ipc_dointvec_minmax, + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, From 2f417cf21c2d29564e38e2e94f47ce40253a6047 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Oct 2021 15:10:31 +1100 Subject: [PATCH 0972/1202] ipc-check-checkpoint_restore_ns_capable-to-modify-c-r-proc-files-fix ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable() Cc: Davidlohr Bueso Cc: "Eric W. Biederman" Cc: Manfred Spraul Cc: Michal Clapinski Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- ipc/ipc_sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index f8e27203ca49e..345e4d673e61e 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "util.h" From cc0170e845bf878f9ef6151119120ff9e13b0e64 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Thu, 21 Oct 2021 15:10:32 +1100 Subject: [PATCH 0973/1202] ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL Compilation of ipc/ipc_sysctl.c is controlled by obj-$(CONFIG_SYSVIPC_SYSCTL) [see ipc/Makefile] And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL [see init/Kconfig] An SYSCTL is selected by PROC_SYSCTL. [see fs/proc/Kconfig] Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the fallback can be removed. Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com Signed-off-by: Manfred Spraul Reviewed-by: "Eric W. Biederman" Acked-by: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- ipc/ipc_sysctl.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 345e4d673e61e..f101c171753f6 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table) return which; } -#ifdef CONFIG_PROC_SYSCTL static int proc_ipc_dointvec(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, } #endif -#else -#define proc_ipc_doulongvec_minmax NULL -#define proc_ipc_dointvec NULL -#define proc_ipc_dointvec_minmax NULL -#define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_auto_msgmni NULL -#define proc_ipc_sem_dointvec NULL -#ifdef CONFIG_CHECKPOINT_RESTORE -#define proc_ipc_dointvec_minmax_checkpoint_restore NULL -#endif /* CONFIG_CHECKPOINT_RESTORE */ -#endif - int ipc_mni = IPCMNI; int ipc_mni_shift = IPCMNI_SHIFT; int ipc_min_cycle = RADIX_TREE_MAP_SIZE; From c035e8eecbf565dba405c4fb63cb00a87f9c174f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Oct 2021 15:52:49 +1100 Subject: [PATCH 0974/1202] mm: allow only SLUB on PREEMPT_RT Memory allocators may disable interrupts or preemption as part of the allocation and freeing process. For PREEMPT_RT it is important that these sections remain deterministic and short and therefore don't depend on the size of the memory to allocate/ free or the inner state of the algorithm. Until v3.12-RT the SLAB allocator was an option but involved several changes to meet all the requirements. The SLUB design fits better with PREEMPT_RT model and so the SLAB patches were dropped in the 3.12-RT patchset. 
Comparing the two allocators, SLUB outperformed SLAB in both throughput (time needed to allocate and free memory) and the maximal latency of the system measured with cyclictest during hackbench. SLOB was never evaluated since it was unlikely that it performs better than SLAB. During a quick test, the kernel crashed with SLOB enabled during boot. Disable SLAB and SLOB on PREEMPT_RT. [bigeasy@linutronix.de: commit description] Link: https://lkml.kernel.org/r/20211015210336.gen3tib33ig5q2md@linutronix.de Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- init/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 7e1b85f85c3d4..edc0a0228f143 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1906,6 +1906,7 @@ choice config SLAB bool "SLAB" + depends on !PREEMPT_RT select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work @@ -1926,6 +1927,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" + depends on !PREEMPT_RT help SLOB replaces the stock allocator with a drastically simpler allocator. SLOB is generally more space efficient but From e1c19ed9b7f014df7ac1f74e05062914ee7b3ae2 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 21 Oct 2021 15:52:50 +1100 Subject: [PATCH 0975/1202] mm: migrate: simplify the file-backed pages validation when migrating its mapping Patch series "Some cleanup for page migration", v3. This patchset does some cleanups and improvements for page migration. This patch (of 4): There is no need to validate the file-backed page's refcount before trying to freeze the page's expected refcount; instead we can rely on folio_ref_freeze() to validate whether the page has the expected refcount before migrating its mapping. Moreover, we are always under the page lock when migrating the page mapping, which means nowhere else can remove it from the page cache, so we can remove the xas_load() validation under the i_pages lock. Link: https://lkml.kernel.org/r/cover.1629447552.git.baolin.wang@linux.alibaba.com Link: https://lkml.kernel.org/r/df4c129fd8e86a95dbc55f4663d77441cc0d3bd1.1629447552.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Suggested-by: Matthew Wilcox Cc: Yang Shi Cc: Alistair Popple Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/migrate.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index a11e948593df0..43dd88c7fcdc2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -404,12 +404,6 @@ int folio_migrate_mapping(struct address_space *mapping, newzone = folio_zone(newfolio); xas_lock_irq(&xas); - if (folio_ref_count(folio) != expected_count || - xas_load(&xas) != folio) { - xas_unlock_irq(&xas); - return -EAGAIN; - } - if (!folio_ref_freeze(folio, expected_count)) { xas_unlock_irq(&xas); return -EAGAIN; From c21a72c9784cb2748f3cfbfcea94013727feea07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Oct 2021 15:52:50 +1100 Subject: [PATCH 0976/1202] mm: unexport folio_memcg_{,un}lock Patch series "unexport memcg locking helpers". Neither the old page-based nor the new folio-based memcg locking helpers are used in modular code at all, so drop the exports. This patch (of 2): folio_memcg_{,un}lock are only used in built-in core mm code.
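For background, EXPORT_SYMBOL() is what makes a symbol resolvable from loadable modules at all; without it, only code linked into vmlinux can call the function, which is exactly the restriction these unexport patches rely on. A tiny sketch (my_helper is a hypothetical name):

	int my_helper(void)
	{
		return 0;
	}
	/* With the next line, modules may link against my_helper();
	 * dropping it limits callers to built-in code. */
	EXPORT_SYMBOL(my_helper);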
Link: https://lkml.kernel.org/r/20210820095815.445392-1-hch@lst.de Link: https://lkml.kernel.org/r/20210820095815.445392-2-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 146e83a1b9a6c..a33dacd38e13b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2058,7 +2058,6 @@ void folio_memcg_lock(struct folio *folio) memcg->move_lock_task = current; memcg->move_lock_flags = flags; } -EXPORT_SYMBOL(folio_memcg_lock); void lock_page_memcg(struct page *page) { @@ -2092,7 +2091,6 @@ void folio_memcg_unlock(struct folio *folio) { __folio_memcg_unlock(folio_memcg(folio)); } -EXPORT_SYMBOL(folio_memcg_unlock); void unlock_page_memcg(struct page *page) { From 8dfec8787887cf93a47767396323d882bf525b36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Oct 2021 15:52:50 +1100 Subject: [PATCH 0977/1202] mm: unexport {,un}lock_page_memcg These are only used in built-in core mm code. Link: https://lkml.kernel.org/r/20210820095815.445392-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a33dacd38e13b..9edccfeac804c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2063,7 +2063,6 @@ void lock_page_memcg(struct page *page) { folio_memcg_lock(page_folio(page)); } -EXPORT_SYMBOL(lock_page_memcg); static void __folio_memcg_unlock(struct mem_cgroup *memcg) { @@ -2096,7 +2095,6 @@ void unlock_page_memcg(struct page *page) { folio_memcg_unlock(page_folio(page)); } -EXPORT_SYMBOL(unlock_page_memcg); struct obj_stock { #ifdef CONFIG_MEMCG_KMEM From c34b22143ac6ab6bbc812d70db0949ba167cdc8a Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Thu, 21 Oct 2021 15:52:50 +1100 Subject: [PATCH 0978/1202] kasan: add kasan mode messages when kasan init There are multiple kasan modes. It makes sense that we add some messages to know which kasan mode is active when booting up; see [1]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=212195 [1] Link: https://lkml.kernel.org/r/20211020094850.4113-1-Kuan-Ying.Lee@mediatek.com Signed-off-by: Kuan-Ying Lee Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Catalin Marinas Cc: Will Deacon Cc: Matthias Brugger Cc: Chinwen Chang Cc: Yee Lee Cc: Nicholas Tang Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- arch/arm64/mm/kasan_init.c | 2 +- mm/kasan/hw_tags.c | 14 +++++++++++++- mm/kasan/sw_tags.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 5b996ca4d9960..6f5a6fe8edd74 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -309,7 +309,7 @@ void __init kasan_init(void) kasan_init_depth(); #if defined(CONFIG_KASAN_GENERIC) /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags().
*/ - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (generic)\n"); #endif } diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index dc892119e88f4..7355cb534e4f8 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -106,6 +106,16 @@ static int __init early_kasan_flag_stacktrace(char *arg) } early_param("kasan.stacktrace", early_kasan_flag_stacktrace); +static inline const char *kasan_mode_info(void) +{ + if (kasan_mode == KASAN_MODE_ASYNC) + return "async"; + else if (kasan_mode == KASAN_MODE_ASYMM) + return "asymm"; + else + return "sync"; +} + /* kasan_init_hw_tags_cpu() is called for each CPU. */ void kasan_init_hw_tags_cpu(void) { @@ -177,7 +187,9 @@ void __init kasan_init_hw_tags(void) break; } - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, stacktrace=%s)\n", + kasan_mode_info(), + kasan_stack_collection_enabled() ? "on" : "off"); } void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index bd3f540feb472..77f13f391b577 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -42,7 +42,7 @@ void __init kasan_init_sw_tags(void) for_each_possible_cpu(cpu) per_cpu(prng_state, cpu) = (u32)get_cycles(); - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (sw-tags)\n"); } /* From 39573caa1324924133edaa6ded537f7e00502574 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 21 Oct 2021 20:39:04 -0700 Subject: [PATCH 0979/1202] lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup3 Due to cd06ab2fd48f ("drm/locking: add backtrace for locking contended locks without backoff") landing recently to -next adding a new stack depot user in drivers/gpu/drm/drm_modeset_lock.c we need to add an appropriate call to stack_depot_init() there as well. 
Link: https://lkml.kernel.org/r/2a692365-cfa1-64f2-34e0-8aa5674dce5e@suse.cz Signed-off-by: Vlastimil Babka Cc: Jani Nikula Cc: Naresh Kamboju Cc: Marco Elver Cc: Vijayanand Jitta Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Oliver Glitta Cc: Imran Khan Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Stephen Rothwell --- drivers/gpu/drm/drm_modeset_lock.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index c97323365675c..918065982db46 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -107,6 +107,11 @@ static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) kfree(buf); } + +static void __drm_stack_depot_init(void) +{ + stack_depot_init(); +} #else /* CONFIG_DRM_DEBUG_MODESET_LOCK */ static depot_stack_handle_t __drm_stack_depot_save(void) { @@ -115,6 +120,9 @@ static depot_stack_handle_t __drm_stack_depot_save(void) static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) { } +static void __drm_stack_depot_init(void) +{ +} #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */ /** @@ -359,6 +367,7 @@ void drm_modeset_lock_init(struct drm_modeset_lock *lock) { ww_mutex_init(&lock->mutex, &crtc_ww_class); INIT_LIST_HEAD(&lock->head); + __drm_stack_depot_init(); } EXPORT_SYMBOL(drm_modeset_lock_init); From 9ae1fbdeabd3b3f668ad0bcb47d64b3a9fb4f8fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 25 Oct 2021 20:47:07 +1100 Subject: [PATCH 0980/1202] Add linux-next specific files for 20211025 Signed-off-by: Stephen Rothwell --- Next/SHA1s | 341 + Next/Trees | 343 ++ Next/merge.log | 14707 ++++++++++++++++++++++++++++++++++++++++++++ localversion-next | 1 + 4 files changed, 15392 insertions(+) create mode 100644 Next/SHA1s create mode 100644 Next/Trees create mode 100644 Next/merge.log create mode 100644 localversion-next diff --git a/Next/SHA1s b/Next/SHA1s new file mode 100644 index 0000000000000..a338293eeab3d --- /dev/null +++ b/Next/SHA1s @@ -0,0 +1,341 @@ +Name SHA1 +---- ---- +origin 87066fdd2e30fe9dd531125d95257c118a74617e +fixes 3ca706c189db861b2ca2019a0901b94050ca49d8 +kbuild-current 0664684e1ebd7875e120d0cecd525bac4805f8ed +arc-current c3ca31ce0ea1e1ae34748ded54b6ccc319e7ed20 +arm-current 48ccc8edf5b90622cdc4f8878e0042ab5883e2ca +arm64-fixes 596143e3aec35c93508d6b7a05ddc999ee209b61 +arm-soc-fixes 36b6dcbc1245a3b4dd6bc5cc6b0284c9f05cdddc +drivers-memory-fixes 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +m68k-current 9fde0348640252c79d462c4d29a09a14e8741f5c +powerpc-fixes 787252a10d9422f3058df9a4821f389e5326c440 +s390-fixes 8b7216439e2e2128f6f1a19ad4b6be94d8b0e23d +sparc 05a59d79793d482f628a31753c671f2e92178a21 +fscrypt-current 80f6e3080bfcf865062a926817b3ca6c4a137a57 +net 95a359c9553342d36d408d35331ff0bfce75272f +bpf 04f8ef5643bcd8bcde25dfdebef998aea480b2ba +ipsec 93ec1320b0170d7a207eda2d119c669b673401ed +netfilter 2199f562730dd1382946e0a2532afc38cd444129 +ipvs d9aaaf223297f6146d9d7f36caca927c92ab855a +wireless-drivers 603a1621caa097be23c7784e36cb8edf23cd31db +mac80211 7fcb1c950e98e47918e86a5aa7b8fcc283ec6629 +rdma-fixes 2dace185caa580720c7cd67fec9efc5ee26108ac +sound-current 5fc462c3aaad601d5089fd5588a5799896a6937d +sound-asoc-fixes f15fe1868e701b6499595f42aa44a5c2cdfb5dab +regmap-fixes 38a4b4fb7c733bc491d160cec862433f5c42918f +regulator-fixes 
519d81956ee277b4419c723adfb154603c2565ba +spi-fixes 6ffd7104e8873077279c0ed267ab5fac56a2575e +pci-current e4e737bb5c170df6135a127739a9e6148ee3da82 +driver-core.current 519d81956ee277b4419c723adfb154603c2565ba +tty.current 519d81956ee277b4419c723adfb154603c2565ba +usb.current 519d81956ee277b4419c723adfb154603c2565ba +usb-gadget-fixes e49d033bddf5b565044e2abe4241353959bc9120 +usb-serial-fixes ec8de6b8cec23078e1f5ec1c59177b199ce5f692 +usb-chipidea-fixes f130d08a8d79fd419c339d95eb28ddff72a73f52 +phy 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +staging.current 519d81956ee277b4419c723adfb154603c2565ba +iio-fixes 486a25084155bf633768c26f022201c051d6fd95 +char-misc.current 519d81956ee277b4419c723adfb154603c2565ba +soundwire-fixes 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +thunderbolt-fixes 64570fbc14f8d7cb3fe3995f20e26bc25ce4b2cc +input-current a02dcde595f7cbd240ccd64de96034ad91cffc40 +crypto-current f8690a4b5a1b64f74ae5c4f7c4ea880d8a8e1a0d +vfio-fixes 42de956ca7e5f6c47048dde640f797e783b23198 +kselftest-fixes 519d81956ee277b4419c723adfb154603c2565ba +modules-fixes 0d67e332e6df72f43eaa21228daa3a79e23093f3 +dmaengine-fixes 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +backlight-fixes a38fd8748464831584a19438cbb3082b5a2dab15 +mtd-fixes f60f5741002b9fde748cff65fd09bd6222c5db0c +mfd-fixes a61f4661fba404418a7c77e86586dc52a58a93c6 +v4l-dvb-fixes 206704a1fe0bcaaa036d3e90358bb168fac8bea1 +reset-fixes 3ad60b4b3570937f3278509fe6797a5093ce53f8 +mips-fixes 740da9d7ca4e25f5d87db9f80d75432681b61305 +at91-fixes dbe68bc9e82b6951ff88285ccffc191d872d9a01 +omap-fixes 80d680fdccba214e8106dc1aa33de5207ad75394 +kvm-fixes 95e16b4792b0429f1933872f743410f00e590c55 +kvms390-fixes 0e9ff65f455dfd0a8aea5e7843678ab6fe097e21 +hwmon-fixes ada61aa0b1184a8fda1a89a340c7d6cc4e59aee5 +nvdimm-fixes d55174cccac2e4c2a58ff68b6b573fc0836f73bd +cxl-fixes fae8817ae804a682c6823ad1672438f39fc46c28 +btrfs-fixes d7395f03c79cd6693b9ebf838039a8e25675a133 +vfs-fixes 25f54d08f12feb593e62cc2193fedefaf7825301 +dma-mapping-fixes 18a3c5f7abfdf97f88536d35338ebbee119c355c +i3c-fixes fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8 +drivers-x86-fixes 7df227847ab562c42d318bceccebb0c911c87b04 +samsung-krzk-fixes 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +pinctrl-samsung-fixes 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +devicetree-fixes b2d70c0dbf2731a37d1c7bcc86ab2387954d5f56 +scsi-fixes 282da7cef078a87b6d5e8ceba8b17e428cf0e37c +drm-fixes 595cb5e0b832a3e100cbbdefef797b0c27bf725a +amdgpu-fixes 2c409ba81be25516afe05ae27a4a15da01740b01 +drm-intel-fixes 59be177a909ac320e5f4b2a461ac09e20f35b2d8 +mmc-fixes 162079f2dccd02cb4b6654defd32ca387dd6d4d4 +rtc-fixes bd33335aa93d615cac77d991c448b986761e7a8d +gnss-fixes e73f0f0ee7541171d89f2e2491130c7771ba58d3 +hyperv-fixes 8017c99680fa65e1e8d999df1583de476a187830 +soc-fsl-fixes 7e5e744183bbb0ad02412b21b1e61380c998dd18 +risc-v-fixes 3ef6ca4f354c53abf263cbeb51e7272523c294d8 +pidfd-fixes 03ba0fe4d09f2eb0a91888caaa057ed67462ae2d +fpga-fixes 2a2a79577ddae7d5314b2f57ca86b44d794403d5 +spdx 519d81956ee277b4419c723adfb154603c2565ba +gpio-brgl-fixes 6fda593f3082ef1aa783ac13e89f673fd69a2cb6 +gpio-intel-fixes 1649b83766944e320a6f087abf1e8a221f1cd8b0 +pinctrl-intel-fixes 64570fbc14f8d7cb3fe3995f20e26bc25ce4b2cc +erofs-fixes c40dd3ca2a45d5bd6e8b3f4ace5cb81493096263 +integrity-fixes 8433856947217ebb5697a8ff9c4c9cad4639a2cf +kunit-fixes 519d81956ee277b4419c723adfb154603c2565ba +ubifs-fixes 78c7d49f55d8631b67c09f9bfbe8155211a9ea06 +memblock-fixes 5da2b76dd1f9f41ac46159b35043da6cb09be256 +cel-fixes 7d2a07b769330c34b4deabeed939325c77a7ec2f +irqchip-fixes 
b78f26926b17cc289e4f16b63363abe0aa2e8efc +renesas-fixes 432b52eea3dcf48083bafa4b2b6ef5b054ead609 +perf-current 3ff6d64e68abc231955d216236615918797614ae +efi-fixes 38fa3206bf441911258e5001ac8b6738693f8d82 +drm-misc-fixes ee71fb6c4d99c51f2d82a32c503c872b7e40e7f7 +kbuild 7c5c49dc2b808ff9562a0ae207caf45b5e3f83c9 +perf be8ecc57f180415e8a7c1cc5620c5236be2a7e56 +compiler-attributes 7c00621dcaeea206d7489b3e8b50b1864841ae69 +dma-mapping 7d6db80b7d264d6af9dc21baafe59f93cc4607c5 +asm-generic 7efbbe6e141466dbe022b39fafbc81d17a8ed8be +arc 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +arm c03c06cd0ac5740e209e889d62bcd5f986951b15 +arm64 6203cd38cfc5c9abe6baf8844743e6ae259f8427 +arm-perf e656972b69864348a747954ea187576808000c5a +arm-soc c9b2bcb46758dc5a605d92e70ae50874490c3aef +actions 444d018d8d3874c9c3784a5df3ad2e5f554fbdb6 +amlogic 83e38509109e44b884be13fb6c6943aca1742a57 +aspeed 509d3f2b755fe1289c19ba63d0152e14f5ac482d +at91 f3c0366411d6893360be21a7544595bf275bc9b2 +drivers-memory 0fcbc3b7bceaf3d8eb1c2308a920fd8275c1f40c +imx-mxs 8bd7cd1cc7f06391880fc73cab39e1d5bb6bbe0e +keystone cb293d3b430e35dbc1e8425869f27624355ff10a +mediatek 0efac36e9559b13b8c4daa794a478abd8fbaacc5 +mvebu 04e78a787b74b2a8ebece8e59ec0a54230fcf6ae +omap 92d190433bd8fff5595f4641510d0c63b4004632 +qcom 35aab56bc55a75cb93c6b24d9545293d6e343611 +raspberrypi a036b0a5d7d6ab4bae3b9028bb1d44b677bba5f7 +renesas 525a6b4bd53f32db8b94e94f802cfa91466d92f9 +reset 8c81620ac1ace77dd0cbcc2193d4c7311f77d49b +rockchip cc3bcb015bb1067d2bcd9cb065d6e0083d8e9ec1 +samsung-krzk 3ed6ae3305e0a378a2c33376a3d2bbf897d9c172 +scmi a38a50b66af83206226390fc896394ce55587ef4 +stm32 d4b3aaf0f90bcb4fe9f994016156ffb91c482b42 +sunxi 7fb77af71236681ccf007554cca576ca33589ebd +tegra bbd827b4de7ed0e2e5cf701d58fb0de634c2d781 +ti-k3 1e3d655fe7b48c2341d63f981774742e21744f53 +ti-k3-new f46d16cf5b43b66de030f0e3b2f20d24ba95f369 +xilinx 326b5e9db528f5b50870f6db3e9fa7f2afbb8f20 +clk e88c20ffe7d59b7353a0dfb5a9b6bf3ce016ea2e +clk-imx e8271eff5d8c8499289380edbf3bc47de83ab70b +clk-renesas 2bd9feed23166f5ab67dec2ca02bd3f74c77b0ba +clk-samsung 651521d396a8712be1e4eb5b67a1ad340faa6c56 +csky e21e52ad1e0126e2a5e2013084ac3f47cf1e887a +h8300 1ec10274d436fbe77b821fbdf095b45d0888e46d +m68k 8a3c0a74ae87473589cb881a3854948d40000b7a +m68knommu b7c534f5015b9af21574b89aa108732bafecbd74 +microblaze 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +mips 95b8a5e0111a47e5ef780c04cb6293be33d6ee0d +nds32 07cd7745c6f2081dac0aff7f57ea2b48c86de5fa +nios2 7f7bc20bc41a4fbcd2db75b375ac95e5faf958ae +openrisc 210893cad279e4b33d707beebeacd81c36020da2 +parisc-hd 60017239b6b2892665070c812fee1b6a1c8137dc +powerpc 8f6aca0e0f26eaaee670cd27896993a45cdc8f9e +soc-fsl 54c8b5b6f8a868b9c21a7b0efe92ed2fbcc67080 +risc-v 241527bb84674bd597113892ecf2c7ed4a410e00 +s390 2482e6b6e33c5756266b81c93b64f16e8ba78b0f +sh 12285ff8667bf06c168113c10d3619834e423ae6 +sparc-next dd0d718152e4c65b173070d48ea9dfc06894c3e5 +uml ab6ff1fda1e8646b27b9d87b718250f8c828ad55 +xtensa bd47cdb78997f83bd170c389ef59de9eec65976a +pidfd 61bc346ce64a3864ac55f5d18bdc1572cda4fb18 +fscrypt 7f595d6a6cdc336834552069a2e0a4f6d4756ddf +fscache 2bc879c792fa4d1f97d5da6846b5dcc4ff0e7b69 +afs 7af08140979a6e7e12b78c93b8625c8d25b084e2 +btrfs b54d2cb4664c726fd1f340320c5cc39c394d5279 +ceph 324bfaa1a6cc948c58f7af521994d0edc1836708 +cifs 08e9f52e2dcea8028c06cb3abbb098a0f3d9cc97 +configfs c42dd069be8dfc9b2239a5c89e73bbd08ab35de0 +ecryptfs 682a8e2b41effcaf2e80697e395d47f77c91273f +erofs 1d35b798bffe8b38bb0d5ce84fb7b85cd87622e5 +exfat efc7bd8527d02155ec71f0aa6a149d3e2cc01bdc +ext3 
e96a1866b40570b5950cda8602c2819189c62a48 +ext4 916ff8d5ea0e24fd43f113b6b5326d5ea8f68310 +f2fs 6d135345f6adff3a8a564e352e9a8c25ae8f1621 +fsverity 07c99001312cbf90a357d4877a358f796eede65b +fuse 85bf4c6df4e91d3693748d769d2ea6a30b573339 +gfs2 d000f3b901c3c423933fbac37db3a78cb6d27e80 +jfs c48a14dca2cb57527dde6b960adbe69953935f10 +ksmbd 0d994cd482ee4e8e851388a70869beee51be1c54 +nfs 4cd27df88af29929cda6e8eb4e0f5bb4e25812bf +nfs-anna 8cfb9015280d49f9d92d5b0f88cedf5f0856c0fd +nfsd 291cd656da04163f4bba67953c1f2f823e0d1231 +cel c20106944eb679fa3ab7e686fe5f6ba30fbc51e5 +ntfs3 8607954cf255329d1c6dfc073ff1508b7585573c +orangefs ac2c63757f4f413980d6c676dbe1ae2941b94afa +overlayfs 1dc1eed46f9fa4cb8a07baa24fb44c96d6dd35c9 +ubifs a801fcfeef96702fa3f9b22ad56c5eb1989d9221 +v9fs 9c4d94dc9a64426d2fa0255097a3a84f6ff2eebe +xfs 5ca5916b6bc93577c360c06cb7cdf71adb9b5faf +zonefs 95b115332a835fb0cbd36dfabacf1c57d915e705 +iomap 03b8df8d43ecc3c5724e6bfb80bc0b9ea2aa2612 +djw-vfs d03ef4daf33a33da8d7c397102fff8ae87d04a93 +file-locks e9728cc72d915966dcc288d2e217af48e8fa2362 +vfs 8f40da9494cf4dedee1cbbf168b1ce107d3a484d +printk c15b5fc054c3d6c97e953617605235c5cb8ce979 +pci 1b6c2cdb635d3c966b7b6e113844f33cacb1b2c9 +pstore c5d4fb2539cad2e62c5a3f0d8237613c394f297e +hid 26922a6c2aaf13baa4c341efd2eb4f2b38bc6e8a +i2c 26701d49482a9acf9a6c557c02430efd0c0e583a +i3c a3587e2c0578101900d4abbbd72ab3dad6d5568a +dmi f97a2103f1a75ca70f23deadb4d96a16c4d85e7d +hwmon-staging c0d79987a0d82671bff374c07f2201f9bdf4aaa2 +jc_docs 2df9f7f57905e61cf638284bf86af30f7849ed6b +v4l-dvb fd2eda71a47b095e81b9170c3f8b7ae82b04e785 +v4l-dvb-next 57c3b9f55ba875a6f6295fa59f0bdc0a01c544f8 +pm 30c7771b41dcb03df34307efbc8783ea4d8c28b7 +cpufreq-arm b3c08d1ad2bb1a731e1d7f4eaa6b1373e5cf52d3 +cpupower 79a0dc5530a91694b1e85ef7219893397d85e5bb +devfreq 6b28c7d0781e9554cd50e5e7d4b48aea0c2bdf93 +opp 27ff8187f13ecfec8a26fb1928e906f46f326cc5 +thermal a67a46af4ad6342378e332b7420c1d1a2818c53f +ieee1394 54b3bd99f094b3b919de4078f60d722e62a767e3 +dlm ecd95673142ef80169a6c003b569b8a86d1e6329 +rdma 71ee1f1275432421307d27dd4df43f33e60456de +net-next 4d98bb0d7ec2d0b417df6207b0bafe1868bad9f8 +bpf-next c825f5fee19caf301d9821cd79abaa734322de26 +ipsec-next 83688aec17bf3203d8477a9dd7cce88132950798 +mlx5-next 60dd57c7479418e2bc902143eb46a2fdcfeecbbb +netfilter-next 241eb3f3ee42f6639b31f6e69e7b6ea845e2d182 +ipvs-next ffdd33dd9c12a8c263f78d778066709ef94671f9 +wireless-drivers-next 753453afacc0243bd45de45e34218a8d17493e8f +bluetooth f33b0068cdaf2b9998fa3662585858ef30bc4b9e +mac80211-next 47b068247aa7d76bb7abea796b72e18a4c6e35c3 +mtd c13de2386c78e890d4ae6f01a85eefd0b293fb08 +nand fc9e18f9e987ad46722dad53adab1c12148c213c +spi-nor df872ab1ffe4bd865dfb5956cae4901429004ab6 +crypto 3ae88f676aa63366ffa9eebb8ae787c7e19f0c57 +drm 6f2f7c83303d2227f47551423e507d77d9ea01c7 +drm-misc 4b2b5e142ff499a2bef2b8db0272bbda1088a3fe +amdgpu 519fc0a14c8187ab5ca1959f8264e1d9dc12b398 +drm-intel 877d074939a5f82b099da2db3bcccc6c418b9c39 +drm-tegra 5dccbc9de8f0071eb731b4de81d0638ea6c06a53 +drm-msm e60af4f8550f4400263e6a28e01f285ddf71b8c3 +imx-drm 20fbfc81e390180db738c414c1b7ac85d31e24b3 +etnaviv 81fd23e2b3ccf71c807e671444e8accaba98ca53 +regmap a8d880671c13971b2b88b66e80d12d5e3f0808de +sound f917c04fac45b70ff38633675f86ab4b51782205 +sound-asoc fbdf6ab3d36eacb52e25e37cc87071019fea5865 +modules ced75a2f5da71de5775fda44250e27d7b8024355 +input c6ac8f0b4ca927316eb40e1e9ba83df5d29f3793 +block a43753be92b43c11d878b2c4c04172d38e21ee5b +device-mapper dffca4d565b326b2238167c425fd9c0fdb3fb6d5 +libata 
1af5f7af2484004e71d384d8b88c221fc62cd6b5 +pcmcia e39cdacf2f664b09029e7c1eb354c91a20c367af +mmc 61840edc88138cb7e274ac530aeec6de36fbf386 +mfd 813c24f4caf323dd7d90de95ab8a8d96ee73647a +backlight 3976e974df1fd3a718627b0c9bdfee1953baa0d5 +battery 172d0ccea55ce69718bac693d2ea9341fb61b6c7 +regulator 7492b724df4d33ca3d5b38b70fb4acb93e6d02bf +security 047843bdb3160e8fb225f3752616ac7257033fe4 +apparmor d108370c644b153382632b3e5511ade575c91c86 +integrity cc4299ea039972e57e219e2981b74967c133d41c +keys e377c31f788fc98815e1ab90b5a35704ce35843a +safesetid 1b8b719229197b7afa1b1191e083fb41ace095c5 +selinux 15bf32398ad488c0df1cbaf16431422c87e4feea +smack 0934ad42bb2c5df90a1b9de690f93de735b622fe +tomoyo 7d2a07b769330c34b4deabeed939325c77a7ec2f +tpmdd f985911b7bc75d5c98ed24d8aaa8b94c590f7c6a +watchdog 519d81956ee277b4419c723adfb154603c2565ba +iommu 145cac8e9cfe575b5b462fd6508408f7e9a9f2cd +audit d9516f346e8b8e9c7dd37976a06a5bde1a871d6f +devicetree f1d46c113d5c3b635fa4be0cff595b8672d68ef1 +mailbox 0a5ad4322927ee4aaba6facc0e4faf1ab6c0d48e +spi 52e541e871585919f118a931a1017d18fc009ae4 +tip 5323cc30d36f9fc65a1d6f567abf687bd3fb9951 +clockevents eda9a4f7af6ee47e9e131f20e4f8a41a97379293 +edac f889e52436d6a42e773c4e8aab390c6b25841d61 +irqchip f880d3b0e2a40bb622e6342caf15e7747c028d5f +ftrace fed240d9c9743815fcbc0ca5c0913292ce1f25e2 +rcu 6161068bf209e4f6b5ab68f2bcd7e046f1e79298 +kvm 5edcbfdf96fd9b80777f778431b44daeb8ee1fa9 +kvm-arm 5a2acbbb0179a7ffbb5440b9fa46689f619705ac +kvm-ppc 72476aaa469179222b92c380de60c76b4cb9a318 +kvms390 2ef7843375dca1fd0247adda34238ab584fc063d +xen-tip 319933a80fd4f07122466a77f93e5019d71be74c +percpu a81a52b325ec886eb004ca28b943480dae0353c7 +workqueues 55df0933be74bd2e52aba0b67eb743ae0feabe7e +drivers-x86 fc3341b4b55f46ad0846a22ae149d94bac6ce40b +chrome-platform 3119c28634ddc6ee3813778d9d17741baceef19d +hsi 4ef69e17eb567a3b276fcc3cb3452dcf89d8b063 +leds 97b31c1f8eb865bc3aa5f4a08286a6406d782ea8 +ipmi fc4e78481afa33585195e896f0f9d9cec1129c80 +driver-core c8dcf655ec81f94e19a41c54b74c907b57350360 +usb e4ce9ed835bcaf4cd3230a53a79645986c25ce0f +usb-gadget e49d033bddf5b565044e2abe4241353959bc9120 +usb-serial c8345c0500de4411762db5941058e34979879128 +usb-chipidea-next 78665f57c3faa09f123c4818101526e43ae9b6a4 +tty 7c0408d805797178f075f33d0f705a1c6ef76c82 +char-misc 7b473ae754fec86cfa44f05f0882d1329d244e6c +extcon f83d7033d4ec1edd02c0289e71dc77bf3f41fee6 +phy-next a18c27eccafa8384dcd86b2849ca7646183daf27 +soundwire abd9a6049bb59a9bab8cc8b42ccbe4a46c307f92 +thunderbolt 0a0624a26f9ceb8aae16882d87dc3b12f17f4a49 +vfio 9cef73918e15d2284e71022291a8a07901e80bad +staging 5c0480deda08ae804c495409a3d11c5345f2a964 +iio 7b473ae754fec86cfa44f05f0882d1329d244e6c +mux 3516bd729358a2a9b090c1905bd2a3fa926e24c6 +icc dfe14674bf7b267c44eb91d66bad076af3827a47 +dmaengine 635156d94b644a4000ff19c4fff68a60afff279f +cgroup 0061270307f2ad5fa01db1ac5f1da0e83410adaf +scsi 151a3b7b9d3ce651b97119fe0fa68ab7a9df590a +scsi-mkp 83c3a7beaef7fd261c190b69f6be6337f251bf16 +vhost 2b109044b081148b58974f5696ffd4383c3e9abb +rpmsg 6ee5808de07417fce77167ed72da60f627f682bc +gpio 7ac554888233468a9fd7c4f28721396952dd9959 +gpio-brgl f4a20dfac88c06c9b529a41ff4cf9acba8f3fdff +gpio-intel 1649b83766944e320a6f087abf1e8a221f1cd8b0 +pinctrl 664bad61ccdec3b3b751da79f9c0449e1af005ed +pinctrl-intel 176412f8674be8ba9a45bb61086e224e2118324a +pinctrl-renesas f4e260bffcf367523b77f936fe0dbd278581305e +pinctrl-samsung f9d8de699ac4a059e30cf0e84e7027f1058b9163 +pwm 3f2b16734914fa7c53ef7f8a10a63828890dbd37 +userns e9012e756d3097b663c67dc444a4220192ce81fc +ktest 
170f4869e66275f498ae4736106fb54c0fdcd036 +kselftest c3867ab5924b7a9a0b4a117902a08669d8be7c21 +livepatching cd2d68f2d6b2e5cee0b49c13cef052baca8c0b76 +coresight 1efbcec2ef8c037f1e801c76e4b9434ee2400be7 +rtc a5feda3b361e11b291786d5c4ff86d4b9a55498f +nvdimm e765f13ed126fe7e41d1a6e3c60d754cd6c2af93 +at24 7629254054820bead6e18f8c3ae65c2bb01a5ae2 +ntb f96cb827ce49627543b4aabe8d54d8ea9740ae4e +seccomp d9bbdbf324cda23aa44873f505be77ed4b61d79c +kspp a7790b4a43109e1804a4c683c576f077dec90c8f +kspp-gustavo 3915822ab3a5122dcbae0529febfefd3e999eb04 +cisco 9e98c678c2d6ae3a17cb2de55d17f69dddaa231b +gnss 0f79ce970e79ffb771733f9634d5918d0eb3e30a +fsi 7cc2f34e1f4da07c791737cc6b3d965b31815ea0 +slimbus 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +nvmem 413333fd6a88d023229cc121a5c65d48c4e9c7d6 +xarray 2c7e57a02708a69d0194f9ef2a7b7e54f5a0484a +hyperv 9d68cd9120e4e3af38f843e165631c323b86b4e4 +auxdisplay 97fbb29fc1ebde6ce362e3d0a9473d4c6dec7442 +kgdb f8416aa29185468e0d914ba4b2a330fd53ee263f +hmm 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +fpga 57b44817a8d63e75394bc21849f585ded53de8bb +kunit 6880fa6c56601bb8ed59df6c30fd390cc5f6dd8f +cfi ff1176468d368232b684f75e82563369208bc371 +kunit-next 17ac23eb43f0cbefc8bfce44ad51a9f065895f9f +trivial 9ff9b0d392ea08090cd1780fb196f36dbb586529 +mhi 8c61951b372d83b426a66dafc9c4ac24b3ce3335 +memblock e888fa7bb882a1f305526d8f49d7016a7bc5f5ca +init 38b082236e77d403fed23ac2d30d570598744ec3 +counters e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62 +rust a5085d70a3bab91e91dd7f6273e3138ba92fb006 +cxl ed97afb53365cd03dde266c9644334a558fe5a16 +folio 121703c1c817b3c77f61002466d0bfca7e39f25d +bitmap 785cb064e2f87b6cd2157554f50193dba0642d75 +zstd 464413496acb619a380364a26912f02c8b709d29 +efi 720dff78de360ad9742d5f438101cedcdb5dad84 +akpm-current cc0170e845bf878f9ef6151119120ff9e13b0e64 +akpm 39573caa1324924133edaa6ded537f7e00502574 diff --git a/Next/Trees b/Next/Trees new file mode 100644 index 0000000000000..20d8aeb3911a3 --- /dev/null +++ b/Next/Trees @@ -0,0 +1,343 @@ +Trees included into this release: + +Name Type URL +---- ---- --- +origin git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git#master +fixes git git://git.kernel.org/pub/scm/linux/kernel/git/sfr/next-fixes.git#fixes +kbuild-current git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git#fixes +arc-current git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git#for-curr +arm-current git git://git.armlinux.org.uk/~rmk/linux-arm.git#fixes +arm64-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux#for-next/fixes +arm-soc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git#arm/fixes +drivers-memory-fixes git https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git#fixes +m68k-current git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git#for-linus +powerpc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git#fixes +s390-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git#fixes +sparc git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git#master +fscrypt-current git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#for-stable +net git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git#master +bpf git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git#master +ipsec git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git#master +netfilter git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git#master +ipvs git 
git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git#master +wireless-drivers git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git#master +mac80211 git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git#master +rdma-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#for-rc +sound-current git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git#for-linus +sound-asoc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git#for-linus +regmap-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git#for-linus +regulator-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git#for-linus +spi-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git#for-linus +pci-current git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git#for-linus +driver-core.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git#driver-core-linus +tty.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git#tty-linus +usb.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git#usb-linus +usb-gadget-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git#fixes +usb-serial-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git#usb-linus +usb-chipidea-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git#for-usb-fixes +phy git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git#fixes +staging.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git#staging-linus +iio-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git#fixes-togreg +char-misc.current git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git#char-misc-linus +soundwire-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git#fixes +thunderbolt-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git#fixes +input-current git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git#for-linus +crypto-current git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git#master +vfio-fixes git git://github.com/awilliam/linux-vfio.git#for-linus +kselftest-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#fixes +modules-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git#modules-linus +dmaengine-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git#fixes +backlight-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git#for-backlight-fixes +mtd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#mtd/fixes +mfd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git#for-mfd-fixes +v4l-dvb-fixes git git://linuxtv.org/mchehab/media-next.git#fixes +reset-fixes git https://git.pengutronix.de/git/pza/linux#reset/fixes +mips-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git#mips-fixes +at91-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git#at91-fixes +omap-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git#fixes +kvm-fixes git git://git.kernel.org/pub/scm/virt/kvm/kvm.git#master +kvms390-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git#master +hwmon-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git#hwmon +nvdimm-fixes git 
git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git#libnvdimm-fixes +cxl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git#fixes +btrfs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git#next-fixes +vfs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#fixes +dma-mapping-fixes git git://git.infradead.org/users/hch/dma-mapping.git#for-linus +i3c-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git#i3c/fixes +drivers-x86-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git#fixes +samsung-krzk-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git#fixes +pinctrl-samsung-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git#fixes +devicetree-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git#dt/linus +scsi-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git#fixes +drm-fixes git git://git.freedesktop.org/git/drm/drm.git#drm-fixes +amdgpu-fixes git git://people.freedesktop.org/~agd5f/linux#drm-fixes +drm-intel-fixes git git://anongit.freedesktop.org/drm-intel#for-linux-next-fixes +mmc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git#fixes +rtc-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git#rtc-fixes +gnss-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git#gnss-linus +hyperv-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git#hyperv-fixes +soc-fsl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git#fix +risc-v-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git#fixes +pidfd-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git#fixes +fpga-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git#fixes +spdx git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx.git#spdx-linus +gpio-brgl-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#gpio/for-current +gpio-intel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git#fixes +pinctrl-intel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git#fixes +erofs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git#fixes +integrity-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity#fixes +kunit-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#kunit-fixes +ubifs-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git#fixes +memblock-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git#fixes +cel-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux#for-rc +irqchip-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git#irq/irqchip-fixes +renesas-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git#fixes +perf-current git git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git#perf/urgent +efi-fixes git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git#urgent +drm-misc-fixes git git://anongit.freedesktop.org/drm/drm-misc#for-linux-next-fixes +kbuild git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git#for-next +perf git git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git#perf/core +compiler-attributes git https://github.com/ojeda/linux.git#compiler-attributes +dma-mapping git 
git://git.infradead.org/users/hch/dma-mapping.git#for-next +asm-generic git git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git#master +arc git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git#for-next +arm git git://git.armlinux.org.uk/~rmk/linux-arm.git#for-next +arm64 git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux#for-next/core +arm-perf git git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git#for-next/perf +arm-soc git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git#for-next +actions git git://git.kernel.org/pub/scm/linux/kernel/git/mani/linux-actions.git#for-next +amlogic git git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux.git#for-next +aspeed git git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc.git#for-next +at91 git git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git#at91-next +drivers-memory git https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git#for-next +imx-mxs git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git#for-next +keystone git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git#next +mediatek git git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux.git#for-next +mvebu git git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git#for-next +omap git git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git#for-next +qcom git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git#for-next +raspberrypi git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git#for-next +renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git#next +reset git https://git.pengutronix.de/git/pza/linux#reset/next +rockchip git git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git#for-next +samsung-krzk git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git#for-next +scmi git git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux.git#for-linux-next +stm32 git git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git#stm32-next +sunxi git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git#sunxi/for-next +tegra git git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git#for-next +ti-k3 git git://git.kernel.org/pub/scm/linux/kernel/git/kristo/linux.git#ti-k3-next +ti-k3-new git git://git.kernel.org/pub/scm/linux/kernel/git/nmenon/linux.git#ti-k3-next +xilinx git git://github.com/Xilinx/linux-xlnx.git#for-next +clk git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git#clk-next +clk-imx git git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git#for-next +clk-renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git#renesas-clk +clk-samsung git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git#for-next +csky git git://github.com/c-sky/csky-linux.git#linux-next +h8300 git git://git.sourceforge.jp/gitroot/uclinux-h8/linux.git#h8300-next +m68k git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git#for-next +m68knommu git git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu.git#for-next +microblaze git git://git.monstr.eu/linux-2.6-microblaze.git#next +mips git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git#mips-next +nds32 git git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git#next +nios2 git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git#for-next +openrisc git git://github.com/openrisc/linux.git#for-next 
+parisc-hd git git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git#for-next +powerpc git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git#next +soc-fsl git git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git#next +risc-v git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git#for-next +s390 git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git#for-next +sh git git://git.libc.org/linux-sh#for-next +sparc-next git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next.git#master +uml git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git#linux-next +xtensa git git://github.com/jcmvbkbc/linux-xtensa.git#xtensa-for-next +pidfd git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git#for-next +fscrypt git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#master +fscache git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#fscache-next +afs git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#afs-next +btrfs git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git#for-next +ceph git git://github.com/ceph/ceph-client.git#master +cifs git git://git.samba.org/sfrench/cifs-2.6.git#for-next +configfs git git://git.infradead.org/users/hch/configfs.git#for-next +ecryptfs git git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git#next +erofs git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git#dev +exfat git git://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git#dev +ext3 git git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git#for_next +ext4 git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git#dev +f2fs git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git#dev +fsverity git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git#fsverity +fuse git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git#for-next +gfs2 git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git#for-next +jfs git git://github.com/kleikamp/linux-shaggy.git#jfs-next +ksmbd git https://github.com/smfrench/smb3-kernel.git#ksmbd-for-next +nfs git git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git#linux-next +nfs-anna git git://git.linux-nfs.org/projects/anna/linux-nfs.git#linux-next +nfsd git git://git.linux-nfs.org/~bfields/linux.git#nfsd-next +cel git git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux#for-next +ntfs3 git https://github.com/Paragon-Software-Group/linux-ntfs3.git#master +orangefs git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux#for-next +overlayfs git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#overlayfs-next +ubifs git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git#next +v9fs git git://github.com/martinetd/linux#9p-next +xfs git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#for-next +zonefs git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git#for-next +iomap git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#iomap-for-next +djw-vfs git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git#vfs-for-next +file-locks git git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git#locks-next +vfs git git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git#for-next +printk git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git#for-next +pci git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git#next +pstore git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/pstore +hid git 
git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git#for-next +i2c git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git#i2c/for-next +i3c git git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git#i3c/next +dmi git git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git#dmi-for-next +hwmon-staging git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git#hwmon-next +jc_docs git git://git.lwn.net/linux.git#docs-next +v4l-dvb git git://linuxtv.org/media_tree.git#master +v4l-dvb-next git git://linuxtv.org/mchehab/media-next.git#master +pm git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git#linux-next +cpufreq-arm git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git#cpufreq/arm/linux-next +cpupower git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git#cpupower +devfreq git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git#devfreq-next +opp git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git#opp/linux-next +thermal git git://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git#thermal/linux-next +ieee1394 git git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git#for-next +dlm git git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git#next +rdma git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#for-next +net-next git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git#master +bpf-next git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git#for-next +ipsec-next git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git#master +mlx5-next git git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git#mlx5-next +netfilter-next git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git#master +ipvs-next git git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git#master +wireless-drivers-next git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git#master +bluetooth git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git#master +mac80211-next git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git#master +mtd git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#mtd/next +nand git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#nand/next +spi-nor git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#spi-nor/next +crypto git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git#master +drm git git://git.freedesktop.org/git/drm/drm.git#drm-next +drm-misc git git://anongit.freedesktop.org/drm/drm-misc#for-linux-next +amdgpu git https://gitlab.freedesktop.org/agd5f/linux#drm-next +drm-intel git git://anongit.freedesktop.org/drm-intel#for-linux-next +drm-tegra git git://anongit.freedesktop.org/tegra/linux.git#drm/tegra/for-next +drm-msm git https://gitlab.freedesktop.org/drm/msm.git#msm-next +imx-drm git https://git.pengutronix.de/git/pza/linux#imx-drm/next +etnaviv git https://git.pengutronix.de/git/lst/linux#etnaviv/next +regmap git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git#for-next +sound git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git#for-next +sound-asoc git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git#for-next +modules git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git#modules-next +input git git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git#next +block git 
git://git.kernel.dk/linux-block.git#for-next +device-mapper git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git#for-next +libata git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git#for-next +pcmcia git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git#pcmcia-next +mmc git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git#next +mfd git git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git#for-mfd-next +backlight git git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git#for-backlight-next +battery git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git#for-next +regulator git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git#for-next +security git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git#next-testing +apparmor git git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor#apparmor-next +integrity git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity#next-integrity +keys git git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git#keys-next +safesetid git https://github.com/micah-morton/linux.git#safesetid-next +selinux git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git#next +smack git git://github.com/cschaufler/smack-next#next +tomoyo git https://scm.osdn.net/gitroot/tomoyo/tomoyo-test1.git#master +tpmdd git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git#next +watchdog git git://www.linux-watchdog.org/linux-watchdog-next.git#master +iommu git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git#next +audit git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git#next +devicetree git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git#for-next +mailbox git git://git.linaro.org/landing-teams/working/fujitsu/integration.git#mailbox-for-next +spi git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git#for-next +tip git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git#auto-latest +clockevents git git://git.linaro.org/people/daniel.lezcano/linux.git#timers/drivers/next +edac git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git#edac-for-next +irqchip git git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git#irq/irqchip-next +ftrace git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git#for-next +rcu git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git#rcu/next +kvm git git://git.kernel.org/pub/scm/virt/kvm/kvm.git#next +kvm-arm git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git#next +kvm-ppc git git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc.git#kvm-ppc-next +kvms390 git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git#next +xen-tip git git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git#linux-next +percpu git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git#for-next +workqueues git git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git#for-next +drivers-x86 git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git#for-next +chrome-platform git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git#for-next +hsi git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git#for-next +leds git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git#for-next +ipmi git git://github.com/cminyard/linux-ipmi.git#for-next +driver-core git 
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git#driver-core-next +usb git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git#usb-next +usb-gadget git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git#next +usb-serial git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git#usb-next +usb-chipidea-next git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git#for-usb-next +tty git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git#tty-next +char-misc git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git#char-misc-next +extcon git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git#extcon-next +phy-next git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git#next +soundwire git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git#next +thunderbolt git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git#next +vfio git git://github.com/awilliam/linux-vfio.git#next +staging git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git#staging-next +iio git git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git#togreg +mux git https://gitlab.com/peda-linux/mux.git#for-next +icc git git://git.kernel.org/pub/scm/linux/kernel/git/djakov/icc.git#icc-next +dmaengine git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git#next +cgroup git git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git#for-next +scsi git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git#for-next +scsi-mkp git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git#for-next +vhost git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git#linux-next +rpmsg git git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git#for-next +gpio git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git#for-next +gpio-brgl git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#gpio/for-next +gpio-intel git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git#for-next +pinctrl git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git#for-next +pinctrl-intel git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git#for-next +pinctrl-renesas git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git#renesas-pinctrl +pinctrl-samsung git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git#for-next +pwm git git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git#for-next +userns git git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git#for-next +ktest git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-ktest.git#for-next +kselftest git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#next +livepatching git git://git.kernel.org/pub/scm/linux/kernel/git/livepatching/livepatching#for-next +coresight git git://git.linaro.org/kernel/coresight.git#next +rtc git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git#rtc-next +nvdimm git git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git#libnvdimm-for-next +at24 git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git#at24/for-next +ntb git https://github.com/jonmason/ntb.git#ntb-next +seccomp git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/seccomp +kspp git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git#for-next/kspp +kspp-gustavo git 
git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git#for-next/kspp +cisco git https://github.com/daniel-walker/cisco-linux.git#for-next +gnss git git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git#gnss-next +fsi git git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git#next +slimbus git git://git.kernel.org/pub/scm/linux/kernel/git/srini/slimbus.git#for-next +nvmem git git://git.kernel.org/pub/scm/linux/kernel/git/srini/nvmem.git#for-next +xarray git git://git.infradead.org/users/willy/xarray.git#main +hyperv git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git#hyperv-next +auxdisplay git https://github.com/ojeda/linux.git#auxdisplay +kgdb git git://git.kernel.org/pub/scm/linux/kernel/git/danielt/linux.git#kgdb/for-next +hmm git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git#hmm +fpga git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git#for-next +kunit git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#test +cfi git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git#cfi/next +kunit-next git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git#kunit +trivial git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git#for-next +mhi git git://git.kernel.org/pub/scm/linux/kernel/git/mani/mhi.git#mhi-next +memblock git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git#for-next +init git git://git.infradead.org/users/hch/misc.git#init-user-pointers +counters git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git#counters +rust git https://github.com/Rust-for-Linux/linux.git#rust-next +cxl git git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git#next +folio git git://git.infradead.org/users/willy/pagecache.git#for-next +bitmap git https://github.com/norov/linux.git#bitmap-master-5.15 +zstd git https://github.com/terrelln/linux.git#zstd-1.4.10 +efi git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git#next +akpm-current mmotm https://www.ozlabs.org/~akpm/mmotm/ +akpm mmotm https://www.ozlabs.org/~akpm/mmotm/ diff --git a/Next/merge.log b/Next/merge.log new file mode 100644 index 0000000000000..bbc64c85b4442 --- /dev/null +++ b/Next/merge.log @@ -0,0 +1,14707 @@ +$ date -R +Mon, 25 Oct 2021 08:35:23 +1100 +$ git checkout master +Already on 'master' +$ git reset --hard stable +Updating files: 100% (9124/9124), done.
+HEAD is now at 2f111a6fd5b5 Merge tag 'ceph-for-5.15-rc7' of git://github.com/ceph/ceph-client +Merging origin/master (87066fdd2e30 Revert "mm/secretmem: use refcount_t instead of atomic_t") +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git origin/master +Updating 2f111a6fd5b5..87066fdd2e30 +Fast-forward (no commit created; -m option ignored) + Documentation/networking/devlink/ice.rst | 9 +- + Documentation/networking/mctp.rst | 10 +- + MAINTAINERS | 1 + + arch/powerpc/kernel/idle_book3s.S | 10 +- + arch/powerpc/kernel/smp.c | 2 - + arch/x86/include/asm/kvm_host.h | 3 +- + arch/x86/kvm/mmu/mmu.c | 6 +- + arch/x86/kvm/svm/sev.c | 7 + + arch/x86/kvm/vmx/vmx.c | 17 +- + arch/x86/kvm/x86.c | 150 ++++++--- + block/blk-cgroup.c | 5 +- + block/partitions/core.c | 1 + + drivers/acpi/power.c | 7 +- + drivers/acpi/tables.c | 3 + + drivers/gpu/drm/ast/ast_mode.c | 18 +- + drivers/gpu/drm/kmb/kmb_crtc.c | 41 ++- + drivers/gpu/drm/kmb/kmb_drv.c | 2 +- + drivers/gpu/drm/kmb/kmb_drv.h | 10 +- + drivers/gpu/drm/kmb/kmb_dsi.c | 25 +- + drivers/gpu/drm/kmb/kmb_dsi.h | 2 +- + drivers/gpu/drm/kmb/kmb_plane.c | 43 ++- + drivers/gpu/drm/kmb/kmb_plane.h | 6 + + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 + + drivers/gpu/drm/msm/msm_gpu.h | 4 + + drivers/gpu/drm/msm/msm_gpu_devfreq.c | 3 +- + drivers/gpu/drm/mxsfb/mxsfb_drv.c | 6 +- + drivers/gpu/drm/panel/panel-ilitek-ili9881c.c | 12 +- + drivers/hv/hyperv_vmbus.h | 1 + + drivers/isdn/hardware/mISDN/hfcpci.c | 8 +- + drivers/net/can/m_can/m_can_platform.c | 14 +- + drivers/net/can/rcar/rcar_can.c | 20 +- + drivers/net/can/sja1000/peak_pci.c | 9 +- + drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 8 +- + drivers/net/dsa/lantiq_gswip.c | 2 +- + drivers/net/dsa/mt7530.c | 8 +- + drivers/net/ethernet/cavium/thunder/nic_main.c | 2 +- + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 +- + .../net/ethernet/freescale/enetc/enetc_ethtool.c | 2 +- + drivers/net/ethernet/freescale/enetc/enetc_pf.c | 5 +- + drivers/net/ethernet/hisilicon/hns3/hnae3.c | 21 ++ + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 37 ++- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 7 +- + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 9 + + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 5 +- + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 2 + + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 + + .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +- + drivers/net/ethernet/intel/e1000e/e1000.h | 4 +- + drivers/net/ethernet/intel/e1000e/ich8lan.c | 31 +- + drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 + + drivers/net/ethernet/intel/e1000e/netdev.c | 45 +-- + drivers/net/ethernet/intel/ice/ice_common.c | 2 + + drivers/net/ethernet/intel/ice/ice_devids.h | 4 + + drivers/net/ethernet/intel/ice/ice_devlink.c | 3 +- + drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 4 +- + drivers/net/ethernet/intel/ice/ice_lib.c | 9 + + drivers/net/ethernet/intel/ice/ice_main.c | 8 +- + drivers/net/ethernet/intel/ice/ice_sched.c | 13 + + drivers/net/ethernet/intel/ice/ice_sched.h | 1 + + drivers/net/ethernet/intel/igc/igc_hw.h | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 3 + + .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 2 + + .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 51 +-- + drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 28 +- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 + + 
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 + + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 +- + drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 7 +- + drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 + + drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 13 +- + drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h | 2 + + .../net/ethernet/microchip/sparx5/sparx5_main.c | 1 + + drivers/net/ethernet/mscc/ocelot_vsc7514.c | 1 + + drivers/net/ethernet/netronome/nfp/nfp_asm.c | 4 +- + drivers/net/ethernet/sfc/mcdi_port_common.c | 37 ++- + drivers/net/ethernet/sfc/ptp.c | 4 +- + drivers/net/ethernet/sfc/siena_sriov.c | 2 +- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- + drivers/net/hamradio/baycom_epp.c | 6 +- + drivers/net/usb/Kconfig | 1 + + drivers/net/usb/usbnet.c | 4 + + drivers/net/vrf.c | 4 - + drivers/nfc/st95hf/core.c | 6 +- + drivers/of/of_reserved_mem.c | 2 + + drivers/ptp/ptp_clock.c | 16 +- + drivers/ptp/ptp_kvm_x86.c | 4 +- + drivers/scsi/hosts.c | 3 +- + drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- + drivers/scsi/qla2xxx/qla_bsg.c | 2 +- + drivers/scsi/qla2xxx/qla_os.c | 2 +- + drivers/scsi/qla2xxx/qla_target.c | 14 +- + drivers/scsi/scsi.c | 4 +- + drivers/scsi/scsi_sysfs.c | 9 + + drivers/scsi/scsi_transport_iscsi.c | 2 - + drivers/scsi/sd.c | 7 +- + drivers/scsi/storvsc_drv.c | 32 +- + drivers/scsi/ufs/ufshcd-pci.c | 33 +- + fs/autofs/waitq.c | 2 +- + fs/fuse/fuse_i.h | 3 + + fs/fuse/inode.c | 87 +++--- + fs/fuse/virtio_fs.c | 12 +- + fs/io-wq.c | 7 +- + fs/io_uring.c | 54 +++- + fs/ksmbd/auth.c | 16 +- + fs/ksmbd/connection.c | 2 + + fs/ksmbd/ksmbd_netlink.h | 2 + + fs/ksmbd/mgmt/user_config.c | 2 +- + fs/ksmbd/mgmt/user_config.h | 1 + + fs/ksmbd/smb2misc.c | 55 ++-- + fs/ksmbd/smb2ops.c | 3 + + fs/ksmbd/smb2pdu.c | 346 ++++++++++++++------- + fs/ksmbd/smb2pdu.h | 2 + + fs/ksmbd/transport_ipc.c | 3 +- + fs/ksmbd/transport_ipc.h | 2 +- + fs/ksmbd/transport_rdma.c | 21 +- + fs/ksmbd/vfs.c | 2 +- + fs/ksmbd/vfs.h | 2 +- + include/acpi/platform/acgcc.h | 9 +- + include/linux/mlx5/driver.h | 1 - + include/linux/user_namespace.h | 2 + + include/net/mctp.h | 2 +- + include/net/sctp/sm.h | 6 +- + include/net/tcp.h | 5 +- + include/uapi/linux/mctp.h | 7 +- + kernel/cred.c | 9 +- + kernel/sched/core.c | 1 + + kernel/signal.c | 25 +- + kernel/ucount.c | 49 +++ + mm/memblock.c | 10 +- + mm/secretmem.c | 9 +- + net/bridge/br_private.h | 4 +- + net/bridge/netfilter/ebtables.c | 4 +- + net/can/isotp.c | 51 ++- + net/can/j1939/j1939-priv.h | 1 + + net/can/j1939/main.c | 7 +- + net/can/j1939/transport.c | 14 +- + net/dsa/dsa2.c | 9 +- + net/ipv4/tcp_ipv4.c | 45 ++- + net/ipv6/ip6_output.c | 3 +- + net/ipv6/netfilter/ip6t_rt.c | 48 +-- + net/ipv6/tcp_ipv6.c | 15 +- + net/netfilter/Kconfig | 2 +- + net/netfilter/ipvs/ip_vs_ctl.c | 5 + + net/netfilter/nft_chain_filter.c | 9 +- + net/netfilter/xt_IDLETIMER.c | 2 +- + net/sched/act_ct.c | 2 +- + security/keys/process_keys.c | 8 + + tools/testing/selftests/net/config | 1 + + tools/testing/selftests/net/fcnal-test.sh | 60 ++++ + tools/testing/selftests/net/forwarding/Makefile | 1 + + .../net/forwarding/forwarding.config.sample | 2 + + .../net/forwarding/ip6_forward_instats_vrf.sh | 172 ++++++++++ + tools/testing/selftests/net/forwarding/lib.sh | 8 + + tools/testing/selftests/net/nettest.c | 28 +- + tools/testing/selftests/netfilter/nft_flowtable.sh | 1 - + tools/testing/selftests/netfilter/nft_nat.sh | 145 +++++++++ + tools/testing/vsock/vsock_diag_test.c | 2 - + 159 files changed, 1743 insertions(+), 721 deletions(-) + 
create mode 100755 tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +Merging fixes/fixes (3ca706c189db drm/ttm: fix type mismatch error on sparc64) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/sfr/next-fixes.git fixes/fixes +Already up to date. +Merging kbuild-current/fixes (0664684e1ebd kbuild: Add -Werror=ignored-optimization-argument to CLANG_FLAGS) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kbuild-current/fixes +Already up to date. +Merging arc-current/for-curr (c3ca31ce0ea1 ARC: fix potential build snafu) +$ git merge -m Merge branch 'for-curr' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git arc-current/for-curr +Already up to date. +Merging arm-current/fixes (48ccc8edf5b9 ARM: 9141/1: only warn about XIP address when not compile testing) +$ git merge -m Merge branch 'fixes' of git://git.armlinux.org.uk/~rmk/linux-arm.git arm-current/fixes +Auto-merging arch/arm/Kconfig +Merge made by the 'recursive' strategy. + arch/arm/Kconfig | 1 + + arch/arm/boot/compressed/decompress.c | 3 +++ + arch/arm/include/asm/uaccess.h | 4 +++- + arch/arm/kernel/traps.c | 2 +- + arch/arm/kernel/vmlinux-xip.lds.S | 6 +++++- + arch/arm/mm/proc-macros.S | 1 + + arch/arm/probes/kprobes/core.c | 2 +- + 7 files changed, 15 insertions(+), 4 deletions(-) +Merging arm64-fixes/for-next/fixes (596143e3aec3 acpi/arm64: fix next_platform_timer() section mismatch error) +$ git merge -m Merge branch 'for-next/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux arm64-fixes/for-next/fixes +Already up to date. +Merging arm-soc-fixes/arm/fixes (36b6dcbc1245 Merge tag 'reset-fixes-for-v5.15' of git://git.pengutronix.de/pza/linux into arm/fixes) +$ git merge -m Merge branch 'arm/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git arm-soc-fixes/arm/fixes +Merge made by the 'recursive' strategy. + arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts | 2 +- + .../boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +- + .../boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 8 ++++--- + .../dts/freescale/imx8mm-kontron-n801x-som.dtsi | 8 ++++--- + drivers/reset/Kconfig | 4 ++-- + drivers/reset/reset-brcmstb-rescal.c | 2 +- + drivers/reset/reset-socfpga.c | 26 ++++++++++++++++++++++ + drivers/reset/tegra/reset-bpmp.c | 9 +++++++- + 8 files changed, 49 insertions(+), 12 deletions(-) +Merging drivers-memory-fixes/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git drivers-memory-fixes/fixes +Already up to date. +Merging m68k-current/for-linus (9fde03486402 m68k: Remove set_fs()) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git m68k-current/for-linus +Already up to date. +Merging powerpc-fixes/fixes (787252a10d94 powerpc/smp: do not decrement idle task preempt count in CPU offline) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git powerpc-fixes/fixes +Already up to date. +Merging s390-fixes/fixes (8b7216439e2e s390: add Alexander Gordeev as reviewer) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git s390-fixes/fixes +Already up to date. 
+Merging sparc/master (05a59d79793d Merge git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git sparc/master +Already up to date. +Merging fscrypt-current/for-stable (80f6e3080bfc fs-verity: fix signed integer overflow with i_size near S64_MAX) +$ git merge -m Merge branch 'for-stable' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fscrypt-current/for-stable +Already up to date. +Merging net/master (95a359c95533 net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git net/master +Merge made by the 'recursive' strategy. + drivers/net/ethernet/microchip/lan743x_main.c | 22 ++++ + net/core/skbuff.c | 36 ++++--- + net/core/sock_destructor.h | 12 +++ + net/mac80211/mesh.c | 9 +- + net/sctp/sm_statefuns.c | 139 ++++++++++++++++---------- + net/wireless/scan.c | 7 +- + tools/testing/selftests/net/fcnal-test.sh | 3 + + 7 files changed, 155 insertions(+), 73 deletions(-) + create mode 100644 net/core/sock_destructor.h +Merging bpf/master (04f8ef5643bc cgroup: Fix memory leak caused by missing cgroup_bpf_offline) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git bpf/master +Merge made by the 'recursive' strategy. + arch/arm64/net/bpf_jit_comp.c | 5 +++++ + arch/riscv/net/bpf_jit_core.c | 5 +++++ + include/linux/filter.h | 1 + + kernel/bpf/core.c | 4 +++- + kernel/bpf/syscall.c | 5 +++-- + kernel/cgroup/cgroup.c | 4 +++- + net/core/sysctl_net_core.c | 2 +- + 7 files changed, 21 insertions(+), 5 deletions(-) +Merging ipsec/master (93ec1320b017 xfrm: fix rcu lock in xfrm_notify_userpolicy()) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git ipsec/master +Already up to date. +Merging netfilter/master (2199f562730d ipvs: autoload ipvs on genl access) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git netfilter/master +Merge made by the 'recursive' strategy. + net/netfilter/ipvs/ip_vs_ctl.c | 2 ++ + net/netfilter/nfnetlink_queue.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) +Merging ipvs/master (d9aaaf223297 netfilter: ebtables: allocate chainstack on CPU local nodes) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git ipvs/master +Already up to date. +Merging wireless-drivers/master (603a1621caa0 mwifiex: avoid null-pointer-subtraction warning) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git wireless-drivers/master +Already up to date. +Merging mac80211/master (7fcb1c950e98 Merge tag 'mac80211-for-net-2021-10-21' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git mac80211/master +Already up to date. +Merging rdma-fixes/for-rc (2dace185caa5 RDMA/irdma: Do not hold qos mutex twice on QP resume) +$ git merge -m Merge branch 'for-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git rdma-fixes/for-rc +Merge made by the 'recursive' strategy. 
+ drivers/infiniband/hw/hfi1/pio.c | 9 ++++++--- + drivers/infiniband/hw/irdma/uk.c | 4 ++-- + drivers/infiniband/hw/irdma/verbs.c | 8 ++++++-- + drivers/infiniband/hw/irdma/ws.c | 13 ++++++------ + drivers/infiniband/hw/mlx5/mr.c | 2 +- + drivers/infiniband/hw/mlx5/qp.c | 2 ++ + drivers/infiniband/hw/qedr/qedr.h | 1 + + drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- + drivers/infiniband/hw/qedr/verbs.c | 5 ++++- + drivers/infiniband/hw/qib/qib_user_sdma.c | 33 +++++++++++++++++++++---------- + drivers/infiniband/sw/rdmavt/qp.c | 2 +- + 11 files changed, 54 insertions(+), 27 deletions(-) +Merging sound-current/for-linus (5fc462c3aaad ALSA: hda/realtek: Fix mic mute LED for the HP Spectre x360 14) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git sound-current/for-linus +Merge made by the 'recursive' strategy. + sound/core/oss/mixer_oss.c | 44 ++++++++++++++++++++++++++++++++----------- + sound/pci/hda/patch_realtek.c | 17 +++++++++++++++++ + 2 files changed, 50 insertions(+), 11 deletions(-) +Merging sound-asoc-fixes/for-linus (f15fe1868e70 Merge remote-tracking branch 'asoc/for-5.15' into asoc-linus) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git sound-asoc-fixes/for-linus +Removing sound/soc/rockchip/rockchip_pcm.h +Removing sound/soc/rockchip/rockchip_pcm.c +Merge made by the 'recursive' strategy. + sound/soc/codecs/cs42l42.c | 218 ++++++++++++++++++++++---------------- + sound/soc/codecs/cs42l42.h | 3 + + sound/soc/rockchip/Makefile | 3 +- + sound/soc/rockchip/rockchip_i2s.c | 3 +- + sound/soc/rockchip/rockchip_pcm.c | 44 -------- + sound/soc/rockchip/rockchip_pcm.h | 11 -- + 6 files changed, 131 insertions(+), 151 deletions(-) + delete mode 100644 sound/soc/rockchip/rockchip_pcm.c + delete mode 100644 sound/soc/rockchip/rockchip_pcm.h +Merging regmap-fixes/for-linus (38a4b4fb7c73 Merge remote-tracking branch 'regmap/for-5.15' into regmap-linus) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git regmap-fixes/for-linus +Merge made by the 'recursive' strategy. + drivers/base/regmap/regcache-rbtree.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) +Merging regulator-fixes/for-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git regulator-fixes/for-linus +Already up to date. +Merging spi-fixes/for-linus (6ffd7104e887 Merge remote-tracking branch 'spi/for-5.15' into spi-linus) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git spi-fixes/for-linus +Merge made by the 'recursive' strategy. + drivers/spi/spi-altera-dfl.c | 2 +- + drivers/spi/spi-altera-platform.c | 2 +- + drivers/spi/spi-pl022.c | 5 +++-- + drivers/spi/spi-tegra20-slink.c | 2 +- + 4 files changed, 6 insertions(+), 5 deletions(-) +Merging pci-current/for-linus (e4e737bb5c17 Linux 5.15-rc2) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci-current/for-linus +Already up to date. +Merging driver-core.current/driver-core-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'driver-core-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git driver-core.current/driver-core-linus +Already up to date. 
+Merging tty.current/tty-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'tty-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty.current/tty-linus +Already up to date. +Merging usb.current/usb-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb.current/usb-linus +Already up to date. +Merging usb-gadget-fixes/fixes (e49d033bddf5 Linux 5.12-rc6) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git usb-gadget-fixes/fixes +Already up to date. +Merging usb-serial-fixes/usb-linus (ec8de6b8cec2 USB: serial: keyspan: fix memleak on probe errors) +$ git merge -m Merge branch 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git usb-serial-fixes/usb-linus +Merge made by the 'recursive' strategy. + drivers/usb/serial/keyspan.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) +Merging usb-chipidea-fixes/for-usb-fixes (f130d08a8d79 usb: chipidea: ci_hdrc_imx: Also search for 'phys' phandle) +$ git merge -m Merge branch 'for-usb-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git usb-chipidea-fixes/for-usb-fixes +Merge made by the 'recursive' strategy. +Merging phy/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git phy/fixes +Already up to date. +Merging staging.current/staging-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'staging-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git staging.current/staging-linus +Already up to date. +Merging iio-fixes/fixes-togreg (486a25084155 iio: buffer: Fix memory leak in iio_buffers_alloc_sysfs_and_mask()) +$ git merge -m Merge branch 'fixes-togreg' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git iio-fixes/fixes-togreg +Merge made by the 'recursive' strategy. + .../devicetree/bindings/iio/dac/adi,ad5766.yaml | 2 +- + drivers/iio/adc/ti-tsc2046.c | 2 +- + drivers/iio/dac/ad5446.c | 9 ++++++- + drivers/iio/dac/ad5766.c | 6 ++--- + drivers/iio/industrialio-buffer.c | 28 +++++++++++++--------- + drivers/iio/industrialio-core.c | 9 ++++++- + drivers/iio/pressure/st_pressure_spi.c | 4 ++++ + 7 files changed, 42 insertions(+), 18 deletions(-) +Merging char-misc.current/char-misc-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'char-misc-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git char-misc.current/char-misc-linus +Already up to date. +Merging soundwire-fixes/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git soundwire-fixes/fixes +Already up to date. +Merging thunderbolt-fixes/fixes (64570fbc14f8 Linux 5.15-rc5) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git thunderbolt-fixes/fixes +Already up to date. +Merging input-current/for-linus (a02dcde595f7 Input: touchscreen - avoid bitwise vs logical OR warning) +$ git merge -m Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git input-current/for-linus +Already up to date. 
+Merging crypto-current/master (f8690a4b5a1b crypto: x86/sm4 - Fix invalid section entry size) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git crypto-current/master +Merge made by the 'recursive' strategy. + arch/x86/crypto/sm4-aesni-avx-asm_64.S | 6 +++++- + arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 +++++- + 2 files changed, 10 insertions(+), 2 deletions(-) +Merging vfio-fixes/for-linus (42de956ca7e5 vfio/ap_ops: Add missed vfio_uninit_group_dev()) +$ git merge -m Merge branch 'for-linus' of git://github.com/awilliam/linux-vfio.git vfio-fixes/for-linus +Already up to date. +Merging kselftest-fixes/fixes (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kselftest-fixes/fixes +Already up to date. +Merging modules-fixes/modules-linus (0d67e332e6df module: fix clang CFI with MODULE_UNLOAD=n) +$ git merge -m Merge branch 'modules-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-fixes/modules-linus +Already up to date. +Merging dmaengine-fixes/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git dmaengine-fixes/fixes +Already up to date. +Merging backlight-fixes/for-backlight-fixes (a38fd8748464 Linux 5.12-rc2) +$ git merge -m Merge branch 'for-backlight-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git backlight-fixes/for-backlight-fixes +Already up to date. +Merging mtd-fixes/mtd/fixes (f60f5741002b mtd: rawnand: qcom: Update code word value for raw read) +$ git merge -m Merge branch 'mtd/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd-fixes/mtd/fixes +Already up to date. +Merging mfd-fixes/for-mfd-fixes (a61f4661fba4 mfd: intel_quark_i2c_gpio: Revert "Constify static struct resources") +$ git merge -m Merge branch 'for-mfd-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git mfd-fixes/for-mfd-fixes +Already up to date. +Merging v4l-dvb-fixes/fixes (206704a1fe0b media: atomisp: restore missing 'return' statement) +$ git merge -m Merge branch 'fixes' of git://linuxtv.org/mchehab/media-next.git v4l-dvb-fixes/fixes +Already up to date. +Merging reset-fixes/reset/fixes (3ad60b4b3570 reset: socfpga: add empty driver allowing consumers to probe) +$ git merge -m Merge branch 'reset/fixes' of https://git.pengutronix.de/git/pza/linux reset-fixes/reset/fixes +Already up to date. +Merging mips-fixes/mips-fixes (740da9d7ca4e MIPS: Revert "add support for buggy MT7621S core detection") +$ git merge -m Merge branch 'mips-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git mips-fixes/mips-fixes +Already up to date. +Merging at91-fixes/at91-fixes (dbe68bc9e82b ARM: dts: at91: sama7g5ek: to not touch slew-rate for SDMMC pins) +$ git merge -m Merge branch 'at91-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git at91-fixes/at91-fixes +Already up to date. +Merging omap-fixes/fixes (80d680fdccba ARM: dts: omap3430-sdp: Fix NAND device node) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git omap-fixes/fixes +Already up to date. +Merging kvm-fixes/master (95e16b4792b0 KVM: SEV-ES: go over the sev_pio_data buffer in multiple passes if needed) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git kvm-fixes/master +Already up to date. 
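git prints "Already up to date." when the commit being merged is already an ancestor of HEAD, so no merge commit is created. The same condition can be tested explicitly with git merge-base --is-ancestor, which exits 0 when its first argument is contained in its second. An illustrative pre-check using the kvm-fixes tree from the entry above (the log itself does not perform this check, it simply lets git merge report the result):

$ if git merge-base --is-ancestor kvm-fixes/master HEAD; then
    echo "Already up to date."    # nothing to merge, skip the commit
  else
    git merge -m "Merge branch 'master' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git" kvm-fixes/master
  fi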
+Merging kvms390-fixes/master (0e9ff65f455d KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git kvms390-fixes/master +Merge made by the 'recursive' strategy. + arch/s390/kvm/interrupt.c | 5 +++-- + arch/s390/kvm/kvm-s390.c | 1 + + 2 files changed, 4 insertions(+), 2 deletions(-) +Merging hwmon-fixes/hwmon (ada61aa0b118 hwmon: Fix possible memleak in __hwmon_device_register()) +$ git merge -m Merge branch 'hwmon' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git hwmon-fixes/hwmon +Merge made by the 'recursive' strategy. + drivers/hwmon/hwmon.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) +Merging nvdimm-fixes/libnvdimm-fixes (d55174cccac2 nvdimm/pmem: fix creating the dax group) +$ git merge -m Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git nvdimm-fixes/libnvdimm-fixes +Already up to date. +Merging cxl-fixes/fixes (fae8817ae804 cxl/mem: Fix memory device capacity probing) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git cxl-fixes/fixes +Already up to date. +Merging btrfs-fixes/next-fixes (d7395f03c79c Merge branch 'misc-5.15' into next-fixes) +$ git merge -m Merge branch 'next-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git btrfs-fixes/next-fixes +Already up to date. +Merge made by the 'recursive' strategy. +Merging vfs-fixes/fixes (25f54d08f12f autofs: fix wait name hash calculation in autofs_wait()) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git vfs-fixes/fixes +Already up to date. +Merging dma-mapping-fixes/for-linus (18a3c5f7abfd Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost) +$ git merge -m Merge branch 'for-linus' of git://git.infradead.org/users/hch/dma-mapping.git dma-mapping-fixes/for-linus +Already up to date. +Merging i3c-fixes/i3c/fixes (fe07bfda2fb9 Linux 5.12-rc1) +$ git merge -m Merge branch 'i3c/fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git i3c-fixes/i3c/fixes +Already up to date. +Merging drivers-x86-fixes/fixes (7df227847ab5 platform/x86: int1092: Fix non sequential device mode handling) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git drivers-x86-fixes/fixes +Already up to date. +Merging samsung-krzk-fixes/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git samsung-krzk-fixes/fixes +Already up to date. +Merging pinctrl-samsung-fixes/fixes (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git pinctrl-samsung-fixes/fixes +Already up to date. +Merging devicetree-fixes/dt/linus (b2d70c0dbf27 dt-bindings: drm/bridge: ti-sn65dsi86: Fix reg value) +$ git merge -m Merge branch 'dt/linus' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git devicetree-fixes/dt/linus +Already up to date. +Merging scsi-fixes/fixes (282da7cef078 scsi: ufs: ufs-exynos: Correct timeout value setting registers) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git scsi-fixes/fixes +Merge made by the 'recursive' strategy. 
+ drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++- + drivers/scsi/ufs/ufs-exynos.c | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) +Merging drm-fixes/drm-fixes (595cb5e0b832 Revert "drm/ast: Add detect function support") +$ git merge -m Merge branch 'drm-fixes' of git://git.freedesktop.org/git/drm/drm.git drm-fixes/drm-fixes +Already up to date. +Merging amdgpu-fixes/drm-fixes (2c409ba81be2 drm/radeon: fix si_enable_smc_cac() failed issue) +$ git merge -m Merge branch 'drm-fixes' of git://people.freedesktop.org/~agd5f/linux amdgpu-fixes/drm-fixes +Already up to date. +Merging drm-intel-fixes/for-linux-next-fixes (59be177a909a drm/i915: Remove memory frequency calculation) +$ git merge -m Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm-intel drm-intel-fixes/for-linux-next-fixes +Merge made by the 'recursive' strategy. + drivers/gpu/drm/i915/i915_reg.h | 8 -------- + drivers/gpu/drm/i915/intel_dram.c | 30 ++---------------------------- + 2 files changed, 2 insertions(+), 36 deletions(-) +Merging mmc-fixes/fixes (162079f2dccd mmc: winbond: don't build on M68K) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git mmc-fixes/fixes +Merge made by the 'recursive' strategy. + drivers/mmc/host/Kconfig | 2 +- + drivers/mmc/host/sdhci-esdhc-imx.c | 16 ++++++++++++++++ + drivers/mmc/host/sdhci-pci-core.c | 29 ++++++++++++++++++++++++----- + drivers/mmc/host/sdhci.c | 6 ++++++ + 4 files changed, 47 insertions(+), 6 deletions(-) +Merging rtc-fixes/rtc-fixes (bd33335aa93d rtc: cmos: Disable irq around direct invocation of cmos_interrupt()) +$ git merge -m Merge branch 'rtc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git rtc-fixes/rtc-fixes +Auto-merging drivers/rtc/rtc-cmos.c +Merge made by the 'recursive' strategy. +Merging gnss-fixes/gnss-linus (e73f0f0ee754 Linux 5.14-rc1) +$ git merge -m Merge branch 'gnss-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git gnss-fixes/gnss-linus +Already up to date. +Merging hyperv-fixes/hyperv-fixes (8017c99680fa hyperv/vmbus: include linux/bitops.h) +$ git merge -m Merge branch 'hyperv-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git hyperv-fixes/hyperv-fixes +Already up to date. +Merging soc-fsl-fixes/fix (7e5e744183bb soc: fsl: dpio: fix qbman alignment error in the virtualization context) +$ git merge -m Merge branch 'fix' of git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git soc-fsl-fixes/fix +Merge made by the 'recursive' strategy. + drivers/soc/fsl/dpaa2-console.c | 1 + + drivers/soc/fsl/dpio/dpio-service.c | 2 +- + drivers/soc/fsl/dpio/qbman-portal.c | 33 +++++++++++++++------------------ + 3 files changed, 17 insertions(+), 19 deletions(-) +Merging risc-v-fixes/fixes (3ef6ca4f354c checksyscalls: Unconditionally ignore fstat{,at}64) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git risc-v-fixes/fixes +Already up to date. +Merging pidfd-fixes/fixes (03ba0fe4d09f file: simplify logic in __close_range()) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git pidfd-fixes/fixes +Already up to date. +Merging fpga-fixes/fixes (2a2a79577dda fpga: ice40-spi: Add SPI device ID table) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git fpga-fixes/fixes +Already up to date. 
+Merging spdx/spdx-linus (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'spdx-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx.git spdx/spdx-linus +Already up to date. +Merging gpio-brgl-fixes/gpio/for-current (6fda593f3082 gpio: mockup: Convert to use software nodes) +$ git merge -m Merge branch 'gpio/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio-brgl-fixes/gpio/for-current +Already up to date. +Merging gpio-intel-fixes/fixes (1649b8376694 gpio: pca953x: Improve bias setting) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git gpio-intel-fixes/fixes +Merge made by the 'recursive' strategy. +Merging pinctrl-intel-fixes/fixes (64570fbc14f8 Linux 5.15-rc5) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git pinctrl-intel-fixes/fixes +Already up to date. +Merging erofs-fixes/fixes (c40dd3ca2a45 erofs: clear compacted_2b if compacted_4b_initial > totalidx) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git erofs-fixes/fixes +Already up to date. +Merging integrity-fixes/fixes (843385694721 evm: Fix a small race in init_desc()) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity integrity-fixes/fixes +Already up to date. +Merging kunit-fixes/kunit-fixes (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'kunit-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kunit-fixes/kunit-fixes +Already up to date. +Merging ubifs-fixes/fixes (78c7d49f55d8 ubifs: journal: Make sure to not dirty twice for auth nodes) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git ubifs-fixes/fixes +Already up to date. +Merging memblock-fixes/fixes (5da2b76dd1f9 memblock: exclude MEMBLOCK_NOMAP regions from kmemleak) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git memblock-fixes/fixes +Auto-merging mm/memblock.c +Merge made by the 'recursive' strategy. +Merging cel-fixes/for-rc (7d2a07b76933 Linux 5.14) +$ git merge -m Merge branch 'for-rc' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux cel-fixes/for-rc +Already up to date. +Merging irqchip-fixes/irq/irqchip-fixes (b78f26926b17 irqchip/gic: Work around broken Renesas integration) +$ git merge -m Merge branch 'irq/irqchip-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git irqchip-fixes/irq/irqchip-fixes +Already up to date. +Merging renesas-fixes/fixes (432b52eea3dc ARM: shmobile: defconfig: Restore graphical consoles) +$ git merge -m Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git renesas-fixes/fixes +Already up to date. +Merging perf-current/perf/urgent (3ff6d64e68ab libperf tests: Fix test_stat_cpu) +$ git merge -m Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git perf-current/perf/urgent +Already up to date. +Merging efi-fixes/urgent (38fa3206bf44 efi: Change down_interruptible() in virt_efi_reset_system() to down_trylock()) +$ git merge -m Merge branch 'urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git efi-fixes/urgent +Already up to date. 
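The "Auto-merging <file>" lines above mark files that both sides of a merge modified and that the recursive strategy merged at the content level. For an integration branch like this one, rebuilt by replaying the same merges day after day, recurring conflicts in such files are often handled with git rerere, which records each hand resolution and reapplies it the next time the identical conflict appears. Whether rerere is enabled for this particular log is not shown; the mechanism itself is:

$ git config rerere.enabled true
$ git rerere status    # paths whose conflict resolutions rerere is currently recording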
+Merging drm-misc-fixes/for-linux-next-fixes (ee71fb6c4d99 drm/i915/selftests: Properly reset mock object propers for each test) +$ git merge -m Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc drm-misc-fixes/for-linux-next-fixes +Merge made by the 'recursive' strategy. + drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ + drivers/gpu/drm/selftests/test-drm_damage_helper.c | 1 + + drivers/gpu/drm/ttm/ttm_bo_util.c | 1 + + 3 files changed, 8 insertions(+) +Merging kbuild/for-next (7c5c49dc2b80 [for -next only] kconfig: generate include/generated/rustc_cfg) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kbuild/for-next +Auto-merging Makefile +Merge made by the 'recursive' strategy. + Documentation/kbuild/makefiles.rst | 17 +- + Makefile | 41 +--- + arch/alpha/Kbuild | 3 + + arch/alpha/Makefile | 3 - + arch/arc/Kbuild | 3 + + arch/arc/Makefile | 3 - + arch/arm/Kbuild | 3 + + arch/arm/Makefile | 4 - + arch/arm64/Kbuild | 3 + + arch/arm64/Makefile | 7 - + arch/arm64/kernel/Makefile | 3 + + arch/arm64/lib/Makefile | 2 + + arch/csky/Kbuild | 3 + + arch/csky/Makefile | 3 - + arch/h8300/Kbuild | 3 + + arch/h8300/Makefile | 3 - + arch/ia64/Makefile | 2 - + arch/m68k/Makefile | 4 +- + arch/microblaze/Kbuild | 3 + + arch/microblaze/Makefile | 3 - + arch/mips/Kbuild | 3 + + arch/mips/Makefile | 8 +- + arch/mips/boot/Makefile | 3 + + arch/nds32/Kbuild | 3 + + arch/nds32/Makefile | 5 +- + arch/nios2/Kbuild | 3 + + arch/nios2/Makefile | 9 +- + arch/nios2/boot/Makefile | 3 - + arch/openrisc/Kbuild | 3 + + arch/openrisc/Makefile | 7 +- + arch/parisc/Kbuild | 3 + + arch/parisc/Makefile | 7 +- + arch/powerpc/Kbuild | 3 + + arch/powerpc/Makefile | 7 +- + arch/powerpc/lib/Makefile | 2 + + arch/riscv/Kbuild | 3 + + arch/riscv/Makefile | 7 +- + arch/s390/Kbuild | 3 + + arch/s390/Makefile | 8 +- + arch/sh/Kbuild | 3 + + arch/sh/Makefile | 4 - + arch/sparc/Kbuild | 3 + + arch/sparc/Makefile | 3 - + arch/sparc/boot/Makefile | 8 +- + arch/x86/Kbuild | 3 + + arch/x86/Makefile | 2 - + arch/xtensa/Makefile | 4 +- + crypto/Makefile | 2 + + lib/raid6/Makefile | 4 + + scripts/Makefile.build | 63 ++--- + scripts/Makefile.debug | 33 +++ + scripts/Makefile.lib | 11 - + scripts/Makefile.package | 10 +- + scripts/kconfig/conf.c | 15 +- + scripts/kconfig/confdata.c | 468 +++++++++++++++++++++---------------- + scripts/kconfig/lexer.l | 9 +- + scripts/kconfig/lkc_proto.h | 2 +- + scripts/kconfig/menu.c | 33 +-- + scripts/kconfig/symbol.c | 43 ---- + scripts/link-vmlinux.sh | 6 +- + scripts/package/buildtar | 4 + + usr/gen_init_cpio.c | 20 +- + 62 files changed, 487 insertions(+), 472 deletions(-) + create mode 100644 scripts/Makefile.debug +Merging perf/perf/core (be8ecc57f180 perf srcline: Use long-running addr2line per DSO) +$ git merge -m Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git perf/perf/core +Auto-merging tools/perf/tests/code-reading.c +Auto-merging tools/perf/pmu-events/jevents.c +Auto-merging tools/perf/Makefile.perf +Auto-merging tools/perf/Makefile.config +Merge made by the 'recursive' strategy. 
+ tools/build/Makefile.feature | 1 + + tools/build/feature/Makefile | 4 + + tools/build/feature/test-libtracefs.c | 10 + + tools/perf/Documentation/perf-record.txt | 16 + + tools/perf/Makefile.config | 21 ++ + tools/perf/Makefile.perf | 2 + + tools/perf/arch/riscv64/annotate/instructions.c | 34 +++ + tools/perf/arch/x86/annotate/instructions.c | 28 +- + tools/perf/bench/synthesize.c | 4 +- + tools/perf/builtin-daemon.c | 13 +- + tools/perf/builtin-kvm.c | 2 +- + tools/perf/builtin-record.c | 46 ++- + tools/perf/builtin-top.c | 2 +- + tools/perf/builtin-trace.c | 4 +- + .../arch/arm64/hisilicon/hip08/uncore-ddrc.json | 32 +- + .../arch/arm64/hisilicon/hip08/uncore-hha.json | 120 ++++++-- + .../arch/arm64/hisilicon/hip08/uncore-l3c.json | 52 ++-- + .../pmu-events/arch/test/test_soc/sys/uncore.json | 7 + + tools/perf/pmu-events/jevents.c | 16 +- + tools/perf/tests/code-reading.c | 3 +- + tools/perf/tests/expr.c | 160 +++++++--- + tools/perf/tests/mmap-thread-lookup.c | 4 +- + tools/perf/tests/parse-events.c | 8 +- + tools/perf/tests/pmu-events.c | 135 +++++--- + tools/perf/tests/shell/stat_all_metricgroups.sh | 12 + + tools/perf/tests/shell/stat_all_metrics.sh | 22 ++ + tools/perf/tests/shell/stat_all_pmu.sh | 22 ++ + tools/perf/tests/vmlinux-kallsyms.c | 102 +++++++ + tools/perf/util/annotate.c | 6 +- + tools/perf/util/bpf_counter.c | 8 +- + tools/perf/util/bpf_counter_cgroup.c | 8 +- + tools/perf/util/debug.c | 19 ++ + tools/perf/util/expr.c | 121 ++++++-- + tools/perf/util/expr.h | 20 +- + tools/perf/util/expr.l | 9 - + tools/perf/util/expr.y | 325 +++++++++++++++----- + tools/perf/util/metricgroup.c | 142 +++++---- + tools/perf/util/parse-events.c | 82 ++++- + tools/perf/util/parse-events.h | 3 +- + tools/perf/util/parse-events.l | 1 - + tools/perf/util/record.h | 1 + + tools/perf/util/srcline.c | 338 +++++++++++++++------ + tools/perf/util/stat-shadow.c | 54 ++-- + tools/perf/util/synthetic-events.c | 73 +++-- + tools/perf/util/synthetic-events.h | 20 +- + 45 files changed, 1608 insertions(+), 504 deletions(-) + create mode 100644 tools/build/feature/test-libtracefs.c + create mode 100644 tools/perf/arch/riscv64/annotate/instructions.c + create mode 100755 tools/perf/tests/shell/stat_all_metricgroups.sh + create mode 100755 tools/perf/tests/shell/stat_all_metrics.sh + create mode 100755 tools/perf/tests/shell/stat_all_pmu.sh +Merging compiler-attributes/compiler-attributes (7c00621dcaee compiler_types: mark __compiletime_assert failure as __noreturn) +$ git merge -m Merge branch 'compiler-attributes' of https://github.com/ojeda/linux.git compiler-attributes/compiler-attributes +Merge made by the 'recursive' strategy. + include/linux/compiler_attributes.h | 1 - + include/linux/compiler_types.h | 8 +++++++- + 2 files changed, 7 insertions(+), 2 deletions(-) +Merging dma-mapping/for-next (7d6db80b7d26 sparc32: use DMA_DIRECT_REMAP) +$ git merge -m Merge branch 'for-next' of git://git.infradead.org/users/hch/dma-mapping.git dma-mapping/for-next +Merge made by the 'recursive' strategy. 
+ arch/sparc/Kconfig | 3 +- + arch/sparc/kernel/ioport.c | 76 +++++----------------------------------------- + 2 files changed, 10 insertions(+), 69 deletions(-) +Merging asm-generic/master (7efbbe6e1414 qcom_scm: hide Kconfig symbol) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git asm-generic/master +Auto-merging drivers/mmc/host/Kconfig +Auto-merging drivers/iommu/Kconfig +Auto-merging arch/x86/Kconfig +Auto-merging arch/riscv/include/asm/syscall.h +Auto-merging arch/riscv/Kconfig +Auto-merging arch/arm64/Kconfig +Auto-merging arch/arm/Kconfig +Merge made by the 'recursive' strategy. + arch/arm/include/asm/syscall.h | 10 ---------- + arch/arm64/include/asm/syscall.h | 10 ---------- + arch/csky/include/asm/syscall.h | 9 --------- + arch/ia64/include/asm/syscall.h | 17 ++--------------- + arch/ia64/kernel/ptrace.c | 31 ++++++++++++------------------- + arch/microblaze/include/asm/syscall.h | 33 --------------------------------- + arch/nds32/include/asm/syscall.h | 22 ---------------------- + arch/nios2/include/asm/syscall.h | 11 ----------- + arch/openrisc/include/asm/syscall.h | 7 ------- + arch/powerpc/include/asm/syscall.h | 10 ---------- + arch/riscv/include/asm/syscall.h | 9 --------- + arch/s390/include/asm/syscall.h | 12 ------------ + arch/sh/include/asm/syscall_32.h | 12 ------------ + arch/sparc/include/asm/syscall.h | 10 ---------- + arch/um/include/asm/syscall-generic.h | 14 -------------- + arch/x86/include/asm/syscall.h | 33 --------------------------------- + arch/xtensa/include/asm/syscall.h | 11 ----------- + include/asm-generic/syscall.h | 16 ---------------- + 18 files changed, 14 insertions(+), 263 deletions(-) +Merging arc/for-next (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git arc/for-next +Already up to date. +Merging arm/for-next (c03c06cd0ac5 Merge branch 'devel-stable' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.armlinux.org.uk/~rmk/linux-arm.git arm/for-next +Auto-merging arch/arm/mach-imx/pm-imx6.c +Auto-merging arch/arm/Makefile +Auto-merging arch/arm/Kconfig +Merge made by the 'recursive' strategy. 
+ arch/arm/Kconfig | 8 +- + arch/arm/Makefile | 9 +- + arch/arm/boot/compressed/fdt_check_mem_start.c | 48 ++++++++-- + arch/arm/common/scoop.c | 3 - + arch/arm/include/asm/assembler.h | 29 ++++++ + arch/arm/include/asm/current.h | 50 +++++++++++ + arch/arm/include/asm/io.h | 1 + + arch/arm/include/asm/smp.h | 3 +- + arch/arm/include/asm/stackprotector.h | 2 - + arch/arm/include/asm/switch_to.h | 16 ++++ + arch/arm/include/asm/thread_info.h | 15 +++- + arch/arm/include/asm/tls.h | 10 ++- + arch/arm/kernel/asm-offsets.c | 6 +- + arch/arm/kernel/entry-armv.S | 5 ++ + arch/arm/kernel/entry-common.S | 1 + + arch/arm/kernel/entry-header.S | 8 ++ + arch/arm/kernel/head-common.S | 5 ++ + arch/arm/kernel/head-nommu.S | 1 + + arch/arm/kernel/head.S | 5 +- + arch/arm/kernel/process.c | 8 +- + arch/arm/kernel/smp.c | 13 ++- + arch/arm/mach-imx/pm-imx6.c | 2 + + arch/arm/mm/fault.c | 119 +++++++++++++------------ + arch/arm/mm/fault.h | 4 + + arch/arm/mm/ioremap.c | 6 ++ + arch/arm/mm/proc-macros.S | 3 +- + drivers/amba/bus.c | 100 ++++++--------------- + drivers/of/platform.c | 6 +- + include/linux/amba/bus.h | 18 ---- + scripts/gcc-plugins/arm_ssp_per_task_plugin.c | 27 ++---- + 30 files changed, 319 insertions(+), 212 deletions(-) + create mode 100644 arch/arm/include/asm/current.h +Merging arm64/for-next/core (6203cd38cfc5 Merge branch 'for-next/trbe-errata' into for-next/core) +$ git merge -m Merge branch 'for-next/core' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux arm64/for-next/core +Removing tools/testing/selftests/arm64/fp/sve-ptrace-asm.S +Auto-merging kernel/dma/mapping.c +Auto-merging arch/arm64/net/bpf_jit_comp.c +Removing arch/arm64/kernel/cpu-reset.h +Auto-merging arch/arm64/Kconfig +Merge made by the 'recursive' strategy. + Documentation/arm64/cpu-feature-registers.rst | 12 +- + Documentation/arm64/elf_hwcaps.rst | 4 + + Documentation/arm64/silicon-errata.rst | 12 + + Documentation/dev-tools/kasan.rst | 7 +- + arch/arm/include/asm/arch_timer.h | 37 +- + arch/arm64/Kconfig | 121 ++++- + arch/arm64/include/asm/arch_timer.h | 78 ++-- + arch/arm64/include/asm/asm-extable.h | 95 ++++ + arch/arm64/include/asm/asm-uaccess.h | 7 +- + arch/arm64/include/asm/assembler.h | 78 ++-- + arch/arm64/include/asm/barrier.h | 16 +- + arch/arm64/include/asm/cputype.h | 4 + + arch/arm64/include/asm/esr.h | 6 + + arch/arm64/include/asm/extable.h | 23 +- + arch/arm64/include/asm/fpsimd.h | 113 ++++- + arch/arm64/include/asm/fpsimdmacros.h | 21 +- + arch/arm64/include/asm/ftrace.h | 2 +- + arch/arm64/include/asm/futex.h | 25 +- + arch/arm64/include/asm/gpr-num.h | 26 ++ + arch/arm64/include/asm/hwcap.h | 1 + + arch/arm64/include/asm/kexec.h | 12 + + arch/arm64/include/asm/kvm_asm.h | 7 +- + arch/arm64/include/asm/memory.h | 1 + + arch/arm64/include/asm/mmu_context.h | 24 + + arch/arm64/include/asm/mte-kasan.h | 5 + + arch/arm64/include/asm/mte.h | 8 +- + arch/arm64/include/asm/page.h | 1 - + arch/arm64/include/asm/pgtable.h | 5 + + arch/arm64/include/asm/processor.h | 49 ++- + arch/arm64/include/asm/sections.h | 1 + + arch/arm64/include/asm/setup.h | 6 + + arch/arm64/include/asm/sysreg.h | 31 +- + arch/arm64/include/asm/thread_info.h | 2 +- + arch/arm64/include/asm/trans_pgd.h | 14 +- + arch/arm64/include/asm/uaccess.h | 30 +- + arch/arm64/include/asm/vdso/compat_barrier.h | 7 - + arch/arm64/include/asm/virt.h | 7 + + arch/arm64/include/asm/vmalloc.h | 4 +- + arch/arm64/include/asm/word-at-a-time.h | 21 +- + arch/arm64/include/uapi/asm/hwcap.h | 1 + + arch/arm64/kernel/armv8_deprecated.c | 12 
+- + arch/arm64/kernel/asm-offsets.c | 11 + + arch/arm64/kernel/cpu-reset.S | 7 +- + arch/arm64/kernel/cpu-reset.h | 32 -- + arch/arm64/kernel/cpu_errata.c | 64 +++ + arch/arm64/kernel/cpufeature.c | 29 +- + arch/arm64/kernel/cpuinfo.c | 1 + + arch/arm64/kernel/entry-fpsimd.S | 34 +- + arch/arm64/kernel/entry.S | 10 +- + arch/arm64/kernel/fpsimd.c | 342 +++++++++------ + arch/arm64/kernel/hibernate-asm.S | 72 --- + arch/arm64/kernel/hibernate.c | 49 +-- + arch/arm64/kernel/machine_kexec.c | 177 ++++---- + arch/arm64/kernel/mte.c | 67 ++- + arch/arm64/kernel/ptrace.c | 6 +- + arch/arm64/kernel/relocate_kernel.S | 69 ++- + arch/arm64/kernel/sdei.c | 2 +- + arch/arm64/kernel/signal.c | 8 +- + arch/arm64/kernel/traps.c | 24 +- + arch/arm64/kernel/vdso32/Makefile | 36 +- + arch/arm64/kernel/vmlinux.lds.S | 22 +- + arch/arm64/kvm/hyp/fpsimd.S | 6 +- + arch/arm64/kvm/hyp/include/hyp/switch.h | 10 +- + arch/arm64/kvm/reset.c | 6 +- + arch/arm64/lib/clear_user.S | 10 +- + arch/arm64/lib/copy_from_user.S | 8 +- + arch/arm64/lib/copy_to_user.S | 8 +- + arch/arm64/mm/Makefile | 1 + + arch/arm64/mm/extable.c | 85 +++- + arch/arm64/mm/hugetlbpage.c | 27 +- + arch/arm64/mm/init.c | 39 -- + arch/arm64/mm/mmu.c | 5 + + arch/arm64/mm/trans_pgd-asm.S | 65 +++ + arch/arm64/mm/trans_pgd.c | 84 ++-- + arch/arm64/net/bpf_jit_comp.c | 9 +- + arch/arm64/tools/cpucaps | 5 + + drivers/clocksource/arm_arch_timer.c | 243 ++++++---- + drivers/perf/Kconfig | 12 +- + drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- + drivers/perf/thunderx2_pmu.c | 2 +- + include/clocksource/arm_arch_timer.h | 2 +- + include/linux/kasan.h | 9 +- + kernel/dma/mapping.c | 4 - + kernel/scs.c | 1 + + lib/test_kasan.c | 2 +- + mm/kasan/hw_tags.c | 29 +- + mm/kasan/kasan.h | 32 +- + mm/kasan/report.c | 2 +- + scripts/sorttable.c | 30 ++ + tools/testing/selftests/arm64/fp/Makefile | 6 +- + tools/testing/selftests/arm64/fp/TODO | 9 +- + tools/testing/selftests/arm64/fp/asm-utils.S | 172 ++++++++ + tools/testing/selftests/arm64/fp/assembler.h | 11 + + tools/testing/selftests/arm64/fp/fpsimd-test.S | 164 ------- + tools/testing/selftests/arm64/fp/sve-ptrace-asm.S | 33 -- + tools/testing/selftests/arm64/fp/sve-ptrace.c | 511 ++++++++++++++++------ + tools/testing/selftests/arm64/fp/sve-test.S | 163 ------- + tools/testing/selftests/arm64/fp/vec-syscfg.c | 95 +++- + 98 files changed, 2373 insertions(+), 1527 deletions(-) + create mode 100644 arch/arm64/include/asm/asm-extable.h + create mode 100644 arch/arm64/include/asm/gpr-num.h + delete mode 100644 arch/arm64/kernel/cpu-reset.h + create mode 100644 arch/arm64/mm/trans_pgd-asm.S + create mode 100644 tools/testing/selftests/arm64/fp/asm-utils.S + delete mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace-asm.S +Merging arm-perf/for-next/perf (e656972b6986 drivers/perf: Improve build test coverage) +$ git merge -m Merge branch 'for-next/perf' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git arm-perf/for-next/perf +Already up to date. 
+Merging arm-soc/for-next (c9b2bcb46758 Merge branch 'arm/dt' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git arm-soc/for-next +Removing include/dt-bindings/power/qcom-aoss-qmp.h +Removing drivers/tee/optee/shm_pool.h +Removing drivers/tee/optee/shm_pool.c +Auto-merging drivers/soc/fsl/dpio/qbman-portal.c +Auto-merging drivers/soc/fsl/dpio/dpio-service.c +Removing arch/arm/mach-omap2/scrm54xx.h +Auto-merging arch/arm/Kconfig +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt +Removing Documentation/devicetree/bindings/serial/fsl,s32-linflexuart.txt +Removing Documentation/devicetree/bindings/net/gpmc-eth.txt +Removing Documentation/devicetree/bindings/mtd/gpmc-onenand.txt +Removing Documentation/devicetree/bindings/mtd/gpmc-nor.txt +Removing Documentation/devicetree/bindings/mtd/gpmc-nand.txt +Removing Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt +Removing Documentation/devicetree/bindings/ddr/lpddr2.txt +Auto-merging .mailmap +Merge made by the 'recursive' strategy. + .mailmap | 1 + + .../ABI/testing/sysfs-driver-aspeed-uart-routing | 27 + + Documentation/arm/index.rst | 1 + + Documentation/arm/microchip.rst | 20 + + Documentation/arm/stm32/stm32mp13-overview.rst | 37 + + Documentation/devicetree/bindings/arm/amlogic.yaml | 3 + + .../devicetree/bindings/arm/atmel-at91.yaml | 24 + + .../devicetree/bindings/arm/bcm/bcm2835.yaml | 1 + + .../devicetree/bindings/arm/bcm/brcm,nsp.yaml | 65 +- + Documentation/devicetree/bindings/arm/cpus.yaml | 4 +- + Documentation/devicetree/bindings/arm/fsl.yaml | 99 +- + .../devicetree/bindings/arm/mediatek.yaml | 1 + + .../bindings/arm/mediatek/mediatek,mmsys.yaml | 4 + + Documentation/devicetree/bindings/arm/qcom.yaml | 16 + + Documentation/devicetree/bindings/arm/renesas.yaml | 61 + + .../devicetree/bindings/arm/rockchip.yaml | 48 +- + .../devicetree/bindings/arm/rockchip/pmu.yaml | 4 + + .../bindings/arm/samsung/samsung-boards.yaml | 6 + + .../devicetree/bindings/arm/sprd/sprd.yaml | 5 + + .../devicetree/bindings/arm/stm32/stm32.yaml | 4 + + .../arm/sunxi/allwinner,sun4i-a10-mbus.yaml | 1 + + .../arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml | 38 + + .../arm/sunxi/allwinner,sun9i-a80-prcm.yaml | 33 + + Documentation/devicetree/bindings/arm/ti/k3.yaml | 15 +- + Documentation/devicetree/bindings/arm/toshiba.yaml | 1 + + Documentation/devicetree/bindings/arm/xilinx.yaml | 17 + + Documentation/devicetree/bindings/ddr/lpddr2.txt | 102 - + .../bindings/display/brcm,bcm2835-dsi0.yaml | 3 + + .../bindings/display/brcm,bcm2835-hdmi.yaml | 3 + + .../bindings/display/brcm,bcm2835-v3d.yaml | 3 + + .../bindings/display/brcm,bcm2835-vec.yaml | 3 + + .../bindings/display/mediatek/mediatek,dsi.txt | 6 + + .../bindings/display/msm/dp-controller.yaml | 1 - + .../devicetree/bindings/firmware/qcom,scm.txt | 4 +- + .../bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml | 106 + + .../memory-controllers/ddr/jedec,lpddr2.yaml | 223 ++ + .../ddr/lpddr2-timings.txt | 0 + .../ddr/lpddr3-timings.txt | 0 + .../{ => memory-controllers}/ddr/lpddr3.txt | 5 +- + .../memory-controllers/mediatek,smi-common.yaml | 34 +- + .../memory-controllers/mediatek,smi-larb.yaml | 3 + + .../memory-controllers/nvidia,tegra20-emc.yaml | 23 +- + .../bindings/memory-controllers/omap-gpmc.txt | 157 - + .../memory-controllers/renesas,rpc-if.yaml | 1 + + .../memory-controllers/samsung,exynos5422-dmc.yaml | 3 +- + .../bindings/memory-controllers/ti,gpmc-child.yaml | 245 ++ + 
.../bindings/memory-controllers/ti,gpmc.yaml | 172 + + .../devicetree/bindings/mtd/gpmc-nand.txt | 147 - + Documentation/devicetree/bindings/mtd/gpmc-nor.txt | 98 - + .../devicetree/bindings/mtd/gpmc-onenand.txt | 48 - + .../devicetree/bindings/mtd/ti,gpmc-nand.yaml | 121 + + .../devicetree/bindings/mtd/ti,gpmc-onenand.yaml | 81 + + Documentation/devicetree/bindings/net/gpmc-eth.txt | 97 - + .../bindings/pci/nvidia,tegra194-pcie.txt | 2 +- + .../devicetree/bindings/power/qcom,rpmpd.yaml | 2 + + .../devicetree/bindings/reset/microchip,rst.yaml | 4 +- + .../reset/socionext,uniphier-glue-reset.yaml | 1 + + .../bindings/reset/socionext,uniphier-reset.yaml | 3 + + .../bindings/serial/fsl,s32-linflexuart.txt | 22 - + .../bindings/serial/fsl,s32-linflexuart.yaml | 48 + + .../bindings/soc/imx/fsl,imx8mm-disp-blk-ctrl.yaml | 94 + + .../bindings/soc/imx/fsl,imx8mm-vpu-blk-ctrl.yaml | 76 + + .../bindings/soc/qcom/qcom,aoss-qmp.yaml | 12 +- + .../devicetree/bindings/soc/qcom/qcom,apr.txt | 134 - + .../devicetree/bindings/soc/qcom/qcom,apr.yaml | 177 + + .../devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml | 3 + + .../devicetree/bindings/soc/qcom/qcom,spm.yaml | 80 + + .../bindings/sound/allwinner,sun4i-a10-i2s.yaml | 3 + + .../devicetree/bindings/vendor-prefixes.yaml | 12 + + MAINTAINERS | 17 + + arch/arm/Kconfig | 2 +- + arch/arm/arm-soc-for-next-contents.txt | 170 + + arch/arm/boot/dts/Makefile | 25 +- + arch/arm/boot/dts/am335x-pocketbeagle.dts | 1 + + arch/arm/boot/dts/armada-381-netgear-gs110emx.dts | 295 ++ + arch/arm/boot/dts/aspeed-bmc-amd-ethanolx.dts | 5 + + arch/arm/boot/dts/aspeed-bmc-ampere-mtjade.dts | 21 +- + arch/arm/boot/dts/aspeed-bmc-ibm-everest.dts | 883 ++--- + arch/arm/boot/dts/aspeed-bmc-ibm-rainier.dts | 1287 ++++++-- + arch/arm/boot/dts/aspeed-bmc-inspur-fp5280g2.dts | 9 +- + .../boot/dts/aspeed-bmc-inventec-transformers.dts | 328 ++ + arch/arm/boot/dts/aspeed-bmc-tyan-s7106.dts | 488 +++ + arch/arm/boot/dts/aspeed-g4.dtsi | 6 + + arch/arm/boot/dts/aspeed-g5.dtsi | 6 + + arch/arm/boot/dts/aspeed-g6.dtsi | 26 + + arch/arm/boot/dts/at91-lmu5000.dts | 147 + + arch/arm/boot/dts/at91-q5xr5.dts | 199 ++ + arch/arm/boot/dts/at91-sama5d27_som1.dtsi | 12 +- + arch/arm/boot/dts/at91-sama5d27_som1_ek.dts | 23 +- + arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi | 70 + + arch/arm/boot/dts/at91-sama5d2_icp.dts | 22 +- + arch/arm/boot/dts/at91-sama7g5ek.dts | 8 + + arch/arm/boot/dts/at91-tse850-3.dts | 2 +- + arch/arm/boot/dts/at91sam9260.dtsi | 2 +- + arch/arm/boot/dts/axp209.dtsi | 6 +- + arch/arm/boot/dts/axp22x.dtsi | 6 +- + arch/arm/boot/dts/axp81x.dtsi | 10 +- + arch/arm/boot/dts/bcm-nsp-ax.dtsi | 70 + + arch/arm/boot/dts/bcm-nsp.dtsi | 52 +- + arch/arm/boot/dts/bcm2711-rpi-4-b.dts | 38 +- + arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts | 138 + + arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi | 113 + + arch/arm/boot/dts/bcm2835-rpi-zero-w.dts | 31 +- + arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts | 36 +- + arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts | 36 +- + arch/arm/boot/dts/bcm2837-rpi-3-b.dts | 36 +- + arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi | 34 + + arch/arm/boot/dts/bcm4708-netgear-r6250.dts | 39 +- + arch/arm/boot/dts/bcm47081-buffalo-wzr-600dhp2.dts | 37 + + arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts | 2 +- + arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts | 2 +- + arch/arm/boot/dts/bcm4709-linksys-ea9200.dts | 2 +- + arch/arm/boot/dts/bcm4709-netgear-r7000.dts | 2 +- + arch/arm/boot/dts/bcm4709-netgear-r8000.dts | 44 +- + arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts | 2 +- + 
arch/arm/boot/dts/bcm47094-asus-rt-ac88u.dts | 200 ++ + arch/arm/boot/dts/bcm47094-dlink-dir-885l.dts | 42 + + arch/arm/boot/dts/bcm47094-linksys-panamera.dts | 2 +- + arch/arm/boot/dts/bcm47094-luxul-abr-4500.dts | 37 + + arch/arm/boot/dts/bcm47094-luxul-xbr-4500.dts | 37 + + arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts | 2 +- + arch/arm/boot/dts/bcm47189-tenda-ac9.dts | 37 + + arch/arm/boot/dts/bcm53016-meraki-mr32.dts | 35 +- + arch/arm/boot/dts/bcm5301x.dtsi | 10 +- + arch/arm/boot/dts/bcm53573.dtsi | 18 + + arch/arm/boot/dts/bcm94708.dts | 2 +- + arch/arm/boot/dts/bcm94709.dts | 2 +- + arch/arm/boot/dts/bcm958522er.dts | 3 +- + arch/arm/boot/dts/bcm958525er.dts | 3 +- + arch/arm/boot/dts/bcm958525xmc.dts | 3 +- + arch/arm/boot/dts/bcm958622hr.dts | 3 +- + arch/arm/boot/dts/bcm958623hr.dts | 3 +- + arch/arm/boot/dts/bcm958625-meraki-alamo.dtsi | 281 ++ + arch/arm/boot/dts/bcm958625-meraki-kingpin.dtsi | 163 + + arch/arm/boot/dts/bcm958625-meraki-mx64-a0.dts | 25 + + arch/arm/boot/dts/bcm958625-meraki-mx64.dts | 24 + + arch/arm/boot/dts/bcm958625-meraki-mx64w-a0.dts | 33 + + arch/arm/boot/dts/bcm958625-meraki-mx64w.dts | 32 + + arch/arm/boot/dts/bcm958625-meraki-mx65.dts | 24 + + arch/arm/boot/dts/bcm958625-meraki-mx65w.dts | 32 + + .../arm/boot/dts/bcm958625-meraki-mx6x-common.dtsi | 129 + + arch/arm/boot/dts/bcm958625hr.dts | 3 +- + arch/arm/boot/dts/bcm958625k.dts | 3 +- + arch/arm/boot/dts/bcm988312hr.dts | 7 +- + arch/arm/boot/dts/dra7.dtsi | 19 + + arch/arm/boot/dts/e60k02.dtsi | 2 +- + arch/arm/boot/dts/e70k02.dtsi | 320 ++ + arch/arm/boot/dts/emev2-kzm9d.dts | 2 +- + arch/arm/boot/dts/exynos4210-origen.dts | 24 +- + arch/arm/boot/dts/exynos4412-origen.dts | 14 +- + arch/arm/boot/dts/exynos5250-arndale.dts | 3 - + arch/arm/boot/dts/exynos5250.dtsi | 1 - + arch/arm/boot/dts/gemini-dlink-dir-685.dts | 18 - + arch/arm/boot/dts/gemini-ns2502.dts | 148 + + arch/arm/boot/dts/gemini-sl93512r.dts | 18 - + arch/arm/boot/dts/gemini-sq201.dts | 18 - + arch/arm/boot/dts/gemini-ssi1328.dts | 138 + + arch/arm/boot/dts/gemini-wbd111.dts | 18 - + arch/arm/boot/dts/gemini-wbd222.dts | 18 - + arch/arm/boot/dts/gemini.dtsi | 33 +- + arch/arm/boot/dts/imx6dl-alti6p.dts | 2 +- + arch/arm/boot/dts/imx6dl-b1x5v2.dtsi | 1 - + arch/arm/boot/dts/imx6dl-prtrvt.dts | 2 - + arch/arm/boot/dts/imx6dl-skov-revc-lt2.dts | 1 + + arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 8 - + arch/arm/boot/dts/imx6q-skov-revc-lt2.dts | 1 + + arch/arm/boot/dts/imx6qdl-apalis.dtsi | 7 +- + arch/arm/boot/dts/imx6qdl-phytec-mira.dtsi | 31 +- + arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi | 12 +- + arch/arm/boot/dts/imx6qdl-skov-revc-lt2.dtsi | 99 + + arch/arm/boot/dts/imx6qdl-tqma6.dtsi | 2 +- + arch/arm/boot/dts/imx6qdl.dtsi | 7 +- + arch/arm/boot/dts/imx6qp-prtwd3.dts | 4 +- + arch/arm/boot/dts/imx6qp.dtsi | 2 +- + arch/arm/boot/dts/imx6sl-tolino-vision5.dts | 349 ++ + arch/arm/boot/dts/imx6sl.dtsi | 18 +- + arch/arm/boot/dts/imx6sll-kobo-librah2o.dts | 339 ++ + arch/arm/boot/dts/imx6sll.dtsi | 22 +- + arch/arm/boot/dts/imx6sx.dtsi | 6 +- + arch/arm/boot/dts/imx6ul-phytec-phycore-som.dtsi | 12 +- + arch/arm/boot/dts/imx6ul-phytec-segin.dtsi | 1 + + arch/arm/boot/dts/imx6ull-colibri-emmc-eval-v3.dts | 17 + + .../arm/boot/dts/imx6ull-colibri-emmc-nonwifi.dtsi | 185 ++ + arch/arm/boot/dts/imx6ull-colibri.dtsi | 32 +- + arch/arm/boot/dts/imx7-mba7.dtsi | 42 +- + arch/arm/boot/dts/imx7-tqma7.dtsi | 47 +- + arch/arm/boot/dts/imx7d-mba7.dts | 6 +- + arch/arm/boot/dts/imx7d-sdb.dts | 2 +- + arch/arm/boot/dts/imx7d-tqma7.dtsi | 4 
+- + arch/arm/boot/dts/imx7d.dtsi | 7 +- + arch/arm/boot/dts/imx7s-mba7.dts | 6 +- + arch/arm/boot/dts/imx7s-tqma7.dtsi | 4 +- + arch/arm/boot/dts/intel-ixp42x-adi-coyote.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-arcom-vulcan.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-dlink-dsm-g600.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-freecom-fsg-3.dts | 2 + + .../arm/boot/dts/intel-ixp42x-gateworks-gw2348.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-iomega-nas100d.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-ixdpg425.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-linksys-nslu2.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-linksys-wrv54g.dts | 2 + + arch/arm/boot/dts/intel-ixp42x-netgear-wg302v2.dts | 2 + + .../arm/boot/dts/intel-ixp43x-gateworks-gw2358.dts | 2 + + arch/arm/boot/dts/intel-ixp45x-ixp46x.dtsi | 8 + + .../boot/dts/intel-ixp4xx-reference-design.dtsi | 2 + + arch/arm/boot/dts/intel-ixp4xx.dtsi | 2 - + arch/arm/boot/dts/iwg20d-q7-common.dtsi | 2 + + arch/arm/boot/dts/ls1021a-qds.dts | 85 +- + arch/arm/boot/dts/ls1021a-tsn.dts | 4 +- + arch/arm/boot/dts/ls1021a-twr.dts | 63 +- + arch/arm/boot/dts/ls1021a.dtsi | 219 +- + arch/arm/boot/dts/mstar-v7.dtsi | 9 + + arch/arm/boot/dts/mt7623.dtsi | 33 + + arch/arm/boot/dts/mt7623a.dtsi | 4 + + arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 25 + + arch/arm/boot/dts/mt7629-rfb.dts | 3 +- + arch/arm/boot/dts/mt7629.dtsi | 45 +- + arch/arm/boot/dts/omap-gpmc-smsc911x.dtsi | 4 +- + arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi | 2 +- + arch/arm/boot/dts/omap-zoom-common.dtsi | 4 +- + arch/arm/boot/dts/omap2430-sdp.dts | 4 +- + arch/arm/boot/dts/omap3-cpu-thermal.dtsi | 2 +- + arch/arm/boot/dts/omap3-devkit8000-common.dtsi | 4 +- + arch/arm/boot/dts/omap3-gta04.dtsi | 23 +- + arch/arm/boot/dts/omap3-gta04a5.dts | 2 + + arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi | 2 +- + arch/arm/boot/dts/omap3-sb-t35.dtsi | 4 +- + arch/arm/boot/dts/qcom-apq8026-lge-lenok.dts | 237 ++ + arch/arm/boot/dts/qcom-apq8064.dtsi | 18 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c1.dts | 2 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk04.1-c3.dts | 2 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1-c1.dts | 2 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk07.1-c2.dts | 2 +- + arch/arm/boot/dts/qcom-ipq8064-ap148.dts | 2 +- + arch/arm/boot/dts/qcom-msm8226.dtsi | 263 +- + arch/arm/boot/dts/qcom-msm8974.dtsi | 4 +- + arch/arm/boot/dts/qcom-pm8226.dtsi | 27 + + arch/arm/boot/dts/r7s72100-genmai.dts | 2 + + arch/arm/boot/dts/r7s72100-gr-peach.dts | 2 + + arch/arm/boot/dts/r7s72100-rskrza1.dts | 2 + + arch/arm/boot/dts/r7s9210-rza2mevb.dts | 21 + + arch/arm/boot/dts/r8a73a4-ape6evm.dts | 1 + + arch/arm/boot/dts/r8a7740-armadillo800eva.dts | 3 + + arch/arm/boot/dts/r8a7742-iwg21d-q7-dbcm-ca.dts | 2 + + arch/arm/boot/dts/r8a7742-iwg21d-q7.dts | 2 + + arch/arm/boot/dts/r8a7743-sk-rzg1m.dts | 4 + + arch/arm/boot/dts/r8a7745-iwg22d-sodimm.dts | 2 + + arch/arm/boot/dts/r8a7745-sk-rzg1e.dts | 4 + + arch/arm/boot/dts/r8a77470-iwg23s-sbc.dts | 2 + + arch/arm/boot/dts/r8a7778-bockw.dts | 2 +- + arch/arm/boot/dts/r8a7779-marzen.dts | 2 +- + arch/arm/boot/dts/r8a7790-lager.dts | 2 + + arch/arm/boot/dts/r8a7790-stout.dts | 2 + + arch/arm/boot/dts/r8a7791-koelsch.dts | 2 + + arch/arm/boot/dts/r8a7791-porter.dts | 2 + + arch/arm/boot/dts/r8a7793-gose.dts | 2 + + arch/arm/boot/dts/r8a7794-alt.dts | 2 + + arch/arm/boot/dts/r8a7794-silk.dts | 2 + + arch/arm/boot/dts/rk3036.dtsi | 10 +- + arch/arm/boot/dts/rk3066a-mk808.dts | 27 + + arch/arm/boot/dts/rk3066a.dtsi | 32 +- + arch/arm/boot/dts/rk3188.dtsi | 13 +- + 
arch/arm/boot/dts/rk3229.dtsi | 2 +- + arch/arm/boot/dts/rk322x.dtsi | 14 +- + arch/arm/boot/dts/rk3288.dtsi | 22 +- + arch/arm/boot/dts/rv1108.dtsi | 16 +- + arch/arm/boot/dts/sama5d29.dtsi | 16 + + arch/arm/boot/dts/sama7g5.dtsi | 16 + + arch/arm/boot/dts/sh73a0-kzm9g.dts | 2 +- + arch/arm/boot/dts/socfpga_arria10_mercury_aa1.dts | 112 + + arch/arm/boot/dts/spear1310.dtsi | 6 - + arch/arm/boot/dts/spear1340.dtsi | 2 - + arch/arm/boot/dts/ste-ab8500.dtsi | 13 +- + arch/arm/boot/dts/ste-ab8505.dtsi | 13 +- + arch/arm/boot/dts/ste-href.dtsi | 6 + + arch/arm/boot/dts/ste-snowball.dts | 6 + + arch/arm/boot/dts/ste-ux500-samsung-codina.dts | 4 + + arch/arm/boot/dts/ste-ux500-samsung-gavini.dts | 4 + + arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 4 + + arch/arm/boot/dts/ste-ux500-samsung-janice.dts | 7 + + arch/arm/boot/dts/ste-ux500-samsung-kyle.dts | 4 + + arch/arm/boot/dts/ste-ux500-samsung-skomer.dts | 38 +- + arch/arm/boot/dts/stm32mp13-pinctrl.dtsi | 64 + + arch/arm/boot/dts/stm32mp131.dtsi | 283 ++ + arch/arm/boot/dts/stm32mp133.dtsi | 37 + + arch/arm/boot/dts/stm32mp135.dtsi | 12 + + arch/arm/boot/dts/stm32mp135f-dk.dts | 56 + + arch/arm/boot/dts/stm32mp13xc.dtsi | 17 + + arch/arm/boot/dts/stm32mp13xf.dtsi | 17 + + arch/arm/boot/dts/stm32mp15-pinctrl.dtsi | 8 +- + arch/arm/boot/dts/stm32mp151.dtsi | 18 +- + arch/arm/boot/dts/stm32mp157c-odyssey.dts | 6 + + arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 2 +- + arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 2 +- + arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts | 11 +- + arch/arm/boot/dts/sun4i-a10.dtsi | 11 +- + arch/arm/boot/dts/sun5i-a13.dtsi | 15 +- + arch/arm/boot/dts/sun6i-a31.dtsi | 44 +- + arch/arm/boot/dts/sun7i-a20-bananapi.dts | 17 +- + arch/arm/boot/dts/sun7i-a20.dtsi | 34 +- + arch/arm/boot/dts/sun8i-a33.dtsi | 4 +- + arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts | 2 +- + arch/arm/boot/dts/sun8i-a83t.dtsi | 4 +- + arch/arm/boot/dts/sun8i-h3.dtsi | 4 +- + arch/arm/boot/dts/sun8i-r40.dtsi | 39 + + arch/arm/boot/dts/sun8i-v3-sl631.dtsi | 2 +- + arch/arm/boot/dts/sun9i-a80-cubieboard4.dts | 2 +- + arch/arm/boot/dts/sunxi-libretech-all-h3-it.dtsi | 2 +- + arch/arm/boot/dts/tegra114.dtsi | 8 +- + arch/arm/boot/dts/tegra124.dtsi | 12 +- + arch/arm/boot/dts/tegra20-acer-a500-picasso.dts | 7 +- + arch/arm/boot/dts/tegra20-paz00.dts | 2 - + arch/arm/boot/dts/tegra20.dtsi | 13 +- + .../dts/tegra30-asus-nexus7-grouper-common.dtsi | 30 +- + arch/arm/boot/dts/tegra30-ouya.dts | 5 +- + arch/arm/boot/dts/tegra30.dtsi | 12 +- + arch/arm/configs/aspeed_g4_defconfig | 1 + + arch/arm/configs/aspeed_g5_defconfig | 35 +- + arch/arm/configs/imx_v6_v7_defconfig | 47 +- + arch/arm/configs/multi_v7_defconfig | 86 +- + arch/arm/configs/mvebu_v7_defconfig | 18 +- + arch/arm/mach-at91/Kconfig | 9 + + arch/arm/mach-ep93xx/clock.c | 975 +++--- + arch/arm/mach-ep93xx/core.c | 2 +- + arch/arm/mach-ep93xx/soc.h | 42 +- + arch/arm/mach-omap2/cm-regbits-44xx.h | 101 - + arch/arm/mach-omap2/cm1_44xx.h | 174 - + arch/arm/mach-omap2/cm1_54xx.h | 168 - + arch/arm/mach-omap2/cm1_7xx.h | 263 -- + arch/arm/mach-omap2/cm2_44xx.h | 386 --- + arch/arm/mach-omap2/cm2_54xx.h | 325 -- + arch/arm/mach-omap2/cm2_7xx.h | 449 --- + arch/arm/mach-omap2/cm33xx.h | 280 -- + arch/arm/mach-omap2/omap_hwmod.c | 6 +- + arch/arm/mach-omap2/pdata-quirks.c | 36 - + arch/arm/mach-omap2/powerdomain.c | 6 +- + arch/arm/mach-omap2/prcm43xx.h | 94 - + arch/arm/mach-omap2/prm33xx.h | 40 - + arch/arm/mach-omap2/prm44xx.h | 630 ---- + arch/arm/mach-omap2/prm54xx.h | 358 -- + 
arch/arm/mach-omap2/prm7xx.h | 613 ---- + arch/arm/mach-omap2/scrm44xx.h | 141 - + arch/arm/mach-omap2/scrm54xx.h | 228 -- + arch/arm/mach-qcom/platsmp.c | 71 + + arch/arm/mach-stm32/Kconfig | 8 + + arch/arm/mach-stm32/board-dt.c | 3 + + arch/arm/mach-sunxi/platsmp.c | 4 +- + arch/arm/mach-sunxi/sunxi.c | 4 +- + arch/arm64/boot/dts/allwinner/axp803.dtsi | 10 +- + arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi | 6 +- + .../boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi | 2 +- + .../boot/dts/allwinner/sun50i-a64-orangepi-win.dts | 2 +- + .../boot/dts/allwinner/sun50i-a64-pinetab.dts | 28 +- + .../boot/dts/allwinner/sun50i-a64-teres-i.dts | 3 +- + arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 17 + + .../boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi | 2 +- + .../boot/dts/allwinner/sun50i-h5-nanopi-r1s-h5.dts | 9 +- + arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi | 2 +- + .../boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi | 2 +- + arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi | 8 +- + arch/arm64/boot/dts/amlogic/Makefile | 3 + + .../dts/amlogic/meson-axg-jethome-jethub-j100.dts | 362 +++ + .../boot/dts/amlogic/meson-g12a-radxa-zero.dts | 405 +++ + arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts | 2 +- + arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts | 2 +- + arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts | 2 +- + .../boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi | 4 +- + .../boot/dts/amlogic/meson-g12b-odroid-n2.dtsi | 6 +- + arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi | 4 +- + .../amlogic/meson-gxl-s905w-jethome-jethub-j80.dts | 241 ++ + arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts | 61 + + .../boot/dts/amlogic/meson-sm1-bananapi-m5.dts | 2 +- + .../boot/dts/amlogic/meson-sm1-khadas-vim3l.dts | 2 +- + arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi | 6 +- + arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts | 2 +- + arch/arm64/boot/dts/broadcom/Makefile | 1 + + .../arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts | 2 + + arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi | 16 +- + arch/arm64/boot/dts/exynos/Makefile | 3 +- + arch/arm64/boot/dts/exynos/exynos5433-bus.dtsi | 10 +- + arch/arm64/boot/dts/exynos/exynos5433.dtsi | 6 +- + .../boot/dts/exynos/exynosautov9-pinctrl.dtsi | 1189 +++++++ + arch/arm64/boot/dts/exynos/exynosautov9-sadk.dts | 56 + + arch/arm64/boot/dts/exynos/exynosautov9.dtsi | 301 ++ + arch/arm64/boot/dts/freescale/Makefile | 4 + + arch/arm64/boot/dts/freescale/fsl-ls1012a-rdb.dts | 1 + + .../freescale/fsl-ls1028a-kontron-sl28-var1.dts | 60 +- + .../freescale/fsl-ls1028a-kontron-sl28-var2.dts | 17 +- + .../freescale/fsl-ls1028a-kontron-sl28-var4.dts | 49 +- + .../dts/freescale/fsl-ls1028a-kontron-sl28.dts | 31 +- + arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts | 10 +- + arch/arm64/boot/dts/freescale/fsl-ls1028a-rdb.dts | 19 +- + arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 72 +- + arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 40 +- + arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 40 +- + .../dts/freescale/fsl-lx2160a-bluebox3-rev-a.dts | 34 + + .../boot/dts/freescale/fsl-lx2160a-bluebox3.dts | 658 ++++ + arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 24 +- + .../boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 32 +- + .../dts/freescale/imx8mm-kontron-n801x-som.dtsi | 4 +- + .../boot/dts/freescale/imx8mm-venice-gw71xx.dtsi | 2 +- + .../boot/dts/freescale/imx8mm-venice-gw72xx.dtsi | 2 +- + .../boot/dts/freescale/imx8mm-venice-gw73xx.dtsi | 2 +- + .../boot/dts/freescale/imx8mm-venice-gw7901.dts | 24 + + arch/arm64/boot/dts/freescale/imx8mm.dtsi | 180 + + 
arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- + arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi | 46 +- + .../boot/dts/freescale/imx8mq-mnt-reform2.dts | 1 + + .../arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 2 + + arch/arm64/boot/dts/freescale/imx8mq.dtsi | 10 +- + arch/arm64/boot/dts/freescale/s32g2.dtsi | 124 + + arch/arm64/boot/dts/freescale/s32g274a-evb.dts | 34 + + arch/arm64/boot/dts/freescale/s32g274a-rdb2.dts | 40 + + arch/arm64/boot/dts/hisilicon/hi3660.dtsi | 8 +- + arch/arm64/boot/dts/hisilicon/hi3670-hikey970.dts | 22 +- + arch/arm64/boot/dts/hisilicon/hi3670.dtsi | 2 +- + arch/arm64/boot/dts/hisilicon/hi6220.dtsi | 4 +- + arch/arm64/boot/dts/hisilicon/hikey970-pmic.dtsi | 86 + + arch/arm64/boot/dts/marvell/Makefile | 1 + + .../boot/dts/marvell/armada-7040-mochabin.dts | 458 +++ + arch/arm64/boot/dts/mediatek/mt2712e.dtsi | 97 +- + arch/arm64/boot/dts/mediatek/mt6358.dtsi | 1 + + .../boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 16 +- + arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 6 +- + arch/arm64/boot/dts/mediatek/mt7622.dtsi | 112 +- + arch/arm64/boot/dts/mediatek/mt8173.dtsi | 2 + + .../mt8183-kukui-audio-da7219-max98357a.dtsi | 13 + + .../mt8183-kukui-audio-da7219-rt1015p.dtsi | 13 + + .../dts/mediatek/mt8183-kukui-audio-da7219.dtsi | 54 + + .../dts/mediatek/mt8183-kukui-audio-max98357a.dtsi | 13 + + .../dts/mediatek/mt8183-kukui-audio-rt1015p.dtsi | 13 + + .../mt8183-kukui-audio-ts3a227e-max98357a.dtsi | 13 + + .../mt8183-kukui-audio-ts3a227e-rt1015p.dtsi | 13 + + .../dts/mediatek/mt8183-kukui-audio-ts3a227e.dtsi | 32 + + .../dts/mediatek/mt8183-kukui-jacuzzi-burnet.dts | 1 + + .../dts/mediatek/mt8183-kukui-jacuzzi-damu.dts | 1 + + .../dts/mediatek/mt8183-kukui-jacuzzi-fennel.dtsi | 1 + + .../mt8183-kukui-jacuzzi-juniper-sku16.dts | 1 + + .../dts/mediatek/mt8183-kukui-jacuzzi-kappa.dts | 1 + + .../dts/mediatek/mt8183-kukui-jacuzzi-kenzo.dts | 1 + + .../mediatek/mt8183-kukui-jacuzzi-willow-sku0.dts | 1 + + .../mediatek/mt8183-kukui-jacuzzi-willow-sku1.dts | 1 + + .../boot/dts/mediatek/mt8183-kukui-kakadu.dts | 1 + + .../boot/dts/mediatek/mt8183-kukui-kodama.dtsi | 1 + + .../boot/dts/mediatek/mt8183-kukui-krane.dtsi | 5 + + arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 67 +- + arch/arm64/boot/dts/mediatek/mt8183.dtsi | 99 +- + arch/arm64/boot/dts/mediatek/mt8192.dtsi | 163 + + arch/arm64/boot/dts/nvidia/tegra132-norrin.dts | 2 - + arch/arm64/boot/dts/nvidia/tegra132.dtsi | 12 +- + arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 1554 ++++++++- + .../dts/nvidia/tegra186-p3509-0000+p3636-0001.dts | 506 ++- + arch/arm64/boot/dts/nvidia/tegra186.dtsi | 136 + + arch/arm64/boot/dts/nvidia/tegra194-p2972-0000.dts | 1495 ++++++++- + .../arm64/boot/dts/nvidia/tegra194-p3509-0000.dtsi | 1522 ++++++++- + arch/arm64/boot/dts/nvidia/tegra194.dtsi | 209 +- + arch/arm64/boot/dts/nvidia/tegra210-p2371-2180.dts | 876 +++++ + arch/arm64/boot/dts/nvidia/tegra210-p3450-0000.dts | 876 +++++ + arch/arm64/boot/dts/nvidia/tegra210.dtsi | 81 +- + arch/arm64/boot/dts/qcom/Makefile | 9 + + arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi | 12 + + arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 29 - + arch/arm64/boot/dts/qcom/ipq6018.dtsi | 85 +- + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 23 +- + .../boot/dts/qcom/msm8916-longcheer-l8150.dts | 62 +- + arch/arm64/boot/dts/qcom/msm8916.dtsi | 16 +- + .../arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi | 673 ++++ + arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts | 431 +++ + .../arm64/boot/dts/qcom/msm8996-xiaomi-scorpio.dts | 431 +++ + 
arch/arm64/boot/dts/qcom/msm8996.dtsi | 53 +- + arch/arm64/boot/dts/qcom/msm8998-fxtec-pro1.dts | 319 ++ + .../dts/qcom/msm8998-sony-xperia-yoshino-lilac.dts | 30 + + .../dts/qcom/msm8998-sony-xperia-yoshino-maple.dts | 54 + + .../qcom/msm8998-sony-xperia-yoshino-poplar.dts | 35 + + .../boot/dts/qcom/msm8998-sony-xperia-yoshino.dtsi | 670 ++++ + arch/arm64/boot/dts/qcom/msm8998.dtsi | 200 +- + arch/arm64/boot/dts/qcom/pm6150l.dtsi | 1 + + arch/arm64/boot/dts/qcom/pm660.dtsi | 5 +- + arch/arm64/boot/dts/qcom/pm8916.dtsi | 9 +- + arch/arm64/boot/dts/qcom/pmi8998.dtsi | 12 + + .../boot/dts/qcom/sc7180-trogdor-coachz-r1.dts | 14 + + .../arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi | 2 +- + .../boot/dts/qcom/sc7180-trogdor-homestar-r2.dts | 20 + + .../boot/dts/qcom/sc7180-trogdor-homestar-r3.dts | 15 + + .../boot/dts/qcom/sc7180-trogdor-homestar.dtsi | 335 ++ + arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi | 12 + + .../boot/dts/qcom/sc7180-trogdor-lte-sku.dtsi | 11 + + .../boot/dts/qcom/sc7180-trogdor-pompom-r1.dts | 8 + + .../boot/dts/qcom/sc7180-trogdor-pompom-r2.dts | 8 + + .../arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi | 8 +- + arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 15 +- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 76 +- + arch/arm64/boot/dts/qcom/sc7280-idp.dtsi | 225 +- + arch/arm64/boot/dts/qcom/sc7280.dtsi | 3430 +++++++++++++++----- + .../boot/dts/qcom/sdm630-sony-xperia-nile.dtsi | 8 +- + arch/arm64/boot/dts/qcom/sdm845.dtsi | 182 +- + .../boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 5 + + arch/arm64/boot/dts/qcom/sm6125.dtsi | 52 +- + .../dts/qcom/sm6350-sony-xperia-lena-pdx213.dts | 57 + + arch/arm64/boot/dts/qcom/sm6350.dtsi | 934 ++++++ + arch/arm64/boot/dts/qcom/sm8150.dtsi | 28 +- + arch/arm64/boot/dts/qcom/sm8250.dtsi | 22 +- + arch/arm64/boot/dts/qcom/sm8350.dtsi | 30 +- + arch/arm64/boot/dts/renesas/Makefile | 2 + + .../arm64/boot/dts/renesas/beacon-renesom-som.dtsi | 3 + + arch/arm64/boot/dts/renesas/cat875.dtsi | 2 + + arch/arm64/boot/dts/renesas/draak.dtsi | 686 ++++ + arch/arm64/boot/dts/renesas/ebisu.dtsi | 803 +++++ + arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi | 2 + + arch/arm64/boot/dts/renesas/r8a77961.dtsi | 11 + + arch/arm64/boot/dts/renesas/r8a77970-eagle.dts | 4 + + arch/arm64/boot/dts/renesas/r8a77970-v3msk.dts | 4 + + arch/arm64/boot/dts/renesas/r8a77980-condor.dts | 4 + + arch/arm64/boot/dts/renesas/r8a77980-v3hsk.dts | 4 + + arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 788 +---- + arch/arm64/boot/dts/renesas/r8a77995-draak.dts | 671 +--- + .../boot/dts/renesas/r8a779a0-falcon-cpu.dtsi | 70 + + arch/arm64/boot/dts/renesas/r8a779a0-falcon.dts | 2 + + arch/arm64/boot/dts/renesas/r8a779a0.dtsi | 1458 +++++++++ + arch/arm64/boot/dts/renesas/r8a779m0.dtsi | 12 + + arch/arm64/boot/dts/renesas/r8a779m1.dtsi | 9 + + arch/arm64/boot/dts/renesas/r8a779m2.dtsi | 12 + + arch/arm64/boot/dts/renesas/r8a779m3.dtsi | 9 + + arch/arm64/boot/dts/renesas/r8a779m4.dtsi | 12 + + .../boot/dts/renesas/r8a779m5-salvator-xs.dts | 36 + + arch/arm64/boot/dts/renesas/r8a779m5.dtsi | 21 + + arch/arm64/boot/dts/renesas/r8a779m6.dtsi | 12 + + arch/arm64/boot/dts/renesas/r8a779m7.dtsi | 12 + + arch/arm64/boot/dts/renesas/r8a779m8.dtsi | 12 + + arch/arm64/boot/dts/renesas/r9a07g044.dtsi | 337 ++ + arch/arm64/boot/dts/renesas/r9a07g044l2-smarc.dts | 7 +- + arch/arm64/boot/dts/renesas/rzg2l-smarc-som.dtsi | 275 ++ + arch/arm64/boot/dts/renesas/rzg2l-smarc.dtsi | 292 +- + arch/arm64/boot/dts/renesas/salvator-common.dtsi | 56 +- + arch/arm64/boot/dts/renesas/ulcb.dtsi | 2 + 
+ arch/arm64/boot/dts/rockchip/Makefile | 6 + + arch/arm64/boot/dts/rockchip/px30-evb.dts | 52 + + arch/arm64/boot/dts/rockchip/px30.dtsi | 126 +- + arch/arm64/boot/dts/rockchip/rk3308.dtsi | 49 +- + arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts | 3 - + arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 28 +- + arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts | 110 + + arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2 +- + arch/arm64/boot/dts/rockchip/rk3328.dtsi | 17 +- + arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi | 47 +- + arch/arm64/boot/dts/rockchip/rk3368.dtsi | 191 +- + arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts | 1 + + .../boot/dts/rockchip/rk3399-gru-chromebook.dtsi | 176 + + arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts | 1 + + .../boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts | 41 + + .../boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 182 ++ + arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 4 +- + .../boot/dts/rockchip/rk3399-kobol-helios64.dts | 36 + + arch/arm64/boot/dts/rockchip/rk3399-op1-opp.dtsi | 6 +- + arch/arm64/boot/dts/rockchip/rk3399-opp.dtsi | 6 +- + .../boot/dts/rockchip/rk3399-pinebook-pro.dts | 7 +- + .../arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts | 218 ++ + arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 54 + + .../boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts | 14 + + .../boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts | 47 + + arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dtsi | 29 + + arch/arm64/boot/dts/rockchip/rk3399.dtsi | 116 +- + arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts | 548 ++++ + arch/arm64/boot/dts/rockchip/rk3566.dtsi | 20 + + arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts | 313 ++ + arch/arm64/boot/dts/rockchip/rk3568-pinctrl.dtsi | 9 + + arch/arm64/boot/dts/rockchip/rk3568.dtsi | 644 +--- + arch/arm64/boot/dts/rockchip/rk356x.dtsi | 1145 +++++++ + arch/arm64/boot/dts/ti/Makefile | 4 +- + arch/arm64/boot/dts/ti/k3-am64-main.dtsi | 280 ++ + arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi | 8 + + arch/arm64/boot/dts/ti/k3-am64.dtsi | 2 + + arch/arm64/boot/dts/ti/k3-am642-evm.dts | 8 + + arch/arm64/boot/dts/ti/k3-am642-sk.dts | 8 + + .../boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi | 46 + + .../boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi | 51 + + arch/arm64/boot/dts/ti/k3-am65-iot2050-common.dtsi | 39 +- + arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 8 +- + arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 4 - + arch/arm64/boot/dts/ti/k3-am65.dtsi | 2 + + .../dts/ti/k3-am6528-iot2050-basic-common.dtsi | 60 + + .../boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts | 24 + + arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic.dts | 56 +- + arch/arm64/boot/dts/ti/k3-am654.dtsi | 4 + + .../dts/ti/k3-am6548-iot2050-advanced-common.dtsi | 56 + + .../boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts | 29 + + .../boot/dts/ti/k3-am6548-iot2050-advanced.dts | 50 +- + .../boot/dts/ti/k3-j7200-common-proc-board.dts | 3 + + arch/arm64/boot/dts/ti/k3-j7200-main.dtsi | 7 +- + arch/arm64/boot/dts/ti/k3-j7200.dtsi | 2 + + .../boot/dts/ti/k3-j721e-common-proc-board.dts | 3 + + arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 16 +- + arch/arm64/boot/dts/ti/k3-j721e-sk.dts | 1002 ++++++ + arch/arm64/boot/dts/ti/k3-j721e.dtsi | 3 + + arch/arm64/boot/dts/toshiba/Makefile | 1 + + arch/arm64/boot/dts/toshiba/tmpv7708-rm-mbrc.dts | 6 + + .../boot/dts/toshiba/tmpv7708-visrobo-vrb.dts | 61 + + .../boot/dts/toshiba/tmpv7708-visrobo-vrc.dtsi | 44 + + arch/arm64/boot/dts/toshiba/tmpv7708.dtsi | 59 + + arch/arm64/boot/dts/xilinx/Makefile | 14 + + 
arch/arm64/boot/dts/xilinx/zynqmp-clk-ccf.dtsi | 13 +- + .../arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dts | 315 ++ + .../arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dts | 298 ++ + arch/arm64/boot/dts/xilinx/zynqmp-sm-k26-revA.dts | 289 ++ + arch/arm64/boot/dts/xilinx/zynqmp-smk-k26-revA.dts | 21 + + arch/arm64/boot/dts/xilinx/zynqmp-zc1232-revA.dts | 16 +- + arch/arm64/boot/dts/xilinx/zynqmp-zc1254-revA.dts | 16 +- + arch/arm64/boot/dts/xilinx/zynqmp-zc1275-revA.dts | 18 +- + .../boot/dts/xilinx/zynqmp-zc1751-xm015-dc1.dts | 298 +- + .../boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts | 342 +- + .../boot/dts/xilinx/zynqmp-zc1751-xm017-dc3.dts | 49 +- + .../boot/dts/xilinx/zynqmp-zc1751-xm018-dc4.dts | 24 +- + .../boot/dts/xilinx/zynqmp-zc1751-xm019-dc5.dts | 330 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu100-revC.dts | 264 +- + .../arm64/boot/dts/xilinx/zynqmp-zcu102-rev1.1.dts | 15 + + arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revA.dts | 320 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu102-revB.dts | 3 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revA.dts | 292 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu104-revC.dts | 250 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu106-revA.dts | 340 +- + arch/arm64/boot/dts/xilinx/zynqmp-zcu111-revA.dts | 274 +- + arch/arm64/boot/dts/xilinx/zynqmp.dtsi | 93 +- + arch/arm64/configs/defconfig | 16 +- + drivers/bus/Kconfig | 2 +- + drivers/bus/brcmstb_gisb.c | 7 +- + drivers/bus/sun50i-de2.c | 7 +- + drivers/bus/ti-sysc.c | 276 +- + drivers/cpuidle/Kconfig.arm | 3 +- + drivers/cpuidle/cpuidle-qcom-spm.c | 318 +- + drivers/cpuidle/cpuidle-tegra.c | 3 + + drivers/firmware/qcom_scm.c | 4 + + drivers/firmware/tegra/bpmp-debugfs.c | 26 +- + drivers/firmware/tegra/bpmp-tegra210.c | 7 +- + drivers/gpu/drm/mediatek/mtk_dsi.c | 5 +- + drivers/memory/Kconfig | 5 +- + drivers/memory/fsl_ifc.c | 13 +- + drivers/memory/jedec_ddr.h | 47 + + drivers/memory/jedec_ddr_data.c | 41 + + drivers/memory/mtk-smi.c | 596 ++-- + drivers/memory/of_memory.c | 87 + + drivers/memory/of_memory.h | 9 + + drivers/memory/renesas-rpc-if.c | 159 +- + drivers/memory/samsung/Kconfig | 13 +- + drivers/memory/tegra/Kconfig | 1 + + drivers/memory/tegra/mc.c | 25 +- + drivers/memory/tegra/tegra186-emc.c | 5 + + drivers/memory/tegra/tegra20-emc.c | 200 +- + drivers/memory/tegra/tegra210-emc-cc-r21021.c | 2 +- + drivers/memory/tegra/tegra210-emc-core.c | 6 +- + drivers/memory/tegra/tegra30-emc.c | 4 +- + drivers/reset/Kconfig | 4 +- + drivers/reset/reset-microchip-sparx5.c | 40 +- + drivers/reset/reset-uniphier-glue.c | 4 + + drivers/reset/reset-uniphier.c | 27 + + drivers/soc/amlogic/meson-canvas.c | 4 +- + drivers/soc/amlogic/meson-clk-measure.c | 4 +- + drivers/soc/amlogic/meson-gx-socinfo.c | 1 + + drivers/soc/aspeed/Kconfig | 10 + + drivers/soc/aspeed/Makefile | 9 +- + drivers/soc/aspeed/aspeed-uart-routing.c | 603 ++++ + drivers/soc/bcm/bcm63xx/bcm-pmb.c | 4 +- + drivers/soc/bcm/bcm63xx/bcm63xx-power.c | 4 +- + drivers/soc/bcm/brcmstb/biuctrl.c | 2 + + drivers/soc/fsl/dpio/dpio-service.c | 2 +- + drivers/soc/fsl/dpio/qbman-portal.c | 8 +- + drivers/soc/fsl/guts.c | 4 +- + drivers/soc/fsl/rcpm.c | 7 +- + drivers/soc/imx/Kconfig | 1 + + drivers/soc/imx/Makefile | 1 + + drivers/soc/imx/gpcv2.c | 134 +- + drivers/soc/imx/imx8m-blk-ctrl.c | 523 +++ + drivers/soc/mediatek/mt8192-mmsys.h | 76 + + drivers/soc/mediatek/mtk-mmsys.c | 79 + + drivers/soc/mediatek/mtk-mmsys.h | 2 + + drivers/soc/mediatek/mtk-mutex.c | 35 + + drivers/soc/qcom/Kconfig | 11 +- + drivers/soc/qcom/Makefile | 1 + + drivers/soc/qcom/apr.c 
| 287 +- + drivers/soc/qcom/cpr.c | 4 +- + drivers/soc/qcom/llcc-qcom.c | 18 +- + drivers/soc/qcom/ocmem.c | 4 +- + drivers/soc/qcom/pdr_interface.c | 12 +- + drivers/soc/qcom/qcom-geni-se.c | 4 +- + drivers/soc/qcom/qcom_aoss.c | 165 +- + drivers/soc/qcom/qcom_gsbi.c | 4 +- + drivers/soc/qcom/rpmh-rsc.c | 4 +- + drivers/soc/qcom/rpmhpd.c | 16 + + drivers/soc/qcom/rpmpd.c | 24 + + drivers/soc/qcom/smd-rpm.c | 2 + + drivers/soc/qcom/smp2p.c | 20 + + drivers/soc/qcom/socinfo.c | 12 + + drivers/soc/qcom/spm.c | 258 ++ + drivers/soc/renesas/Kconfig | 7 +- + drivers/soc/renesas/renesas-soc.c | 7 + + drivers/soc/sunxi/sunxi_sram.c | 4 +- + drivers/soc/tegra/Makefile | 1 + + drivers/soc/tegra/ari-tegra186.c | 80 + + drivers/soc/tegra/pmc.c | 28 +- + drivers/tee/optee/Makefile | 5 +- + drivers/tee/optee/call.c | 445 +-- + drivers/tee/optee/core.c | 719 +--- + drivers/tee/optee/ffa_abi.c | 911 ++++++ + drivers/tee/optee/optee_ffa.h | 153 + + drivers/tee/optee/optee_msg.h | 27 +- + drivers/tee/optee/optee_private.h | 157 +- + drivers/tee/optee/rpc.c | 237 +- + drivers/tee/optee/shm_pool.c | 101 - + drivers/tee/optee/shm_pool.h | 14 - + drivers/tee/optee/smc_abi.c | 1362 ++++++++ + drivers/watchdog/mtk_wdt.c | 6 +- + include/dt-bindings/power/imx8mm-power.h | 9 + + include/dt-bindings/power/qcom-aoss-qmp.h | 14 - + include/dt-bindings/power/qcom-rpmpd.h | 17 + + .../{reset-controller => reset}/mt2712-resets.h | 0 + include/dt-bindings/reset/mt8173-resets.h | 2 + + .../{reset-controller => reset}/mt8183-resets.h | 3 + + .../{reset-controller => reset}/mt8192-resets.h | 0 + include/dt-bindings/soc/qcom,gpr.h | 19 + + include/linux/clk/tegra.h | 24 +- + include/linux/platform_data/ti-sysc.h | 3 + + include/linux/soc/mediatek/mtk-mmsys.h | 3 + + include/linux/soc/qcom/apr.h | 70 +- + include/linux/soc/qcom/qcom_aoss.h | 38 + + include/linux/tee_drv.h | 7 +- + include/memory/renesas-rpc-if.h | 1 + + include/soc/qcom/spm.h | 43 + + include/soc/tegra/fuse.h | 31 +- + include/soc/tegra/irq.h | 9 +- + include/soc/tegra/pm.h | 2 +- + sound/soc/cirrus/ep93xx-i2s.c | 12 +- + 730 files changed, 50828 insertions(+), 14323 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing + create mode 100644 Documentation/arm/stm32/stm32mp13-overview.rst + create mode 100644 Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml + create mode 100644 Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml + delete mode 100644 Documentation/devicetree/bindings/ddr/lpddr2.txt + create mode 100644 Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml + create mode 100644 Documentation/devicetree/bindings/memory-controllers/ddr/jedec,lpddr2.yaml + rename Documentation/devicetree/bindings/{ => memory-controllers}/ddr/lpddr2-timings.txt (100%) + rename Documentation/devicetree/bindings/{ => memory-controllers}/ddr/lpddr3-timings.txt (100%) + rename Documentation/devicetree/bindings/{ => memory-controllers}/ddr/lpddr3.txt (94%) + delete mode 100644 Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt + create mode 100644 Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml + create mode 100644 Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml + delete mode 100644 Documentation/devicetree/bindings/mtd/gpmc-nand.txt + delete mode 100644 Documentation/devicetree/bindings/mtd/gpmc-nor.txt + delete mode 100644 Documentation/devicetree/bindings/mtd/gpmc-onenand.txt + create mode 100644 
Documentation/devicetree/bindings/mtd/ti,gpmc-nand.yaml + create mode 100644 Documentation/devicetree/bindings/mtd/ti,gpmc-onenand.yaml + delete mode 100644 Documentation/devicetree/bindings/net/gpmc-eth.txt + delete mode 100644 Documentation/devicetree/bindings/serial/fsl,s32-linflexuart.txt + create mode 100644 Documentation/devicetree/bindings/serial/fsl,s32-linflexuart.yaml + create mode 100644 Documentation/devicetree/bindings/soc/imx/fsl,imx8mm-disp-blk-ctrl.yaml + create mode 100644 Documentation/devicetree/bindings/soc/imx/fsl,imx8mm-vpu-blk-ctrl.yaml + delete mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt + create mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml + create mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml + create mode 100644 arch/arm/arm-soc-for-next-contents.txt + create mode 100644 arch/arm/boot/dts/armada-381-netgear-gs110emx.dts + create mode 100644 arch/arm/boot/dts/aspeed-bmc-inventec-transformers.dts + create mode 100644 arch/arm/boot/dts/aspeed-bmc-tyan-s7106.dts + create mode 100644 arch/arm/boot/dts/at91-lmu5000.dts + create mode 100644 arch/arm/boot/dts/at91-q5xr5.dts + create mode 100644 arch/arm/boot/dts/bcm-nsp-ax.dtsi + create mode 100644 arch/arm/boot/dts/bcm2711-rpi-cm4-io.dts + create mode 100644 arch/arm/boot/dts/bcm2711-rpi-cm4.dtsi + create mode 100644 arch/arm/boot/dts/bcm283x-rpi-wifi-bt.dtsi + create mode 100644 arch/arm/boot/dts/bcm47094-asus-rt-ac88u.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-alamo.dtsi + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-kingpin.dtsi + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx64-a0.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx64.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx64w-a0.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx64w.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx65.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx65w.dts + create mode 100644 arch/arm/boot/dts/bcm958625-meraki-mx6x-common.dtsi + create mode 100644 arch/arm/boot/dts/e70k02.dtsi + create mode 100644 arch/arm/boot/dts/gemini-ns2502.dts + create mode 100644 arch/arm/boot/dts/gemini-ssi1328.dts + create mode 100644 arch/arm/boot/dts/imx6qdl-skov-revc-lt2.dtsi + create mode 100644 arch/arm/boot/dts/imx6sl-tolino-vision5.dts + create mode 100644 arch/arm/boot/dts/imx6sll-kobo-librah2o.dts + create mode 100644 arch/arm/boot/dts/imx6ull-colibri-emmc-eval-v3.dts + create mode 100644 arch/arm/boot/dts/imx6ull-colibri-emmc-nonwifi.dtsi + create mode 100644 arch/arm/boot/dts/qcom-apq8026-lge-lenok.dts + create mode 100644 arch/arm/boot/dts/qcom-pm8226.dtsi + create mode 100644 arch/arm/boot/dts/sama5d29.dtsi + create mode 100644 arch/arm/boot/dts/socfpga_arria10_mercury_aa1.dts + create mode 100644 arch/arm/boot/dts/stm32mp13-pinctrl.dtsi + create mode 100644 arch/arm/boot/dts/stm32mp131.dtsi + create mode 100644 arch/arm/boot/dts/stm32mp133.dtsi + create mode 100644 arch/arm/boot/dts/stm32mp135.dtsi + create mode 100644 arch/arm/boot/dts/stm32mp135f-dk.dts + create mode 100644 arch/arm/boot/dts/stm32mp13xc.dtsi + create mode 100644 arch/arm/boot/dts/stm32mp13xf.dtsi + delete mode 100644 arch/arm/mach-omap2/scrm54xx.h + create mode 100644 arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts + create mode 100644 arch/arm64/boot/dts/amlogic/meson-g12a-radxa-zero.dts + create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxl-s905w-jethome-jethub-j80.dts + create 
mode 100644 arch/arm64/boot/dts/broadcom/bcm2711-rpi-cm4-io.dts + create mode 100644 arch/arm64/boot/dts/exynos/exynosautov9-pinctrl.dtsi + create mode 100644 arch/arm64/boot/dts/exynos/exynosautov9-sadk.dts + create mode 100644 arch/arm64/boot/dts/exynos/exynosautov9.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3-rev-a.dts + create mode 100644 arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts + create mode 100644 arch/arm64/boot/dts/freescale/s32g2.dtsi + create mode 100644 arch/arm64/boot/dts/freescale/s32g274a-evb.dts + create mode 100644 arch/arm64/boot/dts/freescale/s32g274a-rdb2.dts + create mode 100644 arch/arm64/boot/dts/hisilicon/hikey970-pmic.dtsi + create mode 100644 arch/arm64/boot/dts/marvell/armada-7040-mochabin.dts + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-da7219-max98357a.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-da7219-rt1015p.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-da7219.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-max98357a.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-rt1015p.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-ts3a227e-max98357a.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-ts3a227e-rt1015p.dtsi + create mode 100644 arch/arm64/boot/dts/mediatek/mt8183-kukui-audio-ts3a227e.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8996-xiaomi-scorpio.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8998-fxtec-pro1.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-lilac.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-maple.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-poplar.dts + create mode 100644 arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar-r2.dts + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar-r3.dts + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-homestar.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sm6350-sony-xperia-lena-pdx213.dts + create mode 100644 arch/arm64/boot/dts/qcom/sm6350.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/draak.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/ebisu.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m0.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m2.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m4.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m5-salvator-xs.dts + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m5.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m6.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m7.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/r8a779m8.dtsi + create mode 100644 arch/arm64/boot/dts/renesas/rzg2l-smarc-som.dtsi + create mode 100644 arch/arm64/boot/dts/rockchip/rk3328-roc-pc.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4a-plus.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts + create mode 100644 
arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts + create mode 100644 arch/arm64/boot/dts/rockchip/rk3566.dtsi + create mode 100644 arch/arm64/boot/dts/rockchip/rk356x.dtsi + create mode 100644 arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg1.dtsi + create mode 100644 arch/arm64/boot/dts/ti/k3-am65-iot2050-common-pg2.dtsi + create mode 100644 arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-common.dtsi + create mode 100644 arch/arm64/boot/dts/ti/k3-am6528-iot2050-basic-pg2.dts + create mode 100644 arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-common.dtsi + create mode 100644 arch/arm64/boot/dts/ti/k3-am6548-iot2050-advanced-pg2.dts + create mode 100644 arch/arm64/boot/dts/ti/k3-j721e-sk.dts + create mode 100644 arch/arm64/boot/dts/toshiba/tmpv7708-visrobo-vrb.dts + create mode 100644 arch/arm64/boot/dts/toshiba/tmpv7708-visrobo-vrc.dtsi + create mode 100644 arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revA.dts + create mode 100644 arch/arm64/boot/dts/xilinx/zynqmp-sck-kv-g-revB.dts + create mode 100644 arch/arm64/boot/dts/xilinx/zynqmp-sm-k26-revA.dts + create mode 100644 arch/arm64/boot/dts/xilinx/zynqmp-smk-k26-revA.dts + create mode 100644 arch/arm64/boot/dts/xilinx/zynqmp-zcu102-rev1.1.dts + create mode 100644 drivers/soc/aspeed/aspeed-uart-routing.c + create mode 100644 drivers/soc/imx/imx8m-blk-ctrl.c + create mode 100644 drivers/soc/mediatek/mt8192-mmsys.h + create mode 100644 drivers/soc/qcom/spm.c + create mode 100644 drivers/soc/tegra/ari-tegra186.c + create mode 100644 drivers/tee/optee/ffa_abi.c + create mode 100644 drivers/tee/optee/optee_ffa.h + delete mode 100644 drivers/tee/optee/shm_pool.c + delete mode 100644 drivers/tee/optee/shm_pool.h + create mode 100644 drivers/tee/optee/smc_abi.c + delete mode 100644 include/dt-bindings/power/qcom-aoss-qmp.h + rename include/dt-bindings/{reset-controller => reset}/mt2712-resets.h (100%) + rename include/dt-bindings/{reset-controller => reset}/mt8183-resets.h (98%) + rename include/dt-bindings/{reset-controller => reset}/mt8192-resets.h (100%) + create mode 100644 include/dt-bindings/soc/qcom,gpr.h + create mode 100644 include/linux/soc/qcom/qcom_aoss.h + create mode 100644 include/soc/qcom/spm.h +Merging actions/for-next (444d018d8d38 ARM: dts: owl-s500-roseapplepi: Add ATC2603C PMIC) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mani/linux-actions.git actions/for-next +Auto-merging arch/arm/boot/dts/owl-s500-roseapplepi.dts +Merge made by the 'recursive' strategy. +Merging amlogic/for-next (83e38509109e Merge branch 'v5.16/dt64' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux.git amlogic/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging aspeed/for-next (509d3f2b755f Merge branches 'defconfig-for-v5.16' and 'dt-for-v5.16' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/bmc.git aspeed/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging at91/at91-next (f3c0366411d6 ARM: dts: at91: sama7g5-ek: use blocks 0 and 1 of TCB0 as cs and ce) +$ git merge -m Merge branch 'at91-next' of git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git at91/at91-next +Auto-merging arch/arm/boot/dts/sama7g5.dtsi +CONFLICT (content): Merge conflict in arch/arm/boot/dts/sama7g5.dtsi +Auto-merging arch/arm/boot/dts/at91-sama7g5ek.dts +Resolved 'arch/arm/boot/dts/sama7g5.dtsi' using previous resolution. 
+Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 1f5106508c94] Merge branch 'at91-next' of git://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git +$ git diff -M --stat --summary HEAD^.. + arch/arm/boot/dts/at91-sama7g5ek.dts | 12 ++++++++++++ + arch/arm/boot/dts/sama7g5.dtsi | 27 +++++++++++++++++++++++++++ + 2 files changed, 39 insertions(+) +Merging drivers-memory/for-next (0fcbc3b7bcea Merge branch 'mem-ctrl-next' into for-next) +$ git merge -m Merge branch 'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git drivers-memory/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging imx-mxs/for-next (8bd7cd1cc7f0 Merge branch 'imx/maintainers' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git imx-mxs/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging keystone/next (cb293d3b430e Merge branch 'for_5.15/drivers-soc' into next) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git keystone/next +Already up to date. +Merge made by the 'recursive' strategy. +Merging mediatek/for-next (0efac36e9559 Merge branch 'v5.16-tmp/dts64' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/matthias.bgg/linux.git mediatek/for-next +Merge made by the 'recursive' strategy. + arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) +Merging mvebu/for-next (04e78a787b74 arm/arm64: dts: Enable 2.5G Ethernet port on CN9130-CRB) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gclement/mvebu.git mvebu/for-next +Merge made by the 'recursive' strategy. + arch/arm64/boot/dts/marvell/cn9130-crb.dtsi | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) +Merging omap/for-next (92d190433bd8 Merge branch 'omap-for-v5.16/gpmc' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git omap/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging qcom/for-next (d86ebed59e55 Merge branches 'arm64-defconfig-for-5.16', 'arm64-for-5.16', 'drivers-for-5.16' and 'dts-for-5.16' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git qcom/for-next +Auto-merging drivers/of/platform.c +Auto-merging arch/arm64/configs/defconfig +Removing arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi +Removing arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi +Removing arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +Removing arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi +Auto-merging arch/arm/boot/dts/Makefile +Merge made by the 'recursive' strategy. 
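[Aside: the at91 merge above is the only conflicted merge in this stretch: git stopped on arch/arm/boot/dts/sama7g5.dtsi, replayed a stored resolution ("Resolved ... using previous resolution"), and the merge was then committed by hand. A minimal sketch of that flow, assuming rerere is enabled and reusing the branch and file names from the log:
$ git config rerere.enabled true   # let git record and replay conflict resolutions
$ git merge at91/at91-next         # stops with a conflict in sama7g5.dtsi
$ git rerere status                # paths rerere is resolving from recorded history
$ git diff --check                 # confirm no conflict markers remain
$ git commit --no-edit -v -a       # record the merge, as the log does
With rerere active, a conflict that was resolved once in an earlier run is re-applied automatically, which is why the log can commit immediately after the "Automatic merge failed" message.]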
+ Documentation/devicetree/bindings/arm/cpus.yaml | 6 +- + Documentation/devicetree/bindings/arm/qcom.yaml | 11 +- + .../devicetree/bindings/soc/qcom/qcom,smem.yaml | 34 +- + .../devicetree/bindings/soc/qcom/qcom,spm.yaml | 1 + + .../devicetree/bindings/soc/qcom/qcom-stats.yaml | 47 + + Documentation/devicetree/bindings/sram/sram.yaml | 5 +- + arch/arm/boot/dts/Makefile | 3 +- + ...026-lge-lenok.dts => qcom-apq8026-lg-lenok.dts} | 2 +- + arch/arm/boot/dts/qcom-apq8060-dragonboard.dts | 10 +- + arch/arm/boot/dts/qcom-apq8064-asus-nexus7-flo.dts | 2 +- + arch/arm/boot/dts/qcom-apq8064-cm-qs600.dts | 6 +- + arch/arm/boot/dts/qcom-apq8064-ifc6410.dts | 6 +- + .../arm/boot/dts/qcom-apq8064-sony-xperia-yuga.dts | 4 +- + arch/arm/boot/dts/qcom-apq8064.dtsi | 45 +- + arch/arm/boot/dts/qcom-apq8084.dtsi | 8 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk01.1-c1.dts | 2 +- + arch/arm/boot/dts/qcom-ipq4019-ap.dk01.1.dtsi | 1 - + arch/arm/boot/dts/qcom-ipq8064-rb3011.dts | 8 +- + arch/arm/boot/dts/qcom-ipq8064.dtsi | 28 +- + arch/arm/boot/dts/qcom-mdm9615-wp8548.dtsi | 2 +- + arch/arm/boot/dts/qcom-mdm9615.dtsi | 16 +- + arch/arm/boot/dts/qcom-msm8660-surf.dts | 4 +- + arch/arm/boot/dts/qcom-msm8660.dtsi | 27 +- + .../boot/dts/qcom-msm8916-samsung-serranove.dts | 3 + + arch/arm/boot/dts/qcom-msm8916-smp.dtsi | 62 + + arch/arm/boot/dts/qcom-msm8960-cdp.dts | 4 +- + arch/arm/boot/dts/qcom-msm8960.dtsi | 4 +- + arch/arm/boot/dts/qcom-msm8974.dtsi | 12 +- + arch/arm/boot/dts/qcom-pm8841.dtsi | 7 +- + arch/arm/boot/dts/qcom-pm8941.dtsi | 11 +- + arch/arm/boot/dts/qcom-pma8084.dtsi | 11 +- + arch/arm/boot/dts/qcom-sdx55.dtsi | 1 - + arch/arm/mach-qcom/Kconfig | 10 + + arch/arm/mach-qcom/platsmp.c | 1 + + arch/arm64/boot/dts/qcom/Makefile | 3 + + arch/arm64/boot/dts/qcom/apq8016-sbc.dts | 832 +++++++++++- + arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi | 838 ------------ + .../apq8094-sony-xperia-kitakami-karin_windy.dts | 1 + + arch/arm64/boot/dts/qcom/apq8096-db820c.dts | 1070 ++++++++++++++- + arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 1076 --------------- + arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts | 3 - + arch/arm64/boot/dts/qcom/ipq6018.dtsi | 8 +- + arch/arm64/boot/dts/qcom/ipq8074-hk01.dts | 6 + + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 69 +- + .../boot/dts/qcom/msm8916-alcatel-idol347.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916-asus-z00l.dts | 177 +++ + arch/arm64/boot/dts/qcom/msm8916-huawei-g7.dts | 1 + + .../boot/dts/qcom/msm8916-longcheer-l8150.dts | 1 + + .../boot/dts/qcom/msm8916-longcheer-l8910.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916-mtp.dts | 15 +- + arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi | 21 - + .../boot/dts/qcom/msm8916-samsung-a3u-eur.dts | 1 + + .../boot/dts/qcom/msm8916-samsung-a5u-eur.dts | 1 + + .../boot/dts/qcom/msm8916-samsung-serranove.dts | 534 ++++++++ + .../boot/dts/qcom/msm8916-wingtech-wt88047.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916.dtsi | 73 +- + .../boot/dts/qcom/msm8992-bullhead-rev-101.dts | 2 + + .../qcom/msm8992-msft-lumia-octagon-talkman.dts | 1 + + arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts | 2 + + .../arm64/boot/dts/qcom/msm8994-angler-rev-101.dts | 1 + + .../qcom/msm8994-msft-lumia-octagon-cityman.dts | 1 + + .../dts/qcom/msm8994-sony-xperia-kitakami-ivy.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-karin.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-satsuki.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-sumire.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-suzuran.dts | 1 + + arch/arm64/boot/dts/qcom/msm8994.dtsi | 2 +- + 
arch/arm64/boot/dts/qcom/msm8996-mtp.dts | 24 +- + arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi | 30 - + .../dts/qcom/msm8996-sony-xperia-tone-dora.dts | 1 + + .../dts/qcom/msm8996-sony-xperia-tone-kagura.dts | 1 + + .../dts/qcom/msm8996-sony-xperia-tone-keyaki.dts | 1 + + .../boot/dts/qcom/msm8996-sony-xperia-tone.dtsi | 1 + + .../arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi | 2 +- + arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts | 34 + + .../arm64/boot/dts/qcom/msm8996-xiaomi-scorpio.dts | 1 + + arch/arm64/boot/dts/qcom/msm8996.dtsi | 43 +- + .../boot/dts/qcom/msm8998-asus-novago-tp370ql.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi | 2 + + arch/arm64/boot/dts/qcom/msm8998-fxtec-pro1.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998-hp-envy-x2.dts | 1 + + .../boot/dts/qcom/msm8998-lenovo-miix-630.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi | 4 - + .../boot/dts/qcom/msm8998-oneplus-cheeseburger.dts | 1 + + .../boot/dts/qcom/msm8998-oneplus-common.dtsi | 4 - + .../boot/dts/qcom/msm8998-oneplus-dumpling.dts | 1 + + .../dts/qcom/msm8998-sony-xperia-yoshino-lilac.dts | 1 + + .../dts/qcom/msm8998-sony-xperia-yoshino-maple.dts | 1 + + .../qcom/msm8998-sony-xperia-yoshino-poplar.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998.dtsi | 23 +- + arch/arm64/boot/dts/qcom/pm6350.dtsi | 54 + + arch/arm64/boot/dts/qcom/pm660l.dtsi | 7 - + arch/arm64/boot/dts/qcom/pm8916.dtsi | 9 +- + arch/arm64/boot/dts/qcom/pm8994.dtsi | 13 +- + arch/arm64/boot/dts/qcom/pmi8994.dtsi | 5 +- + arch/arm64/boot/dts/qcom/pmk8350.dtsi | 1 + + arch/arm64/boot/dts/qcom/qcs404.dtsi | 7 +- + arch/arm64/boot/dts/qcom/qrb5165-rb5.dts | 4 +- + arch/arm64/boot/dts/qcom/sa8155p-adp.dts | 12 +- + .../arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi | 1 + + .../boot/dts/qcom/sc7180-trogdor-homestar.dtsi | 1 + + arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi | 1 + + .../dts/qcom/sc7180-trogdor-parade-ps8640.dtsi | 109 ++ + .../arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi | 1 + + arch/arm64/boot/dts/qcom/sc7180-trogdor-r1.dts | 1 + + .../boot/dts/qcom/sc7180-trogdor-ti-sn65dsi86.dtsi | 90 ++ + arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 86 -- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 9 +- + arch/arm64/boot/dts/qcom/sc7280-herobrine.dts | 14 + + arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi | 1412 ++++++++++++++++++++ + arch/arm64/boot/dts/qcom/sc7280-idp.dts | 12 + + arch/arm64/boot/dts/qcom/sc7280-idp.dtsi | 50 + + arch/arm64/boot/dts/qcom/sc7280-idp2.dts | 8 + + arch/arm64/boot/dts/qcom/sc7280.dtsi | 130 +- + .../dts/qcom/sdm630-sony-xperia-ganges-kirin.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-discovery.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-pioneer.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-voyager.dts | 1 + + arch/arm64/boot/dts/qcom/sdm630.dtsi | 58 +- + .../arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts | 1 + + arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi | 17 +- + arch/arm64/boot/dts/qcom/sdm845-db845c.dts | 16 +- + arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 18 +- + .../arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi | 26 +- + .../boot/dts/qcom/sdm845-oneplus-enchilada.dts | 1 + + arch/arm64/boot/dts/qcom/sdm845-oneplus-fajita.dts | 1 + + .../boot/dts/qcom/sdm845-xiaomi-beryllium.dts | 19 +- + arch/arm64/boot/dts/qcom/sdm845.dtsi | 61 +- + .../boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 16 +- + .../dts/qcom/sm6125-sony-xperia-seine-pdx201.dts | 1 + + arch/arm64/boot/dts/qcom/sm6125.dtsi | 7 +- + .../dts/qcom/sm6350-sony-xperia-lena-pdx213.dts | 1 + + arch/arm64/boot/dts/qcom/sm6350.dtsi 
| 31 + + arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts | 320 +++++ + arch/arm64/boot/dts/qcom/sm7225.dtsi | 16 + + arch/arm64/boot/dts/qcom/sm8150-hdk.dts | 2 - + .../boot/dts/qcom/sm8150-microsoft-surface-duo.dts | 3 +- + arch/arm64/boot/dts/qcom/sm8150-mtp.dts | 2 - + .../dts/qcom/sm8150-sony-xperia-kumano-bahamut.dts | 1 + + .../dts/qcom/sm8150-sony-xperia-kumano-griffin.dts | 1 + + arch/arm64/boot/dts/qcom/sm8150.dtsi | 133 +- + arch/arm64/boot/dts/qcom/sm8250-hdk.dts | 2 - + arch/arm64/boot/dts/qcom/sm8250-mtp.dts | 2 - + .../dts/qcom/sm8250-sony-xperia-edo-pdx203.dts | 1 + + .../dts/qcom/sm8250-sony-xperia-edo-pdx206.dts | 1 + + arch/arm64/boot/dts/qcom/sm8250.dtsi | 30 +- + arch/arm64/boot/dts/qcom/sm8350-hdk.dts | 2 - + arch/arm64/boot/dts/qcom/sm8350-mtp.dts | 6 +- + arch/arm64/boot/dts/qcom/sm8350.dtsi | 272 +++- + arch/arm64/configs/defconfig | 11 +- + drivers/firmware/qcom_scm.c | 86 +- + drivers/firmware/qcom_scm.h | 4 + + drivers/of/platform.c | 1 + + drivers/soc/qcom/Kconfig | 10 + + drivers/soc/qcom/Makefile | 1 + + drivers/soc/qcom/apr.c | 2 + + drivers/soc/qcom/qcom_stats.c | 277 ++++ + drivers/soc/qcom/rpmhpd.c | 20 +- + drivers/soc/qcom/smem.c | 57 +- + drivers/soc/qcom/smp2p.c | 134 +- + drivers/soc/qcom/socinfo.c | 6 +- + drivers/soc/qcom/spm.c | 21 + + 162 files changed, 6603 insertions(+), 2467 deletions(-) + create mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml + rename arch/arm/boot/dts/{qcom-apq8026-lge-lenok.dts => qcom-apq8026-lg-lenok.dts} (99%) + create mode 100644 arch/arm/boot/dts/qcom-msm8916-samsung-serranove.dts + create mode 100644 arch/arm/boot/dts/qcom-msm8916-smp.dtsi + delete mode 100644 arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi + delete mode 100644 arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi + delete mode 100644 arch/arm64/boot/dts/qcom/msm8916-mtp.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/msm8916-samsung-serranove.dts + delete mode 100644 arch/arm64/boot/dts/qcom/msm8996-mtp.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/pm6350.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-parade-ps8640.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-ti-sn65dsi86.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sc7280-herobrine.dts + create mode 100644 arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sm7225-fairphone-fp4.dts + create mode 100644 arch/arm64/boot/dts/qcom/sm7225.dtsi + create mode 100644 drivers/soc/qcom/qcom_stats.c +$ git reset --hard HEAD^ +HEAD is now at a180fd8811c1 Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git +Merging next-20211022 version of qcom +$ git merge -m next-20211022/qcom 35aab56bc55a75cb93c6b24d9545293d6e343611 +Auto-merging drivers/of/platform.c +Auto-merging arch/arm64/configs/defconfig +Merge made by the 'recursive' strategy. 
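[Aside: directly above, the freshly created qcom/for-next merge is discarded with git reset --hard HEAD^ and replaced by a merge of the next-20211022 snapshot of the same tree, pinned by commit hash. A short sketch of this back-out-and-pin pattern; the hash is the one recorded in the log:
$ git reset --hard HEAD^   # drop only the just-created merge commit
$ git merge -m next-20211022/qcom 35aab56bc55a75cb93c6b24d9545293d6e343611
$ git log --oneline -1     # confirm HEAD is now the pinned merge
Because the merge is a single commit on the first-parent line, resetting one commit back restores the pre-merge tree exactly.]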
+ .../devicetree/bindings/soc/qcom/qcom,smem.yaml | 34 ++- + .../devicetree/bindings/soc/qcom/qcom-stats.yaml | 47 ++++ + Documentation/devicetree/bindings/sram/sram.yaml | 5 +- + arch/arm/boot/dts/qcom-apq8060-dragonboard.dts | 4 +- + arch/arm/boot/dts/qcom-apq8064.dtsi | 31 +-- + arch/arm/boot/dts/qcom-mdm9615.dtsi | 12 +- + arch/arm/boot/dts/qcom-msm8660.dtsi | 17 +- + arch/arm/boot/dts/qcom-pm8841.dtsi | 7 +- + arch/arm/boot/dts/qcom-pm8941.dtsi | 11 +- + arch/arm/boot/dts/qcom-pma8084.dtsi | 11 +- + arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi | 10 +- + .../apq8094-sony-xperia-kitakami-karin_windy.dts | 1 + + .../boot/dts/qcom/msm8916-alcatel-idol347.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916-asus-z00l.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916-huawei-g7.dts | 1 + + .../boot/dts/qcom/msm8916-longcheer-l8150.dts | 1 + + .../boot/dts/qcom/msm8916-longcheer-l8910.dts | 1 + + .../boot/dts/qcom/msm8916-samsung-a3u-eur.dts | 1 + + .../boot/dts/qcom/msm8916-samsung-a5u-eur.dts | 1 + + .../boot/dts/qcom/msm8916-wingtech-wt88047.dts | 1 + + arch/arm64/boot/dts/qcom/msm8916.dtsi | 2 +- + .../boot/dts/qcom/msm8992-bullhead-rev-101.dts | 2 + + .../qcom/msm8992-msft-lumia-octagon-talkman.dts | 1 + + arch/arm64/boot/dts/qcom/msm8992-xiaomi-libra.dts | 2 + + .../arm64/boot/dts/qcom/msm8994-angler-rev-101.dts | 1 + + .../qcom/msm8994-msft-lumia-octagon-cityman.dts | 1 + + .../dts/qcom/msm8994-sony-xperia-kitakami-ivy.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-karin.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-satsuki.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-sumire.dts | 1 + + .../qcom/msm8994-sony-xperia-kitakami-suzuran.dts | 1 + + .../dts/qcom/msm8996-sony-xperia-tone-dora.dts | 1 + + .../dts/qcom/msm8996-sony-xperia-tone-kagura.dts | 1 + + .../dts/qcom/msm8996-sony-xperia-tone-keyaki.dts | 1 + + arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts | 1 + + .../arm64/boot/dts/qcom/msm8996-xiaomi-scorpio.dts | 1 + + arch/arm64/boot/dts/qcom/msm8996.dtsi | 5 + + .../boot/dts/qcom/msm8998-asus-novago-tp370ql.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998-fxtec-pro1.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998-hp-envy-x2.dts | 1 + + .../boot/dts/qcom/msm8998-lenovo-miix-630.dts | 1 + + .../boot/dts/qcom/msm8998-oneplus-cheeseburger.dts | 1 + + .../boot/dts/qcom/msm8998-oneplus-dumpling.dts | 1 + + .../dts/qcom/msm8998-sony-xperia-yoshino-lilac.dts | 1 + + .../dts/qcom/msm8998-sony-xperia-yoshino-maple.dts | 1 + + .../qcom/msm8998-sony-xperia-yoshino-poplar.dts | 1 + + arch/arm64/boot/dts/qcom/msm8998.dtsi | 5 + + arch/arm64/boot/dts/qcom/pm8916.dtsi | 9 +- + arch/arm64/boot/dts/qcom/pm8994.dtsi | 13 +- + arch/arm64/boot/dts/qcom/qcs404.dtsi | 5 + + .../arm64/boot/dts/qcom/sc7180-trogdor-coachz.dtsi | 1 + + .../boot/dts/qcom/sc7180-trogdor-homestar.dtsi | 1 + + arch/arm64/boot/dts/qcom/sc7180-trogdor-lazor.dtsi | 1 + + .../dts/qcom/sc7180-trogdor-parade-ps8640.dtsi | 109 ++++++++ + .../arm64/boot/dts/qcom/sc7180-trogdor-pompom.dtsi | 1 + + arch/arm64/boot/dts/qcom/sc7180-trogdor-r1.dts | 1 + + .../boot/dts/qcom/sc7180-trogdor-ti-sn65dsi86.dtsi | 90 +++++++ + arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 86 ------- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 9 +- + arch/arm64/boot/dts/qcom/sc7280.dtsi | 7 +- + .../dts/qcom/sdm630-sony-xperia-ganges-kirin.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-discovery.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-pioneer.dts | 1 + + .../dts/qcom/sdm630-sony-xperia-nile-voyager.dts | 1 + + arch/arm64/boot/dts/qcom/sdm630.dtsi | 5 + + 
.../arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts | 1 + + .../boot/dts/qcom/sdm845-oneplus-enchilada.dts | 1 + + arch/arm64/boot/dts/qcom/sdm845-oneplus-fajita.dts | 1 + + .../boot/dts/qcom/sdm845-xiaomi-beryllium.dts | 1 + + arch/arm64/boot/dts/qcom/sdm845.dtsi | 10 +- + .../boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 1 + + .../dts/qcom/sm6125-sony-xperia-seine-pdx201.dts | 1 + + arch/arm64/boot/dts/qcom/sm6125.dtsi | 5 + + .../dts/qcom/sm6350-sony-xperia-lena-pdx213.dts | 1 + + .../boot/dts/qcom/sm8150-microsoft-surface-duo.dts | 1 + + .../dts/qcom/sm8150-sony-xperia-kumano-bahamut.dts | 1 + + .../dts/qcom/sm8150-sony-xperia-kumano-griffin.dts | 1 + + arch/arm64/boot/dts/qcom/sm8150.dtsi | 7 +- + .../dts/qcom/sm8250-sony-xperia-edo-pdx203.dts | 1 + + .../dts/qcom/sm8250-sony-xperia-edo-pdx206.dts | 1 + + arch/arm64/boot/dts/qcom/sm8250.dtsi | 20 +- + arch/arm64/boot/dts/qcom/sm8350.dtsi | 7 +- + arch/arm64/configs/defconfig | 5 +- + drivers/of/platform.c | 1 + + drivers/soc/qcom/Kconfig | 10 + + drivers/soc/qcom/Makefile | 1 + + drivers/soc/qcom/qcom_stats.c | 277 +++++++++++++++++++++ + drivers/soc/qcom/rpmhpd.c | 18 +- + drivers/soc/qcom/smem.c | 57 +++-- + drivers/soc/qcom/smp2p.c | 121 +++++++-- + drivers/soc/qcom/socinfo.c | 4 +- + 91 files changed, 885 insertions(+), 248 deletions(-) + create mode 100644 Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-parade-ps8640.dtsi + create mode 100644 arch/arm64/boot/dts/qcom/sc7180-trogdor-ti-sn65dsi86.dtsi + create mode 100644 drivers/soc/qcom/qcom_stats.c +Merging raspberrypi/for-next (a036b0a5d7d6 ARM: dts: bcm2711-rpi-4-b: Fix usb's unit address) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git raspberrypi/for-next +Auto-merging arch/arm/boot/dts/Makefile +Merge made by the 'recursive' strategy. +Merging renesas/next (525a6b4bd53f Merge branch 'renesas-arm-dt-for-v5.16' into renesas-next) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git renesas/next +Already up to date. +Merge made by the 'recursive' strategy. +Merging reset/reset/next (8c81620ac1ac reset: mchp: sparx5: Extend support for lan966x) +$ git merge -m Merge branch 'reset/next' of https://git.pengutronix.de/git/pza/linux reset/reset/next +Already up to date. +Merging rockchip/for-next (cc3bcb015bb1 Merge branch 'v5.16-armsoc/dts64' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip.git rockchip/for-next +Merge made by the 'recursive' strategy. + drivers/clk/rockchip/clk-rk3399.c | 17 ++++++++++------- + drivers/clk/rockchip/clk-rk3568.c | 2 +- + 2 files changed, 11 insertions(+), 8 deletions(-) +Merging samsung-krzk/for-next (3ed6ae3305e0 Merge branch 'for-v5.16/dts-riscv' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git samsung-krzk/for-next +Auto-merging arch/arm/Kconfig +Merge made by the 'recursive' strategy. 
+ .../bindings/arm/samsung/exynos-chipid.yaml | 5 +- + arch/arm/Kconfig | 1 - + arch/arm/boot/dts/exynos3250-rinato.dts | 1 + + arch/arm/boot/dts/exynos4210-i9100.dts | 1 + + arch/arm/boot/dts/exynos4210-trats.dts | 1 + + arch/arm/boot/dts/exynos4210-universal_c210.dts | 1 + + arch/arm/boot/dts/exynos4412-i9300.dts | 1 + + arch/arm/boot/dts/exynos4412-i9305.dts | 1 + + arch/arm/boot/dts/exynos4412-n710x.dts | 1 + + arch/arm/boot/dts/exynos4412-p4note-n8010.dts | 1 + + arch/arm/boot/dts/exynos4412-trats2.dts | 1 + + arch/arm/boot/dts/exynos5250-snow-rev5.dts | 1 + + arch/arm/boot/dts/exynos5250-snow.dts | 1 + + arch/arm/boot/dts/exynos5250-spring.dts | 1 + + arch/arm/boot/dts/exynos5420-peach-pit.dts | 1 + + arch/arm/boot/dts/exynos5800-peach-pi.dts | 1 + + arch/arm/boot/dts/s5pv210-fascinate4g.dts | 1 + + arch/arm/boot/dts/s5pv210-galaxys.dts | 1 + + arch/arm/mach-exynos/Kconfig | 2 - + arch/arm/mach-s3c/irq-s3c24xx.c | 22 ++++- + arch/arm/mach-s3c/mach-mini6410.c | 2 +- + arch/arm/mach-s5pv210/Kconfig | 1 - + arch/arm64/Kconfig.platforms | 2 - + arch/arm64/boot/dts/exynos/exynos5433-tm2.dts | 1 + + arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts | 1 + + arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 4 +- + arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 2 +- + .../riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 10 +-- + .../riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 7 +- + drivers/rtc/Kconfig | 10 +-- + drivers/soc/samsung/Kconfig | 5 +- + drivers/soc/samsung/Makefile | 3 +- + drivers/soc/samsung/exynos-chipid.c | 93 ++++++++++++++++++---- + drivers/soc/samsung/exynos5422-asv.c | 1 + + drivers/soc/samsung/pm_domains.c | 1 - + include/linux/soc/samsung/exynos-chipid.h | 6 +- + 36 files changed, 137 insertions(+), 58 deletions(-) +Merging scmi/for-linux-next (a38a50b66af8 Merge branch 'for-next/ffa' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux into for-linux-next) +$ git merge -m Merge branch 'for-linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/sudeep.holla/linux.git scmi/for-linux-next +Merge made by the 'recursive' strategy. + drivers/firmware/arm_ffa/driver.c | 54 ++++++++++++++++++++++++++++++++++---- + drivers/firmware/arm_scmi/virtio.c | 21 +++++++-------- + include/linux/arm_ffa.h | 2 ++ + 3 files changed, 61 insertions(+), 16 deletions(-) +Merging stm32/stm32-next (d4b3aaf0f90b ARM: dts: stm32: use usbphyc ck_usbo_48m as USBH OHCI clock on stm32mp151) +$ git merge -m Merge branch 'stm32-next' of git://git.kernel.org/pub/scm/linux/kernel/git/atorgue/stm32.git stm32/stm32-next +Auto-merging arch/arm/boot/dts/Makefile +Merge made by the 'recursive' strategy. +Merging sunxi/sunxi/for-next (7fb77af71236 Merge branch 'sunxi/dt-for-5.16' into sunxi/for-next) +$ git merge -m Merge branch 'sunxi/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git sunxi/sunxi/for-next +Merge made by the 'recursive' strategy. 
+ .../clock/allwinner,sun8i-a83t-de2-clk.yaml | 2 +- + drivers/clk/sunxi-ng/Kconfig | 1 + + drivers/clk/sunxi-ng/ccu-sun4i-a10.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun50i-a100-r.c | 3 +- + drivers/clk/sunxi-ng/ccu-sun50i-a100.c | 3 +- + drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 7 +- + drivers/clk/sunxi-ng/ccu-sun50i-h6-r.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun50i-h6.c | 7 +- + drivers/clk/sunxi-ng/ccu-sun50i-h616.c | 4 +- + drivers/clk/sunxi-ng/ccu-sun5i.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun8i-a23.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun8i-a33.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun8i-a83t.c | 7 +- + drivers/clk/sunxi-ng/ccu-sun8i-de2.c | 6 +- + drivers/clk/sunxi-ng/ccu-sun8i-h3.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun8i-r.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun8i-r40.c | 7 +- + drivers/clk/sunxi-ng/ccu-sun8i-v3s.c | 2 +- + drivers/clk/sunxi-ng/ccu-sun9i-a80-de.c | 8 +- + drivers/clk/sunxi-ng/ccu-sun9i-a80-usb.c | 7 +- + drivers/clk/sunxi-ng/ccu-sun9i-a80.c | 7 +- + drivers/clk/sunxi-ng/ccu-suniv-f1c100s.c | 2 +- + drivers/clk/sunxi-ng/ccu_common.c | 96 ++++++++++++++++++---- + drivers/clk/sunxi-ng/ccu_common.h | 6 +- + drivers/clk/sunxi-ng/ccu_mux.h | 1 - + drivers/clk/sunxi/clk-mod0.c | 4 +- + drivers/clk/sunxi/clk-sun6i-apb0-gates.c | 4 +- + drivers/clk/sunxi/clk-sun6i-apb0.c | 4 +- + drivers/clk/sunxi/clk-sun6i-ar100.c | 4 +- + drivers/clk/sunxi/clk-sun8i-apb0.c | 4 +- + 31 files changed, 127 insertions(+), 85 deletions(-) +Merging tegra/for-next (bbd827b4de7e Merge branch for-5.16/arm64/defconfig into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux.git tegra/for-next +Merge made by the 'recursive' strategy. + drivers/firmware/tegra/bpmp-debugfs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) +Merging ti-k3/ti-k3-next (1e3d655fe7b4 Merge branch 'ti-k3-config-next' into ti-k3-next) +$ git merge -m Merge branch 'ti-k3-next' of git://git.kernel.org/pub/scm/linux/kernel/git/kristo/linux.git ti-k3/ti-k3-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging ti-k3-new/ti-k3-next (f46d16cf5b43 arm64: dts: ti: k3-j721e-sk: Add DDR carveout memory nodes) +$ git merge -m Merge branch 'ti-k3-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nmenon/linux.git ti-k3-new/ti-k3-next +Already up to date. +Merging xilinx/for-next (326b5e9db528 Merge branch 'zynqmp/soc' into for-next) +$ git merge -m Merge branch 'for-next' of git://github.com/Xilinx/linux-xlnx.git xilinx/for-next +Merge made by the 'recursive' strategy. + drivers/firmware/xilinx/zynqmp.c | 5 +- + drivers/soc/xilinx/zynqmp_pm_domains.c | 91 +++++++++++++++++++--------------- + drivers/soc/xilinx/zynqmp_power.c | 1 - + 3 files changed, 55 insertions(+), 42 deletions(-) +Merging clk/clk-next (e88c20ffe7d5 Merge branch 'clk-composite-determine-fix' into clk-next) +$ git merge -m Merge branch 'clk-next' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git clk/clk-next +Removing Documentation/devicetree/bindings/clock/fixed-mmio-clock.txt +Merge made by the 'recursive' strategy. 
+ .../arm/mediatek/mediatek,mt8195-clock.yaml | 254 ++
+ .../arm/mediatek/mediatek,mt8195-sys-clock.yaml | 73 +
+ .../devicetree/bindings/clock/arm,syscon-icst.yaml | 5 +
+ .../devicetree/bindings/clock/fixed-mmio-clock.txt | 24 -
+ .../bindings/clock/fixed-mmio-clock.yaml | 47 +
+ .../bindings/clock/imx8ulp-cgc-clock.yaml | 43 +
+ .../bindings/clock/imx8ulp-pcc-clock.yaml | 50 +
+ .../bindings/clock/qcom,dispcc-sm8x50.yaml | 13 +
+ .../bindings/clock/qcom,gcc-msm8994.yaml | 70 +
+ .../bindings/clock/qcom,gcc-msm8998.yaml | 26 +-
+ .../bindings/clock/qcom,gcc-qcm2290.yaml | 72 +
+ .../devicetree/bindings/clock/qcom,gcc.yaml | 2 -
+ .../devicetree/bindings/clock/qcom,rpmcc.txt | 1 +
+ .../bindings/clock/qcom,sc7280-camcc.yaml | 71 +
+ .../bindings/clock/qcom,sc7280-lpasscc.yaml | 68 +
+ .../devicetree/bindings/clock/qcom,videocc.yaml | 13 +
+ arch/arm/mach-integrator/Kconfig | 2 +-
+ arch/arm/mach-realview/Kconfig | 2 +-
+ arch/arm/mach-versatile/Kconfig | 2 +-
+ arch/arm/mach-vexpress/Kconfig | 2 +-
+ drivers/clk/at91/pmc.c | 5 +
+ drivers/clk/clk-composite.c | 77 +-
+ drivers/clk/imx/Kconfig | 7 +
+ drivers/clk/imx/Makefile | 2 +
+ drivers/clk/imx/clk-composite-7ulp.c | 88 +-
+ drivers/clk/imx/clk-composite-8m.c | 4 +-
+ drivers/clk/imx/clk-imx6ul.c | 9 +-
+ drivers/clk/imx/clk-imx7ulp.c | 20 +-
+ drivers/clk/imx/clk-imx8ulp.c | 569 ++++
+ drivers/clk/imx/clk-pfdv2.c | 23 +-
+ drivers/clk/imx/clk-pllv4.c | 35 +-
+ drivers/clk/imx/clk.h | 457 +--
+ drivers/clk/mediatek/Kconfig | 28 +-
+ drivers/clk/mediatek/Makefile | 8 +
+ drivers/clk/mediatek/clk-apmixed.c | 3 +
+ drivers/clk/mediatek/clk-cpumux.c | 3 +
+ drivers/clk/mediatek/clk-gate.c | 8 +
+ drivers/clk/mediatek/clk-mt6779-aud.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-cam.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-img.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-ipe.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-mfg.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-mm.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-vdec.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779-venc.c | 4 +-
+ drivers/clk/mediatek/clk-mt6779.c | 2 +
+ drivers/clk/mediatek/clk-mt8195-apmixedsys.c | 145 +
+ drivers/clk/mediatek/clk-mt8195-apusys_pll.c | 92 +
+ drivers/clk/mediatek/clk-mt8195-cam.c | 142 +
+ drivers/clk/mediatek/clk-mt8195-ccu.c | 50 +
+ drivers/clk/mediatek/clk-mt8195-img.c | 96 +
+ drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c | 68 +
+ drivers/clk/mediatek/clk-mt8195-infra_ao.c | 206 ++
+ drivers/clk/mediatek/clk-mt8195-ipe.c | 51 +
+ drivers/clk/mediatek/clk-mt8195-mfg.c | 47 +
+ drivers/clk/mediatek/clk-mt8195-peri_ao.c | 62 +
+ drivers/clk/mediatek/clk-mt8195-scp_adsp.c | 47 +
+ drivers/clk/mediatek/clk-mt8195-topckgen.c | 1273 ++++++++
+ drivers/clk/mediatek/clk-mt8195-vdec.c | 104 +
+ drivers/clk/mediatek/clk-mt8195-vdo0.c | 123 +
+ drivers/clk/mediatek/clk-mt8195-vdo1.c | 140 +
+ drivers/clk/mediatek/clk-mt8195-venc.c | 69 +
+ drivers/clk/mediatek/clk-mt8195-vpp0.c | 110 +
+ drivers/clk/mediatek/clk-mt8195-vpp1.c | 108 +
+ drivers/clk/mediatek/clk-mt8195-wpe.c | 143 +
+ drivers/clk/mediatek/clk-mtk.c | 29 +-
+ drivers/clk/mediatek/clk-mtk.h | 1 +
+ drivers/clk/mediatek/clk-mux.c | 6 +
+ drivers/clk/mediatek/clk-pll.c | 6 +-
+ drivers/clk/mediatek/reset.c | 2 +
+ drivers/clk/mvebu/ap-cpu-clk.c | 14 +-
+ drivers/clk/qcom/Kconfig | 43 +-
+ drivers/clk/qcom/Makefile | 3 +
+ drivers/clk/qcom/a53-pll.c | 4 +-
+ drivers/clk/qcom/camcc-sc7280.c | 2484 ++++++++++++++++
+ drivers/clk/qcom/clk-smd-rpm.c | 135 +-
+ drivers/clk/qcom/common.c | 8 +-
+ drivers/clk/qcom/dispcc-sm8250.c | 27 +-
+ drivers/clk/qcom/gcc-msm8953.c | 1 -
+ drivers/clk/qcom/gcc-msm8994.c | 1384 +++++----
+ drivers/clk/qcom/gcc-msm8998.c | 705 ++---
+ drivers/clk/qcom/gcc-qcm2290.c | 3044 ++++++++++++++++++++
+ drivers/clk/qcom/gcc-sc7280.c | 85 -
+ drivers/clk/qcom/gcc-sdm660.c | 80 +-
+ drivers/clk/qcom/gdsc.c | 51 +-
+ drivers/clk/qcom/gdsc.h | 2 +
+ drivers/clk/qcom/gpucc-msm8998.c | 13 +-
+ drivers/clk/qcom/gpucc-sdm660.c | 15 +-
+ drivers/clk/qcom/kpss-xcc.c | 4 +-
+ drivers/clk/qcom/lpasscc-sc7280.c | 216 ++
+ drivers/clk/qcom/mmcc-msm8998.c | 183 +-
+ drivers/clk/qcom/mmcc-sdm660.c | 75 +-
+ drivers/clk/qcom/videocc-sm8250.c | 27 +-
+ drivers/clk/renesas/r8a7795-cpg-mssr.c | 1 +
+ drivers/clk/renesas/r8a7796-cpg-mssr.c | 1 +
+ drivers/clk/renesas/r8a77965-cpg-mssr.c | 1 +
+ drivers/clk/renesas/r8a779a0-cpg-mssr.c | 191 ++
+ drivers/clk/renesas/r9a07g044-cpg.c | 83 +-
+ drivers/clk/renesas/rcar-cpg-lib.c | 83 +
+ drivers/clk/renesas/rcar-cpg-lib.h | 7 +
+ drivers/clk/renesas/rcar-gen3-cpg.c | 89 +-
+ drivers/clk/renesas/rzg2l-cpg.c | 212 ++
+ drivers/clk/renesas/rzg2l-cpg.h | 45 +-
+ drivers/clk/versatile/Kconfig | 3 +-
+ drivers/clk/versatile/Makefile | 2 +-
+ drivers/clk/versatile/clk-icst.c | 3 +-
+ include/dt-bindings/clock/imx8ulp-clock.h | 258 ++
+ include/dt-bindings/clock/mt8195-clk.h | 864 ++++++
+ include/dt-bindings/clock/qcom,camcc-sc7280.h | 127 +
+ include/dt-bindings/clock/qcom,gcc-msm8994.h | 13 +
+ include/dt-bindings/clock/qcom,gcc-qcm2290.h | 188 ++
+ include/dt-bindings/clock/qcom,lpass-sc7280.h | 16 +
+ include/dt-bindings/clock/qcom,rpmcc.h | 6 +
+ include/dt-bindings/reset/imx8ulp-pcc-reset.h | 59 +
+ include/linux/soc/qcom/smd-rpm.h | 2 +
+ 115 files changed, 14362 insertions(+), 1835 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-clock.yaml
+ create mode 100644 Documentation/devicetree/bindings/arm/mediatek/mediatek,mt8195-sys-clock.yaml
+ delete mode 100644 Documentation/devicetree/bindings/clock/fixed-mmio-clock.txt
+ create mode 100644 Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/qcom,gcc-qcm2290.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7280-camcc.yaml
+ create mode 100644 Documentation/devicetree/bindings/clock/qcom,sc7280-lpasscc.yaml
+ create mode 100644 drivers/clk/imx/clk-imx8ulp.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-apmixedsys.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-apusys_pll.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-cam.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-ccu.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-img.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-infra_ao.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-ipe.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-mfg.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-peri_ao.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-scp_adsp.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-topckgen.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-vdec.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-vdo0.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-vdo1.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-venc.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-vpp0.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-vpp1.c
+ create mode 100644 drivers/clk/mediatek/clk-mt8195-wpe.c
+ create mode 100644 drivers/clk/qcom/camcc-sc7280.c
+ create mode 100644 drivers/clk/qcom/gcc-qcm2290.c
+ create mode 100644 drivers/clk/qcom/lpasscc-sc7280.c
+ create mode 100644 include/dt-bindings/clock/imx8ulp-clock.h
+ create mode 100644 include/dt-bindings/clock/mt8195-clk.h
+ create mode 100644 include/dt-bindings/clock/qcom,camcc-sc7280.h
+ create mode 100644 include/dt-bindings/clock/qcom,gcc-qcm2290.h
+ create mode 100644 include/dt-bindings/clock/qcom,lpass-sc7280.h
+ create mode 100644 include/dt-bindings/reset/imx8ulp-pcc-reset.h
+Merging clk-imx/for-next (e8271eff5d8c clk: imx: Make CLK_IMX8ULP select MXC_CLK)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git clk-imx/for-next
+Already up to date.
+Merging clk-renesas/renesas-clk (2bd9feed2316 clk: renesas: r8a779[56]x: Add MLP clocks)
+$ git merge -m Merge branch 'renesas-clk' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git clk-renesas/renesas-clk
+Already up to date.
+Merging clk-samsung/for-next (651521d396a8 clk: samsung: remove __clk_lookup() usage)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git clk-samsung/for-next
+Merge made by the 'recursive' strategy.
+ .../bindings/clock/samsung,exynos850-clock.yaml | 185 +++++
+ drivers/clk/samsung/Kconfig | 30 +-
+ drivers/clk/samsung/Makefile | 1 +
+ drivers/clk/samsung/clk-cpu.c | 18 +
+ drivers/clk/samsung/clk-exynos-audss.c | 4 +-
+ drivers/clk/samsung/clk-exynos4.c | 18 +-
+ drivers/clk/samsung/clk-exynos4412-isp.c | 4 +-
+ drivers/clk/samsung/clk-exynos5250.c | 10 +-
+ drivers/clk/samsung/clk-exynos5420.c | 2 +-
+ drivers/clk/samsung/clk-exynos5433.c | 124 ++-
+ drivers/clk/samsung/clk-exynos850.c | 835 +++++++++++++++
+ drivers/clk/samsung/clk-pll.c | 196 +++++
+ drivers/clk/samsung/clk-pll.h | 2 +
+ drivers/clk/samsung/clk-s3c2410.c | 6 +-
+ drivers/clk/samsung/clk-s3c64xx.c | 8 +-
+ drivers/clk/samsung/clk-s5pv210-audss.c | 4 +-
+ drivers/clk/samsung/clk-s5pv210.c | 8 +-
+ drivers/clk/samsung/clk.c | 16 +-
+ drivers/clk/samsung/clk.h | 28 +-
+ include/dt-bindings/clock/exynos4.h | 4 +-
+ include/dt-bindings/clock/exynos5250.h | 4 +-
+ include/dt-bindings/clock/exynos850.h | 141 ++++
+ 22 files changed, 1516 insertions(+), 132 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml
+ create mode 100644 drivers/clk/samsung/clk-exynos850.c
+ create mode 100644 include/dt-bindings/clock/exynos850.h
+Merging csky/linux-next (e21e52ad1e01 csky: Make HAVE_TCM depend on !COMPILE_TEST)
+$ git merge -m Merge branch 'linux-next' of git://github.com/c-sky/csky-linux.git csky/linux-next
+Already up to date.
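
A note on the pattern: each tree in this log is handled the same way -- fetch
the remote branch, then "git merge -m" with a message naming the branch and
URL. A minimal sketch of such a loop, assuming a hypothetical control file of
"name branch url" lines (the real linux-next scripts are more elaborate):

$ while read name branch url; do
>   git fetch "$url" "$branch" || exit 1
>   git merge -m "Merge branch '$branch' of $url" FETCH_HEAD || break
> done < control
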
+Merging h8300/h8300-next (1ec10274d436 h8300: don't implement set_fs)
+$ git merge -m Merge branch 'h8300-next' of git://git.sourceforge.jp/gitroot/uclinux-h8/linux.git h8300/h8300-next
+Auto-merging drivers/tty/serial/sh-sci.c
+Auto-merging drivers/net/ethernet/smsc/smc91x.c
+Auto-merging drivers/net/ethernet/smsc/Kconfig
+Removing arch/h8300/mm/memory.c
+Auto-merging arch/h8300/mm/init.c
+Auto-merging arch/h8300/kernel/setup.c
+Removing arch/h8300/include/asm/segment.h
+Auto-merging arch/h8300/Kconfig.cpu
+Merge made by the 'recursive' strategy.
+ arch/h8300/Kconfig | 1 -
+ arch/h8300/Kconfig.cpu | 4 ++
+ arch/h8300/boot/dts/edosk2674.dts | 10 ++++-
+ arch/h8300/boot/dts/h8300h_sim.dts | 2 +-
+ arch/h8300/boot/dts/h8s_sim.dts | 2 +-
+ arch/h8300/configs/edosk2674_defconfig | 10 ++---
+ arch/h8300/configs/h8300h-sim_defconfig | 8 +---
+ arch/h8300/configs/h8s-sim_defconfig | 8 +---
+ arch/h8300/include/asm/processor.h | 1 -
+ arch/h8300/include/asm/segment.h | 40 --------------------
+ arch/h8300/include/asm/thread_info.h | 3 --
+ arch/h8300/kernel/entry.S | 1 -
+ arch/h8300/kernel/head_ram.S | 1 -
+ arch/h8300/kernel/setup.c | 2 +-
+ arch/h8300/lib/memset.S | 17 +++++----
+ arch/h8300/mm/Makefile | 2 +-
+ arch/h8300/mm/init.c | 6 ---
+ arch/h8300/mm/memory.c | 53 --------------------
+ drivers/clocksource/h8300_timer8.c | 20 +++++-----
+ drivers/irqchip/irq-renesas-h8300h.c | 19 +++++++++-
+ drivers/irqchip/irq-renesas-h8s.c | 67 +++++++++++++++++++++++++--------
+ drivers/net/ethernet/smsc/Kconfig | 1 -
+ drivers/net/ethernet/smsc/smc91x.c | 10 +++++
+ drivers/tty/serial/sh-sci.c | 5 +--
+ 24 files changed, 124 insertions(+), 169 deletions(-)
+ delete mode 100644 arch/h8300/include/asm/segment.h
+ delete mode 100644 arch/h8300/mm/memory.c
+Merging m68k/for-next (8a3c0a74ae87 m68k: defconfig: Update defconfigs for v5.15-rc1)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git m68k/for-next
+Merge made by the 'recursive' strategy.
+ arch/m68k/configs/amiga_defconfig | 7 +++++--
+ arch/m68k/configs/apollo_defconfig | 7 +++++--
+ arch/m68k/configs/atari_defconfig | 7 +++++--
+ arch/m68k/configs/bvme6000_defconfig | 7 +++++--
+ arch/m68k/configs/hp300_defconfig | 7 +++++--
+ arch/m68k/configs/mac_defconfig | 7 +++++--
+ arch/m68k/configs/multi_defconfig | 7 +++++--
+ arch/m68k/configs/mvme147_defconfig | 7 +++++--
+ arch/m68k/configs/mvme16x_defconfig | 7 +++++--
+ arch/m68k/configs/q40_defconfig | 7 +++++--
+ arch/m68k/configs/sun3_defconfig | 7 +++++--
+ arch/m68k/configs/sun3x_defconfig | 7 +++++--
+ arch/m68k/lib/muldi3.c | 2 +-
+ 13 files changed, 61 insertions(+), 25 deletions(-)
+Merging m68knommu/for-next (b7c534f5015b m68knommu: Remove MCPU32 config symbol)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gerg/m68knommu.git m68knommu/for-next
+Merge made by the 'recursive' strategy.
+ arch/m68k/Kconfig.cpu | 11 -----------
+ arch/m68k/Kconfig.machine | 1 +
+ arch/m68k/include/asm/bitops.h | 2 +-
+ 3 files changed, 2 insertions(+), 12 deletions(-)
+Merging microblaze/next (6880fa6c5660 Linux 5.15-rc1)
+$ git merge -m Merge branch 'next' of git://git.monstr.eu/linux-2.6-microblaze.git microblaze/next
+Already up to date.
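
When a merge lands cleanly, as with h8300 above, the incoming changes can
still be reviewed after the fact: git merge leaves the pre-merge tip in
ORIG_HEAD, so the same summary can be replayed with standard commands (the
arch/h8300 path filter is only an example):

$ git diff -M --stat --summary ORIG_HEAD..HEAD -- arch/h8300
$ git log --oneline --no-merges ORIG_HEAD..HEAD -- arch/h8300
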
+Merging mips/mips-next (95b8a5e0111a MIPS: Remove NETLOGIC support)
+$ git merge -m Merge branch 'mips-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git mips/mips-next
+Removing arch/mips/pci/pci-xlr.c
+Removing arch/mips/pci/pci-xlp.c
+Removing arch/mips/pci/msi-xlp.c
+Removing arch/mips/netlogic/xlr/wakeup.c
+Removing arch/mips/netlogic/xlr/setup.c
+Removing arch/mips/netlogic/xlr/platform.c
+Removing arch/mips/netlogic/xlr/platform-flash.c
+Removing arch/mips/netlogic/xlr/fmn.c
+Removing arch/mips/netlogic/xlr/fmn-config.c
+Removing arch/mips/netlogic/xlr/Makefile
+Removing arch/mips/netlogic/xlp/wakeup.c
+Removing arch/mips/netlogic/xlp/usb-init.c
+Removing arch/mips/netlogic/xlp/usb-init-xlp2.c
+Removing arch/mips/netlogic/xlp/setup.c
+Removing arch/mips/netlogic/xlp/nlm_hal.c
+Removing arch/mips/netlogic/xlp/dt.c
+Removing arch/mips/netlogic/xlp/cop2-ex.c
+Removing arch/mips/netlogic/xlp/ahci-init.c
+Removing arch/mips/netlogic/xlp/ahci-init-xlp2.c
+Removing arch/mips/netlogic/xlp/Makefile
+Removing arch/mips/netlogic/common/time.c
+Removing arch/mips/netlogic/common/smpboot.S
+Removing arch/mips/netlogic/common/smp.c
+Removing arch/mips/netlogic/common/reset.S
+Removing arch/mips/netlogic/common/irq.c
+Removing arch/mips/netlogic/common/earlycons.c
+Removing arch/mips/netlogic/common/Makefile
+Removing arch/mips/netlogic/Platform
+Removing arch/mips/netlogic/Makefile
+Removing arch/mips/netlogic/Kconfig
+Removing arch/mips/include/asm/netlogic/xlr/xlr.h
+Removing arch/mips/include/asm/netlogic/xlr/pic.h
+Removing arch/mips/include/asm/netlogic/xlr/msidef.h
+Removing arch/mips/include/asm/netlogic/xlr/iomap.h
+Removing arch/mips/include/asm/netlogic/xlr/gpio.h
+Removing arch/mips/include/asm/netlogic/xlr/fmn.h
+Removing arch/mips/include/asm/netlogic/xlr/flash.h
+Removing arch/mips/include/asm/netlogic/xlr/bridge.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/xlp.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/uart.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/sys.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/pic.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/iomap.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h
+Removing arch/mips/include/asm/netlogic/xlp-hal/bridge.h
+Removing arch/mips/include/asm/netlogic/psb-bootinfo.h
+Removing arch/mips/include/asm/netlogic/mips-extns.h
+Removing arch/mips/include/asm/netlogic/interrupt.h
+Removing arch/mips/include/asm/netlogic/haldefs.h
+Removing arch/mips/include/asm/netlogic/common.h
+Removing arch/mips/include/asm/mach-netlogic/multi-node.h
+Removing arch/mips/include/asm/mach-netlogic/irq.h
+Removing arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
+Removing arch/mips/configs/nlm_xlr_defconfig
+Removing arch/mips/configs/nlm_xlp_defconfig
+Removing arch/mips/boot/dts/netlogic/xlp_svp.dts
+Removing arch/mips/boot/dts/netlogic/xlp_rvp.dts
+Removing arch/mips/boot/dts/netlogic/xlp_gvp.dts
+Removing arch/mips/boot/dts/netlogic/xlp_fvp.dts
+Removing arch/mips/boot/dts/netlogic/xlp_evp.dts
+Removing arch/mips/boot/dts/netlogic/Makefile
+Auto-merging arch/mips/Kconfig
+Merge made by the 'recursive' strategy.
+ arch/mips/Kbuild.platforms | 1 -
+ arch/mips/Kconfig | 92 +---
+ arch/mips/boot/compressed/uart-16550.c | 12 -
+ arch/mips/boot/dts/Makefile | 1 -
+ arch/mips/boot/dts/netlogic/Makefile | 8 -
+ arch/mips/boot/dts/netlogic/xlp_evp.dts | 131 -----
+ arch/mips/boot/dts/netlogic/xlp_fvp.dts | 131 -----
+ arch/mips/boot/dts/netlogic/xlp_gvp.dts | 89 ----
+ arch/mips/boot/dts/netlogic/xlp_rvp.dts | 89 ----
+ arch/mips/boot/dts/netlogic/xlp_svp.dts | 131 -----
+ arch/mips/cavium-octeon/executive/cvmx-helper.c | 10 -
+ arch/mips/cavium-octeon/executive/cvmx-pko.c | 14 -
+ arch/mips/configs/loongson3_defconfig | 1 +
+ arch/mips/configs/nlm_xlp_defconfig | 557 --------------------
+ arch/mips/configs/nlm_xlr_defconfig | 508 ------------------
+ arch/mips/include/asm/cmpxchg.h | 5 +-
+ arch/mips/include/asm/cop2.h | 11 -
+ arch/mips/include/asm/cpu-type.h | 8 -
+ arch/mips/include/asm/cpu.h | 2 +-
+ arch/mips/include/asm/ginvt.h | 11 +-
+ arch/mips/include/asm/hazards.h | 2 +-
+ .../include/asm/mach-loongson64/loongson_regs.h | 12 +
+ .../asm/mach-netlogic/cpu-feature-overrides.h | 57 --
+ arch/mips/include/asm/mach-netlogic/irq.h | 17 -
+ arch/mips/include/asm/mach-netlogic/multi-node.h | 74 ---
+ arch/mips/include/asm/mipsregs.h | 190 ++++---
+ arch/mips/include/asm/msa.h | 34 +-
+ arch/mips/include/asm/netlogic/common.h | 132 -----
+ arch/mips/include/asm/netlogic/haldefs.h | 171 ------
+ arch/mips/include/asm/netlogic/interrupt.h | 45 --
+ arch/mips/include/asm/netlogic/mips-extns.h | 301 -----------
+ arch/mips/include/asm/netlogic/psb-bootinfo.h | 95 ----
+ arch/mips/include/asm/netlogic/xlp-hal/bridge.h | 186 -------
+ .../mips/include/asm/netlogic/xlp-hal/cpucontrol.h | 89 ----
+ arch/mips/include/asm/netlogic/xlp-hal/iomap.h | 214 --------
+ arch/mips/include/asm/netlogic/xlp-hal/pcibus.h | 113 ----
+ arch/mips/include/asm/netlogic/xlp-hal/pic.h | 366 -------------
+ arch/mips/include/asm/netlogic/xlp-hal/sys.h | 213 --------
+ arch/mips/include/asm/netlogic/xlp-hal/uart.h | 192 -------
+ arch/mips/include/asm/netlogic/xlp-hal/xlp.h | 119 -----
+ arch/mips/include/asm/netlogic/xlr/bridge.h | 104 ----
+ arch/mips/include/asm/netlogic/xlr/flash.h | 55 --
+ arch/mips/include/asm/netlogic/xlr/fmn.h | 365 -------------
+ arch/mips/include/asm/netlogic/xlr/gpio.h | 74 ---
+ arch/mips/include/asm/netlogic/xlr/iomap.h | 109 ----
+ arch/mips/include/asm/netlogic/xlr/msidef.h | 84 ---
+ arch/mips/include/asm/netlogic/xlr/pic.h | 306 -----------
+ arch/mips/include/asm/netlogic/xlr/xlr.h | 59 ---
+ arch/mips/include/asm/octeon/cvmx-helper.h | 7 -
+ arch/mips/include/asm/octeon/cvmx-pko.h | 1 -
+ arch/mips/include/asm/processor.h | 13 -
+ arch/mips/include/asm/vermagic.h | 4 -
+ arch/mips/kernel/cpu-probe.c | 84 ---
+ arch/mips/kernel/idle.c | 2 -
+ arch/mips/kernel/perf_event_mipsxx.c | 86 ----
+ arch/mips/kernel/proc.c | 227 ++++++--
+ arch/mips/kvm/entry.c | 8 +-
+ arch/mips/loongson64/init.c | 1 +
+ arch/mips/mm/c-r4k.c | 2 -
+ arch/mips/mm/tlbex.c | 9 +-
+ arch/mips/netlogic/Kconfig | 86 ----
+ arch/mips/netlogic/Makefile | 4 -
+ arch/mips/netlogic/Platform | 16 -
+ arch/mips/netlogic/common/Makefile | 5 -
+ arch/mips/netlogic/common/earlycons.c | 63 ---
+ arch/mips/netlogic/common/irq.c | 350 -------------
+ arch/mips/netlogic/common/reset.S | 299 -----------
+ arch/mips/netlogic/common/smp.c | 285 ----------
+ arch/mips/netlogic/common/smpboot.S | 141 -----
+ arch/mips/netlogic/common/time.c | 110 ----
+ arch/mips/netlogic/xlp/Makefile | 11 -
+ arch/mips/netlogic/xlp/ahci-init-xlp2.c | 390 --------------
+ arch/mips/netlogic/xlp/ahci-init.c | 209 --------
+ arch/mips/netlogic/xlp/cop2-ex.c | 121 -----
+ arch/mips/netlogic/xlp/dt.c | 95 ----
+ arch/mips/netlogic/xlp/nlm_hal.c | 508 ------------------
+ arch/mips/netlogic/xlp/setup.c | 174 -------
+ arch/mips/netlogic/xlp/usb-init-xlp2.c | 288 -----------
+ arch/mips/netlogic/xlp/usb-init.c | 149 ------
+ arch/mips/netlogic/xlp/wakeup.c | 212 --------
+ arch/mips/netlogic/xlr/Makefile | 3 -
+ arch/mips/netlogic/xlr/fmn-config.c | 296 -----------
+ arch/mips/netlogic/xlr/fmn.c | 199 -------
+ arch/mips/netlogic/xlr/platform-flash.c | 216 --------
+ arch/mips/netlogic/xlr/platform.c | 250 ---------
+ arch/mips/netlogic/xlr/setup.c | 206 --------
+ arch/mips/netlogic/xlr/wakeup.c | 85 ---
+ arch/mips/pci/Makefile | 3 -
+ arch/mips/pci/msi-xlp.c | 571 ---------------------
+ arch/mips/pci/pci-bcm47xx.c | 16 +-
+ arch/mips/pci/pci-xlp.c | 332 ------------
+ arch/mips/pci/pci-xlr.c | 368 -------------
+ arch/mips/sni/time.c | 4 +-
+ 93 files changed, 359 insertions(+), 11740 deletions(-)
+ delete mode 100644 arch/mips/boot/dts/netlogic/Makefile
+ delete mode 100644 arch/mips/boot/dts/netlogic/xlp_evp.dts
+ delete mode 100644 arch/mips/boot/dts/netlogic/xlp_fvp.dts
+ delete mode 100644 arch/mips/boot/dts/netlogic/xlp_gvp.dts
+ delete mode 100644 arch/mips/boot/dts/netlogic/xlp_rvp.dts
+ delete mode 100644 arch/mips/boot/dts/netlogic/xlp_svp.dts
+ delete mode 100644 arch/mips/configs/nlm_xlp_defconfig
+ delete mode 100644 arch/mips/configs/nlm_xlr_defconfig
+ delete mode 100644 arch/mips/include/asm/mach-netlogic/cpu-feature-overrides.h
+ delete mode 100644 arch/mips/include/asm/mach-netlogic/irq.h
+ delete mode 100644 arch/mips/include/asm/mach-netlogic/multi-node.h
+ delete mode 100644 arch/mips/include/asm/netlogic/common.h
+ delete mode 100644 arch/mips/include/asm/netlogic/haldefs.h
+ delete mode 100644 arch/mips/include/asm/netlogic/interrupt.h
+ delete mode 100644 arch/mips/include/asm/netlogic/mips-extns.h
+ delete mode 100644 arch/mips/include/asm/netlogic/psb-bootinfo.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/bridge.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/cpucontrol.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/iomap.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/pic.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/sys.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/uart.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlp-hal/xlp.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/bridge.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/flash.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/fmn.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/gpio.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/iomap.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/msidef.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/pic.h
+ delete mode 100644 arch/mips/include/asm/netlogic/xlr/xlr.h
+ delete mode 100644 arch/mips/netlogic/Kconfig
+ delete mode 100644 arch/mips/netlogic/Makefile
+ delete mode 100644 arch/mips/netlogic/Platform
+ delete mode 100644 arch/mips/netlogic/common/Makefile
+ delete mode 100644 arch/mips/netlogic/common/earlycons.c
+ delete mode 100644 arch/mips/netlogic/common/irq.c
+ delete mode 100644 arch/mips/netlogic/common/reset.S
+ delete mode 100644 arch/mips/netlogic/common/smp.c
+ delete mode 100644 arch/mips/netlogic/common/smpboot.S
+ delete mode 100644 arch/mips/netlogic/common/time.c
+ delete mode 100644 arch/mips/netlogic/xlp/Makefile
+ delete mode 100644 arch/mips/netlogic/xlp/ahci-init-xlp2.c
+ delete mode 100644 arch/mips/netlogic/xlp/ahci-init.c
+ delete mode 100644 arch/mips/netlogic/xlp/cop2-ex.c
+ delete mode 100644 arch/mips/netlogic/xlp/dt.c
+ delete mode 100644 arch/mips/netlogic/xlp/nlm_hal.c
+ delete mode 100644 arch/mips/netlogic/xlp/setup.c
+ delete mode 100644 arch/mips/netlogic/xlp/usb-init-xlp2.c
+ delete mode 100644 arch/mips/netlogic/xlp/usb-init.c
+ delete mode 100644 arch/mips/netlogic/xlp/wakeup.c
+ delete mode 100644 arch/mips/netlogic/xlr/Makefile
+ delete mode 100644 arch/mips/netlogic/xlr/fmn-config.c
+ delete mode 100644 arch/mips/netlogic/xlr/fmn.c
+ delete mode 100644 arch/mips/netlogic/xlr/platform-flash.c
+ delete mode 100644 arch/mips/netlogic/xlr/platform.c
+ delete mode 100644 arch/mips/netlogic/xlr/setup.c
+ delete mode 100644 arch/mips/netlogic/xlr/wakeup.c
+ delete mode 100644 arch/mips/pci/msi-xlp.c
+ delete mode 100644 arch/mips/pci/pci-xlp.c
+ delete mode 100644 arch/mips/pci/pci-xlr.c
+Merging nds32/next (07cd7745c6f2 nds32/setup: remove unused memblock_region variable in setup_memory())
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git nds32/next
+Auto-merging arch/nds32/Makefile
+Auto-merging arch/nds32/Kconfig
+CONFLICT (content): Merge conflict in arch/nds32/Kconfig
+Auto-merging arch/nds32/Kbuild
+CONFLICT (content): Merge conflict in arch/nds32/Kbuild
+Resolved 'arch/nds32/Kbuild' using previous resolution.
+Resolved 'arch/nds32/Kconfig' using previous resolution.
+Automatic merge failed; fix conflicts and then commit the result.
+$ git commit --no-edit -v -a
+[master 9959228938e7] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/greentime/linux.git
+$ git diff -M --stat --summary HEAD^..
+ arch/nds32/Kbuild | 3 +++
+ arch/nds32/Kconfig | 4 +++-
+ arch/nds32/Makefile | 3 ---
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+Merging nios2/for-next (7f7bc20bc41a nios2: Don't use _end for calculating min_low_pfn)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git nios2/for-next
+Already up to date.
+Merging openrisc/for-next (210893cad279 openrisc: signal: remove unused DEBUG_SIG macro)
+$ git merge -m Merge branch 'for-next' of git://github.com/openrisc/linux.git openrisc/for-next
+Merge made by the 'recursive' strategy.
+ arch/openrisc/kernel/signal.c | 2 --
+ arch/openrisc/kernel/time.c | 2 +-
+ 2 files changed, 1 insertion(+), 3 deletions(-)
+Merging parisc-hd/for-next (60017239b6b2 parisc/ftrace: use static key to enable/disable function graph tracer)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git parisc-hd/for-next
+Merge made by the 'recursive' strategy.
+ .../core/thread-info-in-task/arch-support.txt | 2 +-
+ arch/parisc/Kconfig | 22 +++-
+ arch/parisc/include/asm/assembly.h | 32 ++++++
+ arch/parisc/include/asm/bitops.h | 10 --
+ arch/parisc/include/asm/current.h | 19 ++++
+ arch/parisc/include/asm/futex.h | 3 -
+ arch/parisc/include/asm/ide.h | 4 -
+ arch/parisc/include/asm/kfence.h | 44 ++++++++
+ arch/parisc/include/asm/mckinley.h | 2 -
+ arch/parisc/include/asm/pdc.h | 2 +
+ arch/parisc/include/asm/processor.h | 11 +-
+ arch/parisc/include/asm/ptrace.h | 6 +-
+ arch/parisc/include/asm/runway.h | 2 -
+ arch/parisc/include/asm/smp.h | 19 +++-
+ arch/parisc/include/asm/thread_info.h | 12 +--
+ arch/parisc/include/asm/traps.h | 1 +
+ arch/parisc/include/asm/unaligned.h | 2 -
+ arch/parisc/include/uapi/asm/pdc.h | 28 +++++-
+ arch/parisc/kernel/Makefile | 1 +
+ arch/parisc/kernel/asm-offsets.c | 29 ++----
+ arch/parisc/kernel/cache.c | 87 +++++++---------
+ arch/parisc/kernel/entry.S | 90 ++++++-----------
+ arch/parisc/kernel/firmware.c | 32 ++++++
+ arch/parisc/kernel/ftrace.c | 20 ++--
+ arch/parisc/kernel/head.S | 40 ++++----
+ arch/parisc/kernel/irq.c | 6 +-
+ arch/parisc/kernel/pdt.c | 4 +-
+ arch/parisc/kernel/process.c | 4 +-
+ arch/parisc/kernel/smp.c | 25 ++++-
+ arch/parisc/kernel/stacktrace.c | 30 +++---
+ arch/parisc/kernel/sys_parisc.c | 10 +-
+ arch/parisc/kernel/syscall.S | 26 ++---
+ arch/parisc/kernel/toc.c | 111 +++++++++++++++++++++
+ arch/parisc/kernel/toc_asm.S | 88 ++++++++++++++++
+ arch/parisc/kernel/traps.c | 9 +-
+ arch/parisc/kernel/unwind.c | 34 ++++---
+ arch/parisc/lib/bitops.c | 12 +--
+ arch/parisc/mm/fault.c | 2 +-
+ arch/parisc/mm/init.c | 4 +-
+ include/linux/sched/task_stack.h | 4 +
+ 40 files changed, 603 insertions(+), 286 deletions(-)
+ create mode 100644 arch/parisc/include/asm/current.h
+ create mode 100644 arch/parisc/include/asm/kfence.h
+ create mode 100644 arch/parisc/kernel/toc.c
+ create mode 100644 arch/parisc/kernel/toc_asm.S
+Merging powerpc/next (8f6aca0e0f26 powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git powerpc/next
+Merge made by the 'recursive' strategy.
+ arch/powerpc/boot/Makefile | 2 +-
+ arch/powerpc/boot/serial.c | 2 +-
+ arch/powerpc/include/asm/asm-const.h | 2 -
+ arch/powerpc/include/asm/atomic.h | 8 +-
+ arch/powerpc/include/asm/io.h | 4 +-
+ arch/powerpc/include/asm/paravirt.h | 40 +++-
+ arch/powerpc/include/asm/uaccess.h | 6 +-
+ arch/powerpc/include/uapi/asm/perf_regs.h | 28 ++-
+ arch/powerpc/kernel/eeh.c | 12 +-
+ arch/powerpc/kernel/firmware.c | 7 +-
+ arch/powerpc/kernel/sysfs.c | 3 +-
+ arch/powerpc/kvm/powerpc.c | 4 +-
+ arch/powerpc/mm/mem.c | 4 +-
+ arch/powerpc/perf/perf_regs.c | 4 +
+ arch/powerpc/perf/power10-events-list.h | 8 +-
+ arch/powerpc/perf/power10-pmu.c | 44 ++--
+ arch/powerpc/platforms/44x/ppc476.c | 4 +-
+ arch/powerpc/platforms/85xx/Makefile | 4 +-
+ arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 7 +-
+ arch/powerpc/platforms/85xx/smp.c | 12 +-
+ arch/powerpc/platforms/powermac/pmac.h | 1 -
+ arch/powerpc/platforms/powermac/setup.c | 2 -
+ arch/powerpc/platforms/powernv/opal-dump.c | 2 +-
+ arch/powerpc/platforms/powernv/pci-sriov.c | 6 -
+ arch/powerpc/platforms/pseries/hotplug-cpu.c | 298 +++++++--------------------
+ arch/powerpc/platforms/pseries/iommu.c | 10 +-
+ arch/powerpc/platforms/pseries/setup.c | 3 +-
+ drivers/video/fbdev/chipsfb.c | 2 +-
+ 28 files changed, 217 insertions(+), 312 deletions(-)
+Merging soc-fsl/next (54c8b5b6f8a8 soc: fsl: dpio: rename the enqueue descriptor variable)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/leo/linux.git soc-fsl/next
+Already up to date.
+Merging risc-v/for-next (241527bb8467 Merge tag 'riscv-sifive-dt-5.16' of git://gitolite.kernel.org/pub/scm/linux/kernel/git/krzk/linux into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git risc-v/for-next
+Auto-merging arch/riscv/Makefile
+CONFLICT (content): Merge conflict in arch/riscv/Makefile
+Auto-merging arch/riscv/Kconfig
+Resolved 'arch/riscv/Makefile' using previous resolution.
+Automatic merge failed; fix conflicts and then commit the result.
+$ git commit --no-edit -v -a
+[master 07407d5509a7] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
+$ git diff -M --stat --summary HEAD^..
+ .../devicetree/bindings/mmc/cdns,sdhci.yaml | 1 +
+ arch/riscv/Kconfig | 1 +
+ arch/riscv/Makefile | 10 +
+ .../dts/microchip/microchip-mpfs-icicle-kit.dts | 18 +-
+ arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 36 +--
+ arch/riscv/configs/32-bit.config | 2 +
+ arch/riscv/configs/64-bit.config | 2 +
+ arch/riscv/include/asm/csr.h | 87 +++++++
+ arch/riscv/include/asm/page.h | 2 +
+ arch/riscv/include/asm/vdso.h | 2 +-
+ arch/riscv/include/asm/vdso/gettimeofday.h | 7 +
+ arch/riscv/kernel/reset.c | 12 +-
+ arch/riscv/kernel/vdso.c | 250 ++++++++++++++++-----
+ arch/riscv/kernel/vdso/vdso.lds.S | 3 +
+ arch/riscv/mm/context.c | 8 +-
+ 15 files changed, 343 insertions(+), 98 deletions(-)
+ create mode 100644 arch/riscv/configs/32-bit.config
+ create mode 100644 arch/riscv/configs/64-bit.config
+Merging s390/for-next (2482e6b6e33c Merge branch 'features' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git s390/for-next
+Auto-merging arch/x86/Kconfig
+Merge made by the 'recursive' strategy.
+ arch/s390/Kconfig | 18 +++
+ arch/s390/boot/compressed/decompressor.h | 1 +
+ arch/s390/boot/head.S | 17 +++
+ arch/s390/boot/startup.c | 8 +
+ arch/s390/configs/debug_defconfig | 9 +-
+ arch/s390/configs/defconfig | 6 +
+ arch/s390/include/asm/barrier.h | 24 +--
+ arch/s390/include/asm/debug.h | 2 +-
+ arch/s390/include/asm/ftrace.h | 58 ++++++-
+ arch/s390/include/asm/jump_label.h | 2 +
+ arch/s390/include/asm/livepatch.h | 4 +-
+ arch/s390/include/asm/ptrace.h | 22 ++-
+ arch/s390/include/asm/text-patching.h | 16 ++
+ arch/s390/kernel/alternative.c | 20 +++
+ arch/s390/kernel/asm-offsets.c | 1 +
+ arch/s390/kernel/entry.h | 1 +
+ arch/s390/kernel/ftrace.c | 96 ++----------
+ arch/s390/kernel/head64.S | 18 ---
+ arch/s390/kernel/jump_label.c | 34 +++--
+ arch/s390/kernel/kprobes.c | 48 +++++-
+ arch/s390/kernel/mcount.S | 64 +++++---
+ arch/s390/kernel/setup.c | 22 ++-
+ arch/s390/kernel/vmlinux.lds.S | 1 +
+ arch/s390/lib/Makefile | 2 +
+ arch/s390/lib/test_kprobes.c | 75 +++++++++
+ arch/s390/lib/test_kprobes.h | 10 ++
+ arch/s390/lib/test_kprobes_asm.S | 45 ++++++
+ arch/s390/lib/test_unwind.c | 169 ++++++++++++---------
+ arch/s390/mm/cmm.c | 11 +-
+ arch/s390/pci/pci_event.c | 4 +
+ arch/s390/pci/pci_sysfs.c | 8 +
+ arch/x86/Kconfig | 1 +
+ drivers/s390/block/dasd_genhd.c | 10 +-
+ drivers/s390/block/dcssblk.c | 8 +-
+ drivers/s390/block/scm_blk.c | 7 +-
+ drivers/s390/char/sclp_sd.c | 9 +-
+ drivers/s390/char/sclp_vt220.c | 4 +-
+ drivers/s390/cio/css.c | 9 +-
+ drivers/s390/cio/device.c | 2 +
+ samples/Kconfig | 5 +-
+ samples/ftrace/ftrace-direct-modify.c | 44 ++++++
+ samples/ftrace/ftrace-direct-too.c | 28 ++++
+ samples/ftrace/ftrace-direct.c | 28 ++++
+ .../ftrace/test.d/kprobe/kprobe_args_string.tc | 3 +
+ .../ftrace/test.d/kprobe/kprobe_args_syntax.tc | 4 +
+ 45 files changed, 722 insertions(+), 256 deletions(-)
+ create mode 100644 arch/s390/include/asm/text-patching.h
+ create mode 100644 arch/s390/lib/test_kprobes.c
+ create mode 100644 arch/s390/lib/test_kprobes.h
+ create mode 100644 arch/s390/lib/test_kprobes_asm.S
+Merging sh/for-next (12285ff8667b sh: kdump: add some attribute to function)
+$ git merge -m Merge branch 'for-next' of git://git.libc.org/linux-sh sh/for-next
+Auto-merging arch/sh/boot/Makefile
+Merge made by the 'recursive' strategy.
+ arch/sh/boards/mach-landisk/irq.c | 4 ++--
+ arch/sh/boot/Makefile | 4 ++--
+ arch/sh/boot/compressed/.gitignore | 5 -----
+ arch/sh/boot/compressed/Makefile | 32 +++++++++++++------------------
+ arch/sh/boot/compressed/ashiftrt.S | 2 ++
+ arch/sh/boot/compressed/ashldi3.c | 2 ++
+ arch/sh/boot/compressed/ashlsi3.S | 2 ++
+ arch/sh/boot/compressed/ashrsi3.S | 2 ++
+ arch/sh/boot/compressed/lshrsi3.S | 2 ++
+ arch/sh/include/asm/checksum_32.h | 5 +++--
+ arch/sh/include/asm/uaccess.h | 4 ++--
+ arch/sh/kernel/cpu/sh4a/smp-shx3.c | 5 +++--
+ arch/sh/kernel/crash_dump.c | 4 ++--
+ arch/sh/kernel/traps_32.c | 8 ++++----
+ arch/sh/math-emu/math.c | 4 ++--
+ arch/sh/mm/nommu.c | 4 ++--
+ drivers/sh/maple/maple.c | 5 ++++-
+ 17 files changed, 49 insertions(+), 45 deletions(-)
+ create mode 100644 arch/sh/boot/compressed/ashiftrt.S
+ create mode 100644 arch/sh/boot/compressed/ashldi3.c
+ create mode 100644 arch/sh/boot/compressed/ashlsi3.S
+ create mode 100644 arch/sh/boot/compressed/ashrsi3.S
+ create mode 100644 arch/sh/boot/compressed/lshrsi3.S
+Merging sparc-next/master (dd0d718152e4 Merge tag 'spi-fix-v5.8-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi)
+$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next.git sparc-next/master
+Already up to date.
+Merging uml/linux-next (ab6ff1fda1e8 uml: x86: add FORCE to user_constants.h)
+$ git merge -m Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git uml/linux-next
+Merge made by the 'recursive' strategy.
+ arch/um/include/asm/delay.h | 4 ++--
+ arch/um/include/asm/irqflags.h | 4 ++--
+ arch/um/include/shared/longjmp.h | 2 +-
+ arch/um/include/shared/os.h | 4 ++--
+ arch/um/kernel/ksyms.c | 2 +-
+ arch/um/os-Linux/sigio.c | 6 +++---
+ arch/um/os-Linux/signal.c | 8 ++++----
+ arch/x86/um/Makefile | 2 +-
+ 8 files changed, 16 insertions(+), 16 deletions(-)
+Merging xtensa/xtensa-for-next (bd47cdb78997 xtensa: move section symbols to asm/sections.h)
+$ git merge -m Merge branch 'xtensa-for-next' of git://github.com/jcmvbkbc/linux-xtensa.git xtensa/xtensa-for-next
+Merge made by the 'recursive' strategy.
+ arch/xtensa/boot/boot-elf/bootstrap.S | 2 +
+ arch/xtensa/boot/boot-redboot/bootstrap.S | 72 +++++-----
+ arch/xtensa/include/asm/asmmacro.h | 65 +++++++++
+ arch/xtensa/include/asm/atomic.h | 26 ++--
+ arch/xtensa/include/asm/cmpxchg.h | 16 +--
+ arch/xtensa/include/asm/core.h | 11 ++
+ arch/xtensa/include/asm/processor.h | 32 ++++-
+ arch/xtensa/include/asm/sections.h | 41 ++++++
+ arch/xtensa/include/asm/traps.h | 2 +
+ arch/xtensa/kernel/align.S | 2 +
+ arch/xtensa/kernel/entry.S | 216 +++++++++++++++++-----------
+ arch/xtensa/kernel/head.S | 24 ++--
+ arch/xtensa/kernel/mcount.S | 38 +++++-
+ arch/xtensa/kernel/process.c | 27 +++-
+ arch/xtensa/kernel/setup.c | 102 +++++---------
+ arch/xtensa/kernel/signal.c | 12 +-
+ arch/xtensa/kernel/traps.c | 6 +-
+ arch/xtensa/kernel/vectors.S | 55 +++++---
+ arch/xtensa/kernel/vmlinux.lds.S | 12 +-
+ arch/xtensa/lib/strncpy_user.S | 17 ++-
+ arch/xtensa/lib/usercopy.S | 28 +++-
+ 21 files changed, 541 insertions(+), 265 deletions(-)
+ create mode 100644 arch/xtensa/include/asm/sections.h
+Merging pidfd/for-next (61bc346ce64a uapi/linux/prctl: provide macro definitions for the PR_SCHED_CORE type argument)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git pidfd/for-next
+Merge made by the 'recursive' strategy.
+ Documentation/admin-guide/hw-vuln/core-scheduling.rst | 5 +++--
+ include/uapi/linux/prctl.h | 3 +++
+ kernel/sched/core_sched.c | 4 ++++
+ 3 files changed, 10 insertions(+), 2 deletions(-)
+Merging fscrypt/master (7f595d6a6cdc fscrypt: allow 256-bit master keys with AES-256-XTS)
+$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fscrypt/master
+Auto-merging fs/ext4/super.c
+Merge made by the 'recursive' strategy.
+ Documentation/block/inline-encryption.rst | 2 +
+ Documentation/filesystems/fscrypt.rst | 83 +++++++++++++++++++++++--------
+ fs/crypto/bio.c | 32 ++++++------
+ fs/crypto/fname.c | 3 +-
+ fs/crypto/fscrypt_private.h | 5 +-
+ fs/crypto/hkdf.c | 11 ++--
+ fs/crypto/keysetup.c | 57 ++++++++++++++++-----
+ fs/ext4/super.c | 1 -
+ fs/f2fs/super.c | 1 -
+ fs/ubifs/crypto.c | 1 -
+ include/linux/fscrypt.h | 3 --
+ 11 files changed, 137 insertions(+), 62 deletions(-)
+Merging fscache/fscache-next (2bc879c792fa Merge branch 'fscache-remove-old-io' into fscache-next)
+$ git merge -m Merge branch 'fscache-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git fscache/fscache-next
+Auto-merging fs/netfs/read_helper.c
+Removing fs/cachefiles/rdwr.c
+Merge made by the 'recursive' strategy.
+ Documentation/filesystems/caching/backend-api.rst | 138 +--
+ Documentation/filesystems/caching/netfs-api.rst | 385 +-------
+ fs/9p/Kconfig | 1 +
+ fs/9p/cache.c | 137 ---
+ fs/9p/cache.h | 98 +-
+ fs/9p/v9fs.h | 9 +
+ fs/9p/vfs_addr.c | 195 ++--
+ fs/9p/vfs_file.c | 21 +-
+ fs/afs/file.c | 2 +-
+ fs/cachefiles/Makefile | 1 -
+ fs/cachefiles/interface.c | 17 +-
+ fs/cachefiles/internal.h | 42 +-
+ fs/cachefiles/io.c | 85 +-
+ fs/cachefiles/main.c | 1 -
+ fs/cachefiles/rdwr.c | 972 ------------------
+ fs/ceph/cache.h | 2 +-
+ fs/cifs/file.c | 64 +-
+ fs/cifs/fscache.c | 105 +-
+ fs/cifs/fscache.h | 74 +-
+ fs/fscache/cache.c | 6 -
+ fs/fscache/cookie.c | 10 -
+ fs/fscache/internal.h | 58 +-
+ fs/fscache/io.c | 170 +++-
+ fs/fscache/object.c | 2 -
+ fs/fscache/page.c | 1066 --------------------
+ fs/fscache/stats.c | 73 +-
+ fs/netfs/read_helper.c | 8 +-
+ fs/nfs/file.c | 14 +-
+ fs/nfs/fscache-index.c | 26 -
+ fs/nfs/fscache.c | 170 +---
+ fs/nfs/fscache.h | 84 +-
+ fs/nfs/read.c | 25 +-
+ fs/nfs/write.c | 7 +-
+ include/linux/fscache-cache.h | 137 +--
+ include/linux/fscache.h | 460 +++------
+ include/linux/netfs.h | 17 +-
+ 36 files changed, 617 insertions(+), 4065 deletions(-)
+ delete mode 100644 fs/cachefiles/rdwr.c
+Merging afs/afs-next (7af08140979a Revert "gcov: clang: fix clang-11+ build")
+$ git merge -m Merge branch 'afs-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git afs/afs-next
+Already up to date.
+Merging btrfs/for-next (b54d2cb4664c Merge branch 'for-next-next-v5.15-20211021' into for-next-20211021)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git btrfs/for-next
+Merge made by the 'recursive' strategy.
+ fs/btrfs/block-group.c | 242 +++---
+ fs/btrfs/block-group.h | 8 +-
+ fs/btrfs/btrfs_inode.h | 46 +-
+ fs/btrfs/check-integrity.c | 205 +++---
+ fs/btrfs/compression.c | 681 +++++++++++----------
+ fs/btrfs/compression.h | 4 +-
+ fs/btrfs/ctree.c | 156 +++---
+ fs/btrfs/ctree.h | 83 ++-
+ fs/btrfs/delayed-inode.c | 41 +-
+ fs/btrfs/delayed-ref.c | 17 +-
+ fs/btrfs/delayed-ref.h | 51 +-
+ fs/btrfs/dev-replace.c | 16 +-
+ fs/btrfs/disk-io.c | 45 +-
+ fs/btrfs/disk-io.h | 5 +-
+ fs/btrfs/extent-tree.c | 326 +++++++----
+ fs/btrfs/extent_io.c | 334 +++++-----
+ fs/btrfs/extent_io.h | 10 +-
+ fs/btrfs/extent_map.c | 4 +-
+ fs/btrfs/file-item.c | 21 +-
+ fs/btrfs/file.c | 35 +-
+ fs/btrfs/free-space-cache.c | 106 +++-
+ fs/btrfs/free-space-cache.h | 2 +
+ fs/btrfs/inode.c | 586 ++++++++--------
+ fs/btrfs/ioctl.c | 1001 ++++++++++++++--------------
+ fs/btrfs/locking.h | 7 +-
+ fs/btrfs/lzo.c | 270 +++++---
+ fs/btrfs/raid56.c | 175 +++---
+ fs/btrfs/raid56.h | 22 +-
+ fs/btrfs/reada.c | 26 +-
+ fs/btrfs/ref-verify.c | 4 +-
+ fs/btrfs/reflink.c | 4 +-
+ fs/btrfs/relocation.c | 81 +--
+ fs/btrfs/scrub.c | 139 ++---
+ fs/btrfs/send.c | 15 +-
+ fs/btrfs/space-info.c | 28 +-
+ fs/btrfs/subpage.c | 290 +++++---
+ fs/btrfs/subpage.h | 56 +-
+ fs/btrfs/super.c | 28 +-
+ fs/btrfs/sysfs.c | 93 ++--
+ fs/btrfs/tests/extent-buffer-tests.c | 2 +-
+ fs/btrfs/tests/extent-io-tests.c | 12 +-
+ fs/btrfs/tests/inode-tests.c | 4 +-
+ fs/btrfs/transaction.c | 11 +-
+ fs/btrfs/tree-log.c | 714 +++++++++++++--------
+ fs/btrfs/tree-log.h | 18 +-
+ fs/btrfs/volumes.c | 592 +++++++++--------
+ fs/btrfs/volumes.h | 119 ++--
+ fs/btrfs/xattr.c | 2 +-
+ fs/btrfs/zoned.c | 531 ++++++++++++++--
+ fs/btrfs/zoned.h | 39 +-
+ fs/inode.c | 7 +-
+ include/linux/fs.h | 2 +
+ 52 files changed, 4445 insertions(+), 2871 deletions(-)
+Merging ceph/master (324bfaa1a6cc libceph: drop ->monmap and err initialization)
+$ git merge -m Merge branch 'master' of git://github.com/ceph/ceph-client.git ceph/master
+Merge made by the 'recursive' strategy.
+ fs/ceph/addr.c | 13 +------------
+ net/ceph/mon_client.c | 3 +--
+ 2 files changed, 2 insertions(+), 14 deletions(-)
+Merging cifs/for-next (08e9f52e2dce cifs: for compound requests, use open handle if possible)
+$ git merge -m Merge branch 'for-next' of git://git.samba.org/sfrench/cifs-2.6.git cifs/for-next
+Merge made by the 'recursive' strategy.
+ fs/cifs/connect.c | 19 +++++++++++--------
+ fs/cifs/fs_context.c | 8 ++++++++
+ fs/cifs/fs_context.h | 1 +
+ fs/cifs/smb2inode.c | 42 +++++++++++++++++++++++++++++++++---------
+ 4 files changed, 53 insertions(+), 17 deletions(-)
+Merging configfs/for-next (c42dd069be8d configfs: fix a race in configfs_lookup())
+$ git merge -m Merge branch 'for-next' of git://git.infradead.org/users/hch/configfs.git configfs/for-next
+Already up to date.
+Merging ecryptfs/next (682a8e2b41ef Merge tag 'ecryptfs-5.13-rc1-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks/ecryptfs.git ecryptfs/next
+Already up to date.
+Merging erofs/dev (1d35b798bffe erofs: get rid of ->lru usage)
+$ git merge -m Merge branch 'dev' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git erofs/dev
+Merge made by the 'recursive' strategy.
+ Documentation/filesystems/erofs.rst | 12 +-
+ fs/erofs/Kconfig | 40 +++--
+ fs/erofs/Makefile | 1 +
+ fs/erofs/compress.h | 28 ++--
+ fs/erofs/data.c | 73 +++++++--
+ fs/erofs/decompressor.c | 138 ++++++-----------
+ fs/erofs/decompressor_lzma.c | 290 ++++++++++++++++++++++++++++++++++++
+ fs/erofs/erofs_fs.h | 73 ++++++---
+ fs/erofs/inode.c | 2 +-
+ fs/erofs/internal.h | 105 +++++++++++--
+ fs/erofs/pcpubuf.c | 6 +-
+ fs/erofs/super.c | 231 ++++++++++++++++++++------
+ fs/erofs/utils.c | 19 ++-
+ fs/erofs/xattr.c | 4 +-
+ fs/erofs/zdata.c | 175 +++++++++++++-------
+ fs/erofs/zdata.h | 7 -
+ fs/erofs/zmap.c | 65 +++++---
+ include/linux/xz.h | 106 +++++++++++++
+ include/trace/events/erofs.h | 2 +-
+ lib/decompress_unxz.c | 10 +-
+ lib/xz/Kconfig | 13 ++
+ lib/xz/xz_dec_lzma2.c | 182 +++++++++++++++++++++-
+ lib/xz/xz_dec_stream.c | 6 +-
+ lib/xz/xz_dec_syms.c | 9 +-
+ lib/xz/xz_private.h | 3 +
+ 25 files changed, 1281 insertions(+), 319 deletions(-)
+ create mode 100644 fs/erofs/decompressor_lzma.c
+Merging exfat/dev (efc7bd8527d0 exfat: fix incorrect loading of i_blocks for large files)
+$ git merge -m Merge branch 'dev' of git://git.kernel.org/pub/scm/linux/kernel/git/linkinjeon/exfat.git exfat/dev
+Merge made by the 'recursive' strategy.
+ fs/exfat/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+Merging ext3/for_next (e96a1866b405 isofs: Fix out of bound access for corrupted isofs image)
+$ git merge -m Merge branch 'for_next' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git ext3/for_next
+Merge made by the 'recursive' strategy.
+ fs/isofs/inode.c | 2 ++
+ fs/quota/quota_tree.c | 15 +++++++++++++++
+ 2 files changed, 17 insertions(+)
+Merging ext4/dev (916ff8d5ea0e ext4: prevent partial update of the extent blocks)
+$ git merge -m Merge branch 'dev' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git ext4/dev
+Auto-merging fs/ext4/super.c
+Merge made by the 'recursive' strategy.
+ fs/ext4/ext4.h | 3 +-
+ fs/ext4/extents.c | 172 +++++++++++++++++++++++++++++++-----------------------
+ fs/ext4/inode.c | 15 ++---
+ fs/ext4/page-io.c | 8 +--
+ fs/ext4/super.c | 9 ++-
+ 5 files changed, 116 insertions(+), 91 deletions(-)
+Merging f2fs/dev (6d135345f6ad f2fs: multidevice: support direct IO)
+$ git merge -m Merge branch 'dev' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git f2fs/dev
+Auto-merging fs/f2fs/super.c
+Merge made by the 'recursive' strategy.
+ Documentation/ABI/testing/sysfs-fs-f2fs | 16 +++++++
+ Documentation/filesystems/f2fs.rst | 18 ++++++++
+ fs/f2fs/checkpoint.c | 6 +--
+ fs/f2fs/compress.c | 1 +
+ fs/f2fs/data.c | 57 +++++++++++++++++++++--
+ fs/f2fs/f2fs.h | 47 ++++++++++++++++---
+ fs/f2fs/gc.c | 5 ++-
+ fs/f2fs/inode.c | 2 +-
+ fs/f2fs/namei.c | 2 +-
+ fs/f2fs/node.c | 1 +
+ fs/f2fs/node.h | 5 ---
+ fs/f2fs/recovery.c | 8 +---
+ fs/f2fs/segment.c | 80 +++++++++++++++++++++++++--------
+ fs/f2fs/segment.h | 1 +
+ fs/f2fs/super.c | 21 ++++++++-
+ fs/f2fs/sysfs.c | 24 +++++++++-
+ include/trace/events/f2fs.h | 33 +++++++++-----
+ 17 files changed, 268 insertions(+), 59 deletions(-)
+Merging fsverity/fsverity (07c99001312c fs-verity: support reading signature with ioctl)
+$ git merge -m Merge branch 'fsverity' of git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity/fsverity
+Already up to date.
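
The "Resolved '...' using previous resolution." lines in the nds32 and risc-v
merges above are git rerere replaying a recorded conflict resolution: the
merge still stops with "Automatic merge failed", but the working tree already
contains the saved fix, so a plain commit completes it. A by-hand
approximation of what the log shows (tree and file names here are
placeholders):

$ git config rerere.enabled true
$ git merge -m "Merge branch 'next' of <url>" sometree/next
# CONFLICT (content): Merge conflict in some/file -- resolve once by hand;
# rerere records the resolution. On a later rerun of the same merge, git
# prints "Resolved 'some/file' using previous resolution." and the commit
# below picks up the replayed result:
$ git commit --no-edit -v -a
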
+Merging fuse/for-next (85bf4c6df4e9 fuse: only update necessary attributes) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git fuse/for-next +Merge made by the 'recursive' strategy. + fs/fuse/dax.c | 5 +-- + fs/fuse/dev.c | 10 ++--- + fs/fuse/dir.c | 128 +++++++++++++++++++++++++----------------------------- + fs/fuse/file.c | 103 ++++++++++++++++++++++--------------------- + fs/fuse/fuse_i.h | 17 ++++++-- + fs/fuse/inode.c | 45 ++++++++++++++++--- + fs/fuse/ioctl.c | 4 +- + fs/fuse/readdir.c | 6 +-- + fs/fuse/xattr.c | 10 ++--- + 9 files changed, 181 insertions(+), 147 deletions(-) +Merging gfs2/for-next (d000f3b901c3 gfs2: Fix unused value warning in do_gfs2_set_flags()) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git gfs2/for-next +Auto-merging fs/ntfs3/file.c +Auto-merging fs/fuse/file.c +Auto-merging fs/erofs/data.c +Auto-merging fs/btrfs/ioctl.c +Auto-merging fs/btrfs/file.c +Auto-merging arch/x86/kernel/fpu/signal.c +Merge made by the 'recursive' strategy. + arch/powerpc/kernel/kvm.c | 3 +- + arch/powerpc/kernel/signal_32.c | 4 +- + arch/powerpc/kernel/signal_64.c | 2 +- + arch/x86/kernel/fpu/signal.c | 7 +- + drivers/gpu/drm/armada/armada_gem.c | 7 +- + fs/btrfs/file.c | 7 +- + fs/btrfs/ioctl.c | 5 +- + fs/erofs/data.c | 2 +- + fs/ext4/file.c | 5 +- + fs/f2fs/file.c | 2 +- + fs/fuse/file.c | 2 +- + fs/gfs2/bmap.c | 60 +---- + fs/gfs2/file.c | 267 +++++++++++++++++--- + fs/gfs2/glock.c | 471 ++++++++++++++++++++++++++---------- + fs/gfs2/glock.h | 34 ++- + fs/gfs2/glops.c | 29 +-- + fs/gfs2/incore.h | 10 +- + fs/gfs2/inode.c | 12 +- + fs/gfs2/rgrp.c | 70 +++--- + fs/gfs2/rgrp.h | 2 +- + fs/gfs2/super.c | 4 +- + fs/gfs2/trace_gfs2.h | 9 +- + fs/gfs2/util.c | 2 + + fs/iomap/buffered-io.c | 2 +- + fs/iomap/direct-io.c | 29 ++- + fs/ntfs/file.c | 2 +- + fs/ntfs3/file.c | 2 +- + fs/xfs/xfs_file.c | 6 +- + fs/zonefs/super.c | 4 +- + include/linux/iomap.h | 11 +- + include/linux/mm.h | 3 +- + include/linux/pagemap.h | 58 +---- + include/linux/uio.h | 4 +- + lib/iov_iter.c | 103 ++++++-- + mm/filemap.c | 4 +- + mm/gup.c | 139 ++++++++++- + 36 files changed, 972 insertions(+), 411 deletions(-) +Merging jfs/jfs-next (c48a14dca2cb JFS: fix memleak in jfs_mount) +$ git merge -m Merge branch 'jfs-next' of git://github.com/kleikamp/linux-shaggy.git jfs/jfs-next +Merge made by the 'recursive' strategy. + fs/jfs/jfs_mount.c | 51 ++++++++++++++++++++++----------------------------- + 1 file changed, 22 insertions(+), 29 deletions(-) +Merging ksmbd/ksmbd-for-next (0d994cd482ee ksmbd: add buffer validation in session setup) +$ git merge -m Merge branch 'ksmbd-for-next' of https://github.com/smfrench/smb3-kernel.git ksmbd/ksmbd-for-next +Already up to date. +Merging nfs/linux-next (4cd27df88af2 NFS: Remove redundant call to __set_page_dirty_nobuffers) +$ git merge -m Merge branch 'linux-next' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6.git nfs/linux-next +Auto-merging fs/nfs/write.c +Auto-merging fs/nfs/read.c +Merge made by the 'recursive' strategy. 
+ fs/lockd/clntproc.c | 3 - + fs/lockd/svc4proc.c | 2 - + fs/lockd/svcproc.c | 2 - + fs/nfs/client.c | 37 +++-- + fs/nfs/delegation.c | 10 +- + fs/nfs/dir.c | 53 ++++-- + fs/nfs/direct.c | 2 +- + fs/nfs/filelayout/filelayout.c | 2 - + fs/nfs/flexfilelayout/flexfilelayout.c | 2 - + fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 +- + fs/nfs/inode.c | 59 ++++--- + fs/nfs/internal.h | 3 +- + fs/nfs/nfs3xdr.c | 2 +- + fs/nfs/nfs4client.c | 65 +++----- + fs/nfs/nfs4idmap.c | 2 +- + fs/nfs/nfs4proc.c | 101 ++++-------- + fs/nfs/nfs4state.c | 5 +- + fs/nfs/nfs4trace.h | 9 +- + fs/nfs/nfstrace.h | 266 ++++++++++++++++++++++-------- + fs/nfs/pagelist.c | 13 +- + fs/nfs/pnfs.h | 2 +- + fs/nfs/pnfs_nfs.c | 6 +- + fs/nfs/proc.c | 2 +- + fs/nfs/read.c | 11 +- + fs/nfs/super.c | 7 +- + fs/nfs/write.c | 73 ++++---- + include/linux/nfs_fs.h | 54 +++--- + include/linux/sunrpc/clnt.h | 1 + + include/linux/sunrpc/sched.h | 16 +- + include/trace/events/rpcgss.h | 18 +- + include/trace/events/rpcrdma.h | 55 +++--- + include/trace/events/sunrpc.h | 99 ++++++----- + include/trace/events/sunrpc_base.h | 18 ++ + net/sunrpc/clnt.c | 33 ++-- + net/sunrpc/sched.c | 20 ++- + net/sunrpc/xprt.c | 36 ++-- + net/sunrpc/xprtrdma/frwr_ops.c | 35 ++++ + net/sunrpc/xprtrdma/rpc_rdma.c | 23 ++- + net/sunrpc/xprtrdma/verbs.c | 3 +- + net/sunrpc/xprtrdma/xprt_rdma.h | 6 +- + 40 files changed, 674 insertions(+), 486 deletions(-) + create mode 100644 include/trace/events/sunrpc_base.h +Merging nfs-anna/linux-next (8cfb9015280d NFS: Always provide aligned buffers to the RPC read layers) +$ git merge -m Merge branch 'linux-next' of git://git.linux-nfs.org/projects/anna/linux-nfs.git nfs-anna/linux-next +Already up to date. +Merging nfsd/nfsd-next (291cd656da04 NFSD:fix boolreturn.cocci warning) +$ git merge -m Merge branch 'nfsd-next' of git://git.linux-nfs.org/~bfields/linux.git nfsd/nfsd-next +Auto-merging net/sunrpc/auth_gss/svcauth_gss.c +Removing include/uapi/linux/nfsd/nfsfh.h +Auto-merging include/trace/events/sunrpc.h +CONFLICT (content): Merge conflict in include/trace/events/sunrpc.h +Auto-merging include/trace/events/rpcrdma.h +Auto-merging fs/nfsd/nfsctl.c +Auto-merging fs/nfsd/nfs4xdr.c +Auto-merging fs/nfsd/nfs4state.c +Resolved 'include/trace/events/sunrpc.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master a97d53f4ab6e] Merge branch 'nfsd-next' of git://git.linux-nfs.org/~bfields/linux.git +$ git diff -M --stat --summary HEAD^.. 
+ Documentation/filesystems/nfs/index.rst | 1 + + Documentation/filesystems/nfs/reexport.rst | 113 +++++++++ + fs/lockd/svc.c | 6 +- + fs/lockd/xdr.c | 152 ++++++----- + fs/lockd/xdr4.c | 153 ++++++------ + fs/nfs/callback_xdr.c | 4 +- + fs/nfsd/flexfilelayout.c | 2 +- + fs/nfsd/lockd.c | 2 +- + fs/nfsd/nfs2acl.c | 44 ++-- + fs/nfsd/nfs3acl.c | 48 ++-- + fs/nfsd/nfs3proc.c | 3 +- + fs/nfsd/nfs3xdr.c | 387 +++++++++++++---------------- + fs/nfsd/nfs4callback.c | 2 +- + fs/nfsd/nfs4proc.c | 11 +- + fs/nfsd/nfs4state.c | 6 +- + fs/nfsd/nfs4xdr.c | 52 ++-- + fs/nfsd/nfscache.c | 17 +- + fs/nfsd/nfsctl.c | 6 +- + fs/nfsd/nfsd.h | 6 +- + fs/nfsd/nfsfh.c | 173 ++++--------- + fs/nfsd/nfsfh.h | 55 +++- + fs/nfsd/nfsproc.c | 3 +- + fs/nfsd/nfssvc.c | 28 +-- + fs/nfsd/nfsxdr.c | 187 ++++++-------- + fs/nfsd/vfs.c | 4 +- + fs/nfsd/xdr.h | 37 +-- + fs/nfsd/xdr3.h | 63 ++--- + fs/nfsd/xdr4.h | 7 +- + include/linux/lockd/xdr.h | 27 +- + include/linux/lockd/xdr4.h | 29 ++- + include/linux/sunrpc/svc.h | 14 +- + include/trace/events/rpcrdma.h | 185 +++++++++++++- + include/trace/events/sunrpc.h | 38 ++- + include/uapi/linux/nfsd/nfsfh.h | 115 --------- + net/sunrpc/addr.c | 40 ++- + net/sunrpc/auth_gss/svcauth_gss.c | 2 +- + net/sunrpc/svc.c | 80 +----- + net/sunrpc/svc_xprt.c | 1 + + net/sunrpc/xdr.c | 32 +-- + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 +- + net/sunrpc/xprtrdma/svc_rdma_rw.c | 30 ++- + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 14 +- + 42 files changed, 1141 insertions(+), 1047 deletions(-) + create mode 100644 Documentation/filesystems/nfs/reexport.rst + delete mode 100644 include/uapi/linux/nfsd/nfsfh.h +Merging cel/for-next (c20106944eb6 NFSD: Keep existing listeners on portlist error) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux cel/for-next +Already up to date. +Merging ntfs3/master (8607954cf255 fs/ntfs3: Check for NULL pointers in ni_try_remove_attr_list) +$ git merge -m Merge branch 'master' of https://github.com/Paragon-Software-Group/linux-ntfs3.git ntfs3/master +Already up to date. +Merging orangefs/for-next (ac2c63757f4f orangefs: Fix sb refcount leak when allocate sb info failed.) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux orangefs/for-next +Merge made by the 'recursive' strategy. + fs/orangefs/dcache.c | 4 +++- + fs/orangefs/super.c | 4 ++-- + 2 files changed, 5 insertions(+), 3 deletions(-) +Merging overlayfs/overlayfs-next (1dc1eed46f9f ovl: fix IOCB_DIRECT if underlying fs doesn't support direct IO) +$ git merge -m Merge branch 'overlayfs-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git overlayfs/overlayfs-next +Already up to date. +Merging ubifs/next (a801fcfeef96 ubifs: Set/Clear I_LINKABLE under i_lock for whiteout inode) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git ubifs/next +Already up to date. +Merging v9fs/9p-next (9c4d94dc9a64 net/9p: increase default msize to 128k) +$ git merge -m Merge branch '9p-next' of git://github.com/martinetd/linux v9fs/9p-next +Already up to date. +Merging xfs/for-next (5ca5916b6bc9 xfs: punch out data fork delalloc blocks on COW writeback failure) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git xfs/for-next +Merge made by the 'recursive' strategy. 
+ fs/xfs/kmem.h | 4 - + fs/xfs/libxfs/xfs_ag.c | 2 +- + fs/xfs/libxfs/xfs_ag.h | 36 ++-- + fs/xfs/libxfs/xfs_ag_resv.c | 3 +- + fs/xfs/libxfs/xfs_alloc.c | 120 ++++++++++--- + fs/xfs/libxfs/xfs_alloc.h | 38 ++++- + fs/xfs/libxfs/xfs_alloc_btree.c | 63 +++++-- + fs/xfs/libxfs/xfs_alloc_btree.h | 5 + + fs/xfs/libxfs/xfs_attr_leaf.c | 2 +- + fs/xfs/libxfs/xfs_bmap.c | 101 ++++------- + fs/xfs/libxfs/xfs_bmap.h | 35 +--- + fs/xfs/libxfs/xfs_bmap_btree.c | 62 +++++-- + fs/xfs/libxfs/xfs_bmap_btree.h | 5 + + fs/xfs/libxfs/xfs_btree.c | 333 +++++++++++++++++++++++-------------- + fs/xfs/libxfs/xfs_btree.h | 99 ++++++++--- + fs/xfs/libxfs/xfs_btree_staging.c | 8 +- + fs/xfs/libxfs/xfs_da_btree.c | 6 +- + fs/xfs/libxfs/xfs_da_btree.h | 3 +- + fs/xfs/libxfs/xfs_defer.c | 241 ++++++++++++++++++++------- + fs/xfs/libxfs/xfs_defer.h | 41 ++++- + fs/xfs/libxfs/xfs_dquot_buf.c | 4 +- + fs/xfs/libxfs/xfs_format.h | 12 +- + fs/xfs/libxfs/xfs_fs.h | 2 + + fs/xfs/libxfs/xfs_ialloc.c | 5 +- + fs/xfs/libxfs/xfs_ialloc_btree.c | 90 +++++++++- + fs/xfs/libxfs/xfs_ialloc_btree.h | 5 + + fs/xfs/libxfs/xfs_inode_buf.c | 6 +- + fs/xfs/libxfs/xfs_inode_fork.c | 24 +-- + fs/xfs/libxfs/xfs_inode_fork.h | 2 +- + fs/xfs/libxfs/xfs_refcount.c | 46 +++-- + fs/xfs/libxfs/xfs_refcount.h | 7 +- + fs/xfs/libxfs/xfs_refcount_btree.c | 65 +++++++- + fs/xfs/libxfs/xfs_refcount_btree.h | 5 + + fs/xfs/libxfs/xfs_rmap.c | 21 ++- + fs/xfs/libxfs/xfs_rmap.h | 7 +- + fs/xfs/libxfs/xfs_rmap_btree.c | 116 ++++++++++--- + fs/xfs/libxfs/xfs_rmap_btree.h | 5 + + fs/xfs/libxfs/xfs_sb.c | 4 +- + fs/xfs/libxfs/xfs_trans_resv.c | 18 +- + fs/xfs/libxfs/xfs_trans_space.h | 9 +- + fs/xfs/scrub/agheader.c | 13 +- + fs/xfs/scrub/agheader_repair.c | 8 +- + fs/xfs/scrub/bitmap.c | 22 +-- + fs/xfs/scrub/bmap.c | 2 +- + fs/xfs/scrub/btree.c | 121 +++++++------- + fs/xfs/scrub/btree.h | 17 +- + fs/xfs/scrub/dabtree.c | 62 +++---- + fs/xfs/scrub/repair.h | 3 + + fs/xfs/scrub/scrub.c | 64 +++---- + fs/xfs/scrub/trace.c | 11 +- + fs/xfs/scrub/trace.h | 10 +- + fs/xfs/xfs_aops.c | 15 +- + fs/xfs/xfs_attr_inactive.c | 2 +- + fs/xfs/xfs_bmap_item.c | 18 +- + fs/xfs/xfs_bmap_item.h | 6 +- + fs/xfs/xfs_buf.c | 14 +- + fs/xfs/xfs_buf_item.c | 8 +- + fs/xfs/xfs_buf_item.h | 2 +- + fs/xfs/xfs_buf_item_recover.c | 2 +- + fs/xfs/xfs_dquot.c | 28 ++-- + fs/xfs/xfs_extfree_item.c | 33 ++-- + fs/xfs/xfs_extfree_item.h | 6 +- + fs/xfs/xfs_icache.c | 10 +- + fs/xfs/xfs_icreate_item.c | 6 +- + fs/xfs/xfs_icreate_item.h | 2 +- + fs/xfs/xfs_inode.c | 2 +- + fs/xfs/xfs_inode.h | 2 +- + fs/xfs/xfs_inode_item.c | 6 +- + fs/xfs/xfs_inode_item.h | 2 +- + fs/xfs/xfs_ioctl.c | 6 +- + fs/xfs/xfs_log.c | 6 +- + fs/xfs/xfs_log_priv.h | 2 +- + fs/xfs/xfs_log_recover.c | 12 +- + fs/xfs/xfs_mount.c | 14 ++ + fs/xfs/xfs_mount.h | 5 +- + fs/xfs/xfs_mru_cache.c | 2 +- + fs/xfs/xfs_qm.c | 2 +- + fs/xfs/xfs_qm.h | 2 +- + fs/xfs/xfs_refcount_item.c | 18 +- + fs/xfs/xfs_refcount_item.h | 6 +- + fs/xfs/xfs_reflink.c | 2 +- + fs/xfs/xfs_rmap_item.c | 18 +- + fs/xfs/xfs_rmap_item.h | 6 +- + fs/xfs/xfs_super.c | 234 +++++++++++++------------- + fs/xfs/xfs_sysfs.c | 24 +-- + fs/xfs/xfs_trace.h | 2 +- + fs/xfs/xfs_trans.c | 16 +- + fs/xfs/xfs_trans.h | 8 +- + fs/xfs/xfs_trans_dquot.c | 4 +- + 89 files changed, 1651 insertions(+), 895 deletions(-) +Merging zonefs/for-next (95b115332a83 zonefs: remove redundant null bio check) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git zonefs/for-next +Auto-merging fs/zonefs/super.c +Merge made by the 
'recursive' strategy. +Merging iomap/iomap-for-next (03b8df8d43ec iomap: standardize tracepoint formatting and storage) +$ git merge -m Merge branch 'iomap-for-next' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git iomap/iomap-for-next +Already up to date. +Merging djw-vfs/vfs-for-next (d03ef4daf33a fs: forbid invalid project ID) +$ git merge -m Merge branch 'vfs-for-next' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git djw-vfs/vfs-for-next +Already up to date. +Merging file-locks/locks-next (e9728cc72d91 locks: remove changelog comments) +$ git merge -m Merge branch 'locks-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git file-locks/locks-next +Auto-merging fs/nfs/file.c +Auto-merging fs/gfs2/file.c +Merge made by the 'recursive' strategy. + Documentation/filesystems/index.rst | 1 - + Documentation/filesystems/locks.rst | 17 ++-- + fs/ceph/locks.c | 3 - + fs/gfs2/file.c | 2 - + fs/locks.c | 161 ++++++------------------------------ + fs/nfs/file.c | 9 -- + include/uapi/asm-generic/fcntl.h | 4 + + 7 files changed, 35 insertions(+), 162 deletions(-) +Merging vfs/for-next (8f40da9494cf Merge branch 'misc.namei' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git vfs/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging printk/for-next (c15b5fc054c3 ia64: don't do IA64_CMPXCHG_DEBUG without CONFIG_PRINTK) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git printk/for-next +Auto-merging kernel/printk/printk.c +Merge made by the 'recursive' strategy. + arch/ia64/Kconfig.debug | 2 +- + kernel/printk/index.c | 5 ++--- + kernel/printk/printk.c | 5 +++-- + 3 files changed, 6 insertions(+), 6 deletions(-) +Merging pci/next (1b6c2cdb635d Merge branch 'remotes/lorenzo/pci/xgene') +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/next +Auto-merging kernel/irq/irqdomain.c +Auto-merging include/linux/irqdomain.h +Auto-merging drivers/usb/host/xhci-pci.c +Removing drivers/staging/mt7621-pci/mediatek,mt7621-pci.txt +Removing drivers/staging/mt7621-pci/TODO +Removing drivers/staging/mt7621-pci/Makefile +Removing drivers/staging/mt7621-pci/Kconfig +Auto-merging drivers/pci/controller/pci-hyperv.c +Auto-merging drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +Auto-merging drivers/iommu/apple-dart.c +Auto-merging arch/s390/pci/pci.c +Auto-merging arch/powerpc/platforms/powernv/pci-sriov.c +Auto-merging arch/powerpc/kernel/eeh.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ Documentation/ABI/testing/sysfs-bus-pci | 11 +
+ .../bindings/pci/mediatek,mt7621-pcie.yaml | 142 ++++
+ .../devicetree/bindings/pci/qcom,pcie-ep.yaml | 158 ++++
+ .../devicetree/bindings/pci/qcom,pcie.txt | 5 +-
+ .../devicetree/bindings/pci/rockchip-dw-pcie.yaml | 141 ++++
+ MAINTAINERS | 23 +-
+ arch/microblaze/pci/pci-common.c | 3 +-
+ arch/mips/ralink/Kconfig | 3 +-
+ arch/powerpc/include/asm/ppc-pci.h | 5 -
+ arch/powerpc/kernel/eeh.c | 8 +
+ arch/powerpc/kernel/eeh_driver.c | 10 +-
+ arch/powerpc/kernel/pci-common.c | 2 +-
+ arch/powerpc/platforms/powernv/pci-sriov.c | 2 +-
+ arch/s390/pci/pci.c | 2 +-
+ arch/sparc/kernel/pci.c | 2 +-
+ arch/x86/events/intel/uncore.c | 2 +-
+ arch/x86/kernel/probe_roms.c | 2 +-
+ arch/x86/pci/common.c | 2 +-
+ drivers/acpi/pci_root.c | 161 ++--
+ drivers/bcma/host_pci.c | 6 +-
+ drivers/crypto/hisilicon/qm.c | 2 +-
+ drivers/crypto/qat/qat_4xxx/adf_drv.c | 7 +-
+ drivers/crypto/qat/qat_c3xxx/adf_drv.c | 7 +-
+ drivers/crypto/qat/qat_c62x/adf_drv.c | 7 +-
+ drivers/crypto/qat/qat_common/adf_aer.c | 10 +-
+ drivers/crypto/qat/qat_common/adf_common_drv.h | 3 +-
+ drivers/crypto/qat/qat_dh895xcc/adf_drv.c | 7 +-
+ drivers/iommu/apple-dart.c | 27 +
+ drivers/message/fusion/mptbase.c | 7 +-
+ drivers/message/fusion/mptbase.h | 2 +-
+ drivers/message/fusion/mptctl.c | 4 +-
+ drivers/message/fusion/mptlan.c | 2 +-
+ drivers/misc/cxl/guest.c | 30 +-
+ drivers/misc/cxl/pci.c | 35 +-
+ drivers/net/ethernet/chelsio/cxgb3/common.h | 2 -
+ drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 38 +-
+ drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 98 +--
+ drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +-
+ .../net/ethernet/marvell/prestera/prestera_pci.c | 2 +-
+ drivers/net/ethernet/mellanox/mlxsw/pci.c | 2 +-
+ .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 +-
+ drivers/of/irq.c | 17 +-
+ drivers/pci/controller/Kconfig | 28 +-
+ drivers/pci/controller/Makefile | 3 +
+ drivers/pci/controller/dwc/Kconfig | 28 +-
+ drivers/pci/controller/dwc/Makefile | 1 +
+ drivers/pci/controller/dwc/pci-dra7xx.c | 22 +-
+ drivers/pci/controller/dwc/pci-imx6.c | 2 +-
+ drivers/pci/controller/dwc/pcie-designware-ep.c | 3 +
+ drivers/pci/controller/dwc/pcie-designware-host.c | 19 +-
+ drivers/pci/controller/dwc/pcie-designware.c | 1 +
+ drivers/pci/controller/dwc/pcie-qcom-ep.c | 721 ++++++++++++++++++
+ drivers/pci/controller/dwc/pcie-qcom.c | 96 ++-
+ drivers/pci/controller/dwc/pcie-uniphier.c | 26 +-
+ drivers/pci/controller/dwc/pcie-visconti.c | 5 +-
+ drivers/pci/controller/pci-aardvark.c | 372 ++++++---
+ drivers/pci/controller/pci-hyperv.c | 4 +-
+ drivers/pci/controller/pci-xgene-msi.c | 2 +-
+ drivers/pci/controller/pci-xgene.c | 3 +-
+ drivers/pci/controller/pcie-apple.c | 823 +++++++++++++++++++++
+ drivers/pci/controller/pcie-brcmstb.c | 2 +-
+ drivers/pci/controller/pcie-iproc.c | 2 +-
+ .../pci-mt7621.c => pci/controller/pcie-mt7621.c} | 24 +-
+ drivers/pci/controller/pcie-rcar-ep.c | 5 +-
+ drivers/pci/controller/pcie-rcar-host.c | 2 -
+ drivers/pci/controller/vmd.c | 45 +-
+ drivers/pci/endpoint/functions/pci-epf-ntb.c | 22 +-
+ drivers/pci/endpoint/pci-ep-cfs.c | 48 +-
+ drivers/pci/endpoint/pci-epc-core.c | 2 +-
+ drivers/pci/endpoint/pci-epf-core.c | 4 +-
+ drivers/pci/hotplug/acpiphp_glue.c | 2 +-
+ drivers/pci/hotplug/cpqphp_ctrl.c | 4 +-
+ drivers/pci/hotplug/cpqphp_pci.c | 6 +-
+ drivers/pci/hotplug/ibmphp.h | 4 +-
+ drivers/pci/hotplug/pciehp.h | 2 +
+ drivers/pci/hotplug/pciehp_core.c | 2 +
+ drivers/pci/hotplug/pciehp_hpc.c | 26 +
+ drivers/pci/hotplug/shpchp_hpc.c | 2 +-
+ drivers/pci/iov.c | 38 +-
+ drivers/pci/of.c | 10 +-
+ drivers/pci/p2pdma.c | 8 +-
+ drivers/pci/pci-driver.c | 57 +-
+ drivers/pci/pci-sysfs.c | 51 +-
+ drivers/pci/pci.c | 58 +-
+ drivers/pci/pci.h | 1 +
+ drivers/pci/pcie/Makefile | 4 +-
+ drivers/pci/pcie/aer.c | 2 +-
+ drivers/pci/pcie/aspm.c | 4 +-
+ drivers/pci/pcie/err.c | 40 +-
+ drivers/pci/pcie/portdrv.h | 6 +-
+ drivers/pci/pcie/portdrv_core.c | 67 +-
+ drivers/pci/pcie/portdrv_pci.c | 27 +-
+ drivers/pci/probe.c | 53 +-
+ drivers/pci/quirks.c | 59 +-
+ drivers/pci/setup-irq.c | 26 +-
+ drivers/pci/switch/switchtec.c | 95 ++-
+ drivers/pci/vpd.c | 79 +-
+ drivers/pci/xen-pcifront.c | 58 +-
+ drivers/ssb/pcihost_wrapper.c | 6 +-
+ drivers/staging/Kconfig | 2 -
+ drivers/staging/Makefile | 1 -
+ drivers/staging/mt7621-pci/Kconfig | 8 -
+ drivers/staging/mt7621-pci/Makefile | 2 -
+ drivers/staging/mt7621-pci/TODO | 4 -
+ drivers/staging/mt7621-pci/mediatek,mt7621-pci.txt | 104 ---
+ drivers/usb/host/xhci-pci.c | 2 +-
+ include/linux/acpi.h | 2 -
+ include/linux/irqdomain.h | 4 +
+ include/linux/pci.h | 21 +-
+ include/linux/switchtec.h | 1 +
+ include/uapi/linux/pci_regs.h | 6 +
+ kernel/irq/irqdomain.c | 7 +-
+ 112 files changed, 3285 insertions(+), 995 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/pci/mediatek,mt7621-pcie.yaml
+ create mode 100644 Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml
+ create mode 100644 Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml
+ create mode 100644 drivers/pci/controller/dwc/pcie-qcom-ep.c
+ create mode 100644 drivers/pci/controller/pcie-apple.c
+ rename drivers/{staging/mt7621-pci/pci-mt7621.c => pci/controller/pcie-mt7621.c} (95%)
+ delete mode 100644 drivers/staging/mt7621-pci/Kconfig
+ delete mode 100644 drivers/staging/mt7621-pci/Makefile
+ delete mode 100644 drivers/staging/mt7621-pci/TODO
+ delete mode 100644 drivers/staging/mt7621-pci/mediatek,mt7621-pci.txt
+Merging pstore/for-next/pstore (c5d4fb2539ca pstore/blk: Use "%lu" to format unsigned long)
+$ git merge -m Merge branch 'for-next/pstore' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git pstore/for-next/pstore
+Merge made by the 'recursive' strategy.
+ fs/pstore/blk.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+Merging hid/for-next (26922a6c2aaf Merge branches 'for-5.16/asus' and 'for-5.16/apple' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git hid/for-next
+Merge made by the 'recursive' strategy.
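The "rename drivers/{staging/mt7621-pci/pci-mt7621.c => pci/controller/pcie-mt7621.c} (95%)" summary line in the PCI merge above comes from git's rename detection: the percentage is the similarity index between the deleted and the newly created file. A minimal sketch of requesting the same detection by hand (the range assumes the merge was the last commit made):

$ git diff -M --stat --summary HEAD^..HEAD
$ git log --follow --oneline -- drivers/pci/controller/pcie-mt7621.c

-M (--find-renames) enables the detection for diffs; --follow lets git log walk history across the rename.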
+ drivers/hid/Kconfig | 7 ++
+ drivers/hid/Makefile | 1 +
+ drivers/hid/amd-sfh-hid/amd_sfh_client.c | 3 +-
+ drivers/hid/amd-sfh-hid/amd_sfh_hid.c | 2 +
+ drivers/hid/amd-sfh-hid/amd_sfh_hid.h | 2 +
+ drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 21 ++---
+ drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 3 +-
+ .../amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.c | 3 +-
+ .../amd-sfh-hid/hid_descriptor/amd_sfh_hid_desc.h | 3 +-
+ .../hid_descriptor/amd_sfh_hid_report_desc.h | 3 +-
+ drivers/hid/hid-apple.c | 66 +++++++--------
+ drivers/hid/hid-asus.c | 2 +-
+ drivers/hid/hid-cougar.c | 3 +-
+ drivers/hid/hid-debug.c | 10 +--
+ drivers/hid/hid-ids.h | 11 ++-
+ drivers/hid/hid-input.c | 1 +
+ drivers/hid/hid-multitouch.c | 13 +++
+ drivers/hid/hid-quirks.c | 3 +-
+ drivers/hid/hid-xiaomi.c | 94 ++++++++++++++++++++++
+ 19 files changed, 193 insertions(+), 58 deletions(-)
+ create mode 100644 drivers/hid/hid-xiaomi.c
+Merging i2c/i2c/for-next (26701d49482a Merge branch 'i2c/for-mergewindow' into i2c/for-next)
+$ git merge -m Merge branch 'i2c/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git i2c/i2c/for-next
+Merge made by the 'recursive' strategy.
+ .../devicetree/bindings/i2c/apple,i2c.yaml | 61 ++++++++
+ drivers/i2c/busses/Kconfig | 16 +-
+ drivers/i2c/busses/Makefile | 3 +
+ drivers/i2c/busses/i2c-amd-mp2-pci.c | 4 +-
+ drivers/i2c/busses/i2c-bcm-kona.c | 2 +-
+ drivers/i2c/busses/i2c-i801.c | 78 ++++------
+ drivers/i2c/busses/i2c-ismt.c | 12 +-
+ drivers/i2c/busses/i2c-kempld.c | 3 +-
+ drivers/i2c/busses/i2c-mlxcpld.c | 14 +-
+ drivers/i2c/busses/i2c-mt65xx.c | 82 ++++++++++-
+ .../i2c/busses/{i2c-pasemi.c => i2c-pasemi-core.c} | 114 ++++-----------
+ drivers/i2c/busses/i2c-pasemi-core.h | 21 +++
+ drivers/i2c/busses/i2c-pasemi-pci.c | 85 +++++++++++
+ drivers/i2c/busses/i2c-pasemi-platform.c | 122 ++++++++++++++++
+ drivers/i2c/busses/i2c-pxa.c | 1 -
+ drivers/i2c/busses/i2c-rcar.c | 6 +-
+ drivers/i2c/busses/i2c-xiic.c | 161 +++++++++------------
+ 17 files changed, 535 insertions(+), 250 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/i2c/apple,i2c.yaml
+ rename drivers/i2c/busses/{i2c-pasemi.c => i2c-pasemi-core.c} (77%)
+ create mode 100644 drivers/i2c/busses/i2c-pasemi-core.h
+ create mode 100644 drivers/i2c/busses/i2c-pasemi-pci.c
+ create mode 100644 drivers/i2c/busses/i2c-pasemi-platform.c
+Merging i3c/i3c/next (a3587e2c0578 i3c: fix incorrect address slot lookup on 64-bit)
+$ git merge -m Merge branch 'i3c/next' of git://git.kernel.org/pub/scm/linux/kernel/git/i3c/linux.git i3c/i3c/next
+Merge made by the 'recursive' strategy.
+ drivers/i3c/master.c | 3 ++-
+ drivers/i3c/master/mipi-i3c-hci/dma.c | 2 +-
+ drivers/i3c/master/mipi-i3c-hci/hci.h | 2 +-
+ 3 files changed, 4 insertions(+), 3 deletions(-)
+Merging dmi/dmi-for-next (f97a2103f1a7 firmware: dmi: Move product_sku info to the end of the modalias)
+$ git merge -m Merge branch 'dmi-for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git dmi/dmi-for-next
+Already up to date.
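"Already up to date." means every commit on the fetched branch is already reachable from the current HEAD, so git merges nothing and creates no merge commit. The same condition can be tested without attempting a merge; a minimal sketch, with the remote and branch names taken from the dmi merge above:

$ git merge-base --is-ancestor dmi/dmi-for-next HEAD && echo already merged

git merge-base --is-ancestor exits with status 0 when the first commit is an ancestor of the second.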
+Merging hwmon-staging/hwmon-next (c0d79987a0d8 hwmon: (dell-smm) Speed up setting of fan speed)
+$ git merge -m Merge branch 'hwmon-next' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git hwmon-staging/hwmon-next
+Auto-merging MAINTAINERS
+Removing Documentation/devicetree/bindings/hwmon/tmp108.txt
+Removing Documentation/devicetree/bindings/hwmon/sht15.txt
+Removing Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
+Removing Documentation/devicetree/bindings/hwmon/mcp3021.txt
+Removing Documentation/devicetree/bindings/hwmon/ltc4151.txt
+Removing Documentation/devicetree/bindings/hwmon/lm90.txt
+Removing Documentation/devicetree/bindings/hwmon/lm70.txt
+Removing Documentation/devicetree/bindings/hwmon/jc42.txt
+Removing Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
+Removing Documentation/devicetree/bindings/hwmon/hih6130.txt
+Removing Documentation/devicetree/bindings/hwmon/dps650ab.txt
+Merge made by the 'recursive' strategy.
+ .../devicetree/bindings/hwmon/dps650ab.txt | 11 -
+ .../devicetree/bindings/hwmon/hih6130.txt | 12 -
+ .../devicetree/bindings/hwmon/ibm,cffps1.txt | 26 -
+ .../devicetree/bindings/hwmon/iio-hwmon.yaml | 37 ++
+ Documentation/devicetree/bindings/hwmon/jc42.txt | 46 --
+ .../devicetree/bindings/hwmon/jedec,jc42.yaml | 78 +++
+ .../devicetree/bindings/hwmon/lltc,ltc4151.yaml | 41 ++
+ Documentation/devicetree/bindings/hwmon/lm70.txt | 22 -
+ Documentation/devicetree/bindings/hwmon/lm90.txt | 51 --
+ .../devicetree/bindings/hwmon/ltc4151.txt | 18 -
+ .../devicetree/bindings/hwmon/mcp3021.txt | 21 -
+ .../bindings/hwmon/microchip,mcp3021.yaml | 43 ++
+ .../devicetree/bindings/hwmon/national,lm90.yaml | 78 +++
+ .../devicetree/bindings/hwmon/ntc-thermistor.yaml | 141 ++++
+ .../devicetree/bindings/hwmon/ntc_thermistor.txt | 44 --
+ .../bindings/hwmon/pmbus/ti,lm25066.yaml | 54 ++
+ .../devicetree/bindings/hwmon/sensirion,sht15.yaml | 43 ++
+ Documentation/devicetree/bindings/hwmon/sht15.txt | 19 -
+ .../devicetree/bindings/hwmon/ti,tmp102.yaml | 47 ++
+ .../devicetree/bindings/hwmon/ti,tmp108.yaml | 50 ++
+ .../devicetree/bindings/hwmon/ti,tmp421.yaml | 110 ++++
+ Documentation/devicetree/bindings/hwmon/tmp108.txt | 18 -
+ .../devicetree/bindings/trivial-devices.yaml | 26 +-
+ Documentation/hwmon/dell-smm-hwmon.rst | 3 +
+ Documentation/hwmon/index.rst | 1 +
+ Documentation/hwmon/lm25066.rst | 2 +
+ Documentation/hwmon/max6620.rst | 46 ++
+ Documentation/hwmon/tmp421.rst | 10 +
+ MAINTAINERS | 4 +-
+ drivers/acpi/scan.c | 1 +
+ drivers/hwmon/Kconfig | 11 +
+ drivers/hwmon/Makefile | 1 +
+ drivers/hwmon/abituguru3.c | 6 +-
+ drivers/hwmon/acpi_power_meter.c | 13 +-
+ drivers/hwmon/ad7414.c | 4 +-
+ drivers/hwmon/ad7418.c | 6 +-
+ drivers/hwmon/adm1021.c | 4 +-
+ drivers/hwmon/adm1025.c | 4 +-
+ drivers/hwmon/adm1026.c | 4 +-
+ drivers/hwmon/adm1029.c | 4 +-
+ drivers/hwmon/adm1031.c | 6 +-
+ drivers/hwmon/adt7310.c | 3 +-
+ drivers/hwmon/adt7410.c | 3 +-
+ drivers/hwmon/adt7x10.c | 3 +-
+ drivers/hwmon/adt7x10.h | 2 +-
+ drivers/hwmon/amc6821.c | 8 +-
+ drivers/hwmon/applesmc.c | 2 +-
+ drivers/hwmon/asb100.c | 4 +-
+ drivers/hwmon/asc7621.c | 4 +-
+ drivers/hwmon/atxp1.c | 10 +-
+ drivers/hwmon/coretemp.c | 2 +-
+ drivers/hwmon/dell-smm-hwmon.c | 103 ++-
+ drivers/hwmon/dme1737.c | 4 +-
+ drivers/hwmon/ds1621.c | 4 +-
+ drivers/hwmon/ds620.c | 4 +-
+ drivers/hwmon/emc6w201.c | 4 +-
+ drivers/hwmon/f71805f.c | 4 +-
+ drivers/hwmon/f71882fg.c | 4 +-
+ drivers/hwmon/f75375s.c | 4 +-
+ drivers/hwmon/fschmd.c | 4 +-
+ drivers/hwmon/g760a.c | 2 +-
+ drivers/hwmon/gl518sm.c | 4 +-
+ drivers/hwmon/gl520sm.c | 4 +-
+ drivers/hwmon/i5500_temp.c | 114 ++--
+ drivers/hwmon/ibmaem.c | 2 +-
+ drivers/hwmon/ibmpex.c | 4 +-
+ drivers/hwmon/it87.c | 12 +-
+ drivers/hwmon/lineage-pem.c | 2 +-
+ drivers/hwmon/lm63.c | 6 +-
+ drivers/hwmon/lm77.c | 4 +-
+ drivers/hwmon/lm78.c | 4 +-
+ drivers/hwmon/lm80.c | 6 +-
+ drivers/hwmon/lm83.c | 4 +-
+ drivers/hwmon/lm85.c | 4 +-
+ drivers/hwmon/lm87.c | 4 +-
+ drivers/hwmon/lm92.c | 4 +-
+ drivers/hwmon/lm93.c | 4 +-
+ drivers/hwmon/lm95241.c | 8 +-
+ drivers/hwmon/ltc4151.c | 2 +-
+ drivers/hwmon/ltc4215.c | 2 +-
+ drivers/hwmon/ltc4261.c | 4 +-
+ drivers/hwmon/max16065.c | 2 +-
+ drivers/hwmon/max1619.c | 4 +-
+ drivers/hwmon/max1668.c | 4 +-
+ drivers/hwmon/max31722.c | 8 +-
+ drivers/hwmon/max6620.c | 514 +++++++++++++++
+ drivers/hwmon/max6639.c | 4 +-
+ drivers/hwmon/max6642.c | 2 +-
+ drivers/hwmon/mlxreg-fan.c | 138 ++--
+ drivers/hwmon/nct6683.c | 3 +
+ drivers/hwmon/nct6775.c | 716 ++++++++++++++-------
+ drivers/hwmon/pc87360.c | 4 +-
+ drivers/hwmon/pmbus/ibm-cffps.c | 23 +-
+ drivers/hwmon/pmbus/lm25066.c | 88 ++-
+ drivers/hwmon/raspberrypi-hwmon.c | 2 +-
+ drivers/hwmon/sch5636.c | 4 +-
+ drivers/hwmon/sht21.c | 4 +-
+ drivers/hwmon/sis5595.c | 4 +-
+ drivers/hwmon/smm665.c | 2 +-
+ drivers/hwmon/smsc47b397.c | 4 +-
+ drivers/hwmon/smsc47m192.c | 4 +-
+ drivers/hwmon/thmc50.c | 4 +-
+ drivers/hwmon/tmp103.c | 105 ++-
+ drivers/hwmon/tmp401.c | 6 +-
+ drivers/hwmon/tmp421.c | 186 +++++-
+ drivers/hwmon/via686a.c | 4 +-
+ drivers/hwmon/vt1211.c | 4 +-
+ drivers/hwmon/vt8231.c | 4 +-
+ drivers/hwmon/w83627ehf.c | 8 +-
+ drivers/hwmon/w83627hf.c | 6 +-
+ drivers/hwmon/w83781d.c | 4 +-
+ drivers/hwmon/w83791d.c | 4 +-
+ drivers/hwmon/w83792d.c | 6 +-
+ drivers/hwmon/w83793.c | 6 +-
+ drivers/hwmon/w83795.c | 6 +-
+ drivers/hwmon/w83l785ts.c | 4 +-
+ drivers/hwmon/w83l786ng.c | 4 +-
+ 117 files changed, 2553 insertions(+), 884 deletions(-)
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/dps650ab.txt
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/hih6130.txt
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt
+ create mode 100644 Documentation/devicetree/bindings/hwmon/iio-hwmon.yaml
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/jc42.txt
+ create mode 100644 Documentation/devicetree/bindings/hwmon/jedec,jc42.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/lltc,ltc4151.yaml
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/lm70.txt
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/lm90.txt
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/ltc4151.txt
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/mcp3021.txt
+ create mode 100644 Documentation/devicetree/bindings/hwmon/microchip,mcp3021.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/national,lm90.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
+ create mode 100644 Documentation/devicetree/bindings/hwmon/pmbus/ti,lm25066.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/sensirion,sht15.yaml
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/sht15.txt
+ create mode 100644 Documentation/devicetree/bindings/hwmon/ti,tmp102.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/ti,tmp108.yaml
+ create mode 100644 Documentation/devicetree/bindings/hwmon/ti,tmp421.yaml
+ delete mode 100644 Documentation/devicetree/bindings/hwmon/tmp108.txt
+ create mode 100644 Documentation/hwmon/max6620.rst
+ create mode 100644 drivers/hwmon/max6620.c
+Merging jc_docs/docs-next (2df9f7f57905 docs: translations: zh_CN: memory-hotplug.rst: fix a typo)
+$ git merge -m Merge branch 'docs-next' of git://git.lwn.net/linux.git jc_docs/docs-next
+Merge made by the 'recursive' strategy.
+ Documentation/ABI/testing/sysfs-block | 16 +
+ Documentation/ABI/testing/sysfs-driver-xen-blkback | 4 +-
+ .../ABI/testing/sysfs-driver-xen-blkfront | 2 +-
+ Documentation/admin-guide/mm/pagemap.rst | 22 +
+ Documentation/arm/marvell.rst | 19 +
+ Documentation/asm-annotations.rst | 2 +-
+ Documentation/block/queue-sysfs.rst | 9 +-
+ Documentation/conf.py | 3 +
+ Documentation/dev-tools/checkpatch.rst | 81 +++
+ Documentation/filesystems/ext4/orphan.rst | 44 +-
+ Documentation/filesystems/proc.rst | 26 +-
+ Documentation/kbuild/Kconfig.recursion-issue-02 | 2 +-
+ Documentation/maintainer/pull-requests.rst | 2 +-
+ Documentation/networking/msg_zerocopy.rst | 2 +-
+ Documentation/process/coding-style.rst | 37 +-
+ Documentation/process/index.rst | 1 +
+ Documentation/process/maintainer-handbooks.rst | 18 +
+ Documentation/process/maintainer-tip.rst | 785 +++++++++++++++++++++
+ Documentation/process/submitting-drivers.rst | 2 +-
+ Documentation/process/submitting-patches.rst | 11 +-
+ Documentation/timers/no_hz.rst | 8 +-
+ .../it_IT/process/submitting-patches.rst | 4 +-
+ .../translations/ko_KR/memory-barriers.txt | 8 +-
+ .../translations/zh_CN/admin-guide/index.rst | 2 +-
+ .../translations/zh_CN/admin-guide/sysrq.rst | 280 ++++++++
+ .../translations/zh_CN/core-api/boot-time-mm.rst | 49 ++
+ .../translations/zh_CN/core-api/genalloc.rst | 109 +++
+ .../zh_CN/core-api/gfp_mask-from-fs-io.rst | 66 ++
+ .../translations/zh_CN/core-api/index.rst | 17 +-
+ .../zh_CN/core-api/irq/irq-affinity.rst | 2 +-
+ Documentation/translations/zh_CN/core-api/kref.rst | 311 ++++++++
+ .../zh_CN/core-api/memory-allocation.rst | 138 ++++
+ .../translations/zh_CN/core-api/memory-hotplug.rst | 2 +-
+ .../translations/zh_CN/core-api/mm-api.rst | 110 +++
+ .../zh_CN/core-api/unaligned-memory-access.rst | 229 ++++++
+ .../zh_CN/maintainer/pull-requests.rst | 2 +-
+ .../translations/zh_CN/process/5.Posting.rst | 8 +-
+ Documentation/translations/zh_CN/process/howto.rst | 10 +-
+ .../zh_CN/process/submitting-patches.rst | 8 +-
+ Documentation/translations/zh_TW/index.rst | 10 +-
+ .../zh_TW/process/submitting-patches.rst | 4 +-
+ .../virt/uml/user_mode_linux_howto_v2.rst | 119 ++--
+ Documentation/vm/page_migration.rst | 2 +-
+ Documentation/x86/entry_64.rst | 2 +-
+ Documentation/x86/orc-unwinder.rst | 4 +-
+ scripts/kernel-doc | 1 +
+ 46 files changed, 2437 insertions(+), 156 deletions(-)
+ create mode 100644 Documentation/process/maintainer-handbooks.rst
+ create mode 100644 Documentation/process/maintainer-tip.rst
+ create mode 100644 Documentation/translations/zh_CN/admin-guide/sysrq.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/boot-time-mm.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/genalloc.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/gfp_mask-from-fs-io.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/kref.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/memory-allocation.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/mm-api.rst
+ create mode 100644 Documentation/translations/zh_CN/core-api/unaligned-memory-access.rst
+Merging v4l-dvb/master (fd2eda71a47b media: remove myself from dvb media maintainers)
+$ git merge -m Merge branch 'master' of git://linuxtv.org/media_tree.git v4l-dvb/master
+Auto-merging drivers/media/platform/Kconfig
+Auto-merging MAINTAINERS
+Auto-merging Documentation/devicetree/bindings/vendor-prefixes.yaml
+Removing Documentation/devicetree/bindings/media/renesas,imr.txt
+Removing Documentation/devicetree/bindings/media/i2c/mt9p031.txt
+Auto-merging .mailmap
+Merge made by the 'recursive' strategy.
+ .mailmap | 1 +
+ Documentation/admin-guide/media/i2c-cardlist.rst | 8 +-
+ Documentation/admin-guide/media/imx7.rst | 60 +
+ Documentation/admin-guide/media/ipu3.rst | 14 +-
+ Documentation/admin-guide/media/ivtv.rst | 2 +-
+ Documentation/admin-guide/media/vimc.rst | 20 +-
+ .../bindings/media/i2c/aptina,mt9p031.yaml | 108 +
+ .../devicetree/bindings/media/i2c/hynix,hi846.yaml | 120 ++
+ .../devicetree/bindings/media/i2c/mt9p031.txt | 40 -
+ .../devicetree/bindings/media/mediatek-vcodec.txt | 2 +
+ .../bindings/media/qcom,sc7280-venus.yaml | 162 ++
+ .../devicetree/bindings/media/renesas,csi2.yaml | 1 +
+ .../devicetree/bindings/media/renesas,imr.txt | 31 -
+ .../devicetree/bindings/media/renesas,imr.yaml | 67 +
+ .../devicetree/bindings/media/rockchip-isp1.yaml | 114 +-
+ .../devicetree/bindings/vendor-prefixes.yaml | 2 +
+ Documentation/driver-api/media/drivers/rkisp1.rst | 43 +
+ .../driver-api/media/maintainer-entry-profile.rst | 2 +-
+ Documentation/driver-api/media/v4l2-subdev.rst | 14 +-
+ .../userspace-api/media/drivers/cx2341x-uapi.rst | 8 +-
+ Documentation/userspace-api/media/v4l/buffer.rst | 40 +-
+ .../userspace-api/media/v4l/ext-ctrls-codec.rst | 57 +
+ .../media/v4l/ext-ctrls-image-source.rst | 20 +
+ .../userspace-api/media/v4l/pixfmt-reserved.rst | 29 +-
+ .../userspace-api/media/v4l/pixfmt-yuv-planar.rst | 50 +-
+ .../userspace-api/media/v4l/vidioc-create-bufs.rst | 7 +-
+ .../userspace-api/media/v4l/vidioc-g-ctrl.rst | 3 +
+ .../userspace-api/media/v4l/vidioc-g-ext-ctrls.rst | 3 +
+ .../userspace-api/media/v4l/vidioc-queryctrl.rst | 6 +
+ .../userspace-api/media/v4l/vidioc-reqbufs.rst | 16 +-
+ .../userspace-api/media/videodev2.h.rst.exceptions | 2 +
+ MAINTAINERS | 22 +-
+ drivers/media/cec/Kconfig | 4 +
+ drivers/media/cec/core/cec-pin.c | 4 +-
+ drivers/media/cec/platform/meson/ao-cec-g12a.c | 4 +-
+ drivers/media/cec/platform/meson/ao-cec.c | 4 +-
+ drivers/media/cec/platform/s5p/s5p_cec.c | 4 +-
+ drivers/media/cec/platform/sti/stih-cec.c | 4 +-
+ drivers/media/cec/platform/stm32/stm32-cec.c | 4 +-
+ drivers/media/common/siano/smscoreapi.c | 7 +-
+ drivers/media/common/videobuf2/videobuf2-core.c | 149 +-
+ .../media/common/videobuf2/videobuf2-dma-contig.c | 195 +-
+ drivers/media/common/videobuf2/videobuf2-dma-sg.c | 39 +-
+ drivers/media/common/videobuf2/videobuf2-v4l2.c | 59 +-
+ drivers/media/common/videobuf2/videobuf2-vmalloc.c | 30 +-
+ drivers/media/dvb-core/dvb_vb2.c | 2 +-
+ drivers/media/dvb-frontends/cxd2099.c | 9 -
+ drivers/media/dvb-frontends/cxd2099.h | 9 -
+ drivers/media/dvb-frontends/cxd2820r_priv.h | 2 +-
+ drivers/media/dvb-frontends/mb86a20s.c | 4 +-
+ drivers/media/dvb-frontends/mn88443x.c | 18 +-
+ drivers/media/dvb-frontends/mxl5xx.c | 9 -
+ drivers/media/dvb-frontends/mxl5xx.h | 9 -
+ drivers/media/dvb-frontends/mxl5xx_defs.h | 4 -
+ drivers/media/dvb-frontends/mxl5xx_regs.h | 10 -
+ drivers/media/dvb-frontends/mxl692.c | 9 -
+ drivers/media/dvb-frontends/mxl692.h | 9 -
+ drivers/media/dvb-frontends/mxl692_defs.h | 9 -
+ drivers/media/dvb-frontends/rtl2832_sdr.c | 5 +-
+ drivers/media/dvb-frontends/stv0910.c | 9 -
+ drivers/media/dvb-frontends/stv0910.h | 9 -
+ drivers/media/dvb-frontends/stv6111.c | 9 -
+ drivers/media/dvb-frontends/stv6111.h | 9 -
+ drivers/media/firewire/firedtv-avc.c | 14 +-
+ drivers/media/firewire/firedtv-ci.c | 2 +
+ drivers/media/i2c/Kconfig | 25 +
+ drivers/media/i2c/Makefile | 2 +
+ drivers/media/i2c/dw9714.c | 14 +-
+ drivers/media/i2c/hi846.c | 2190 ++++++++++++++++++++
+ drivers/media/i2c/imx258.c | 12 +-
+ drivers/media/i2c/ir-kbd-i2c.c | 1 +
+ drivers/media/i2c/max9286.c | 17 +-
+ drivers/media/i2c/mt9p031.c | 80 +-
+ drivers/media/i2c/ov13858.c | 11 +-
+ drivers/media/i2c/ov13b10.c | 1491 +++++++++++++
+ drivers/media/i2c/ov5670.c | 11 +-
+ drivers/media/i2c/ov8856.c | 83 +-
+ drivers/media/i2c/st-mipid02.c | 22 +-
+ drivers/media/i2c/tda1997x.c | 12 +-
+ drivers/media/i2c/video-i2c.c | 21 +-
+ drivers/media/mc/Kconfig | 8 -
+ drivers/media/pci/cobalt/cobalt-driver.c | 4 +-
+ drivers/media/pci/cx18/cx18-driver.c | 2 +-
+ drivers/media/pci/cx18/cx18-ioctl.c | 4 +-
+ drivers/media/pci/cx18/cx18-queue.c | 13 +-
+ drivers/media/pci/cx18/cx18-streams.c | 24 +-
+ drivers/media/pci/cx23885/cx23885-alsa.c | 3 +-
+ drivers/media/pci/ddbridge/ddbridge-main.c | 4 +-
+ drivers/media/pci/intel/ipu3/cio2-bridge.c | 60 +-
+ drivers/media/pci/intel/ipu3/cio2-bridge.h | 9 +-
+ drivers/media/pci/intel/ipu3/ipu3-cio2-main.c | 274 ++-
+ drivers/media/pci/intel/ipu3/ipu3-cio2.h | 4 +
+ drivers/media/pci/ivtv/ivtv-driver.c | 2 +-
+ drivers/media/pci/ivtv/ivtv-ioctl.c | 8 +-
+ drivers/media/pci/ivtv/ivtv-queue.c | 18 +-
+ drivers/media/pci/ivtv/ivtv-streams.c | 22 +-
+ drivers/media/pci/ivtv/ivtv-udma.c | 19 +-
+ drivers/media/pci/ivtv/ivtv-yuv.c | 10 +-
+ drivers/media/pci/ivtv/ivtvfb.c | 8 +-
+ drivers/media/pci/netup_unidvb/netup_unidvb_core.c | 29 +-
+ drivers/media/pci/pluto2/pluto2.c | 20 +-
+ drivers/media/pci/pt1/pt1.c | 2 +-
+ drivers/media/pci/saa7164/saa7164-api.c | 2 -
+ drivers/media/pci/tw5864/tw5864-core.c | 2 +-
+ drivers/media/platform/Kconfig | 20 +
+ drivers/media/platform/Makefile | 1 +
+ drivers/media/platform/am437x/am437x-vpfe.c | 23 +-
+ drivers/media/platform/aspeed-video.c | 33 +-
+ drivers/media/platform/atmel/atmel-isc-base.c | 29 +-
+ drivers/media/platform/atmel/atmel-isc.h | 2 +
+ drivers/media/platform/atmel/atmel-isi.c | 17 +-
+ drivers/media/platform/atmel/atmel-sama5d2-isc.c | 54 +-
+ drivers/media/platform/atmel/atmel-sama7g5-isc.c | 37 +-
+ drivers/media/platform/cadence/cdns-csi2rx.c | 18 +-
+ drivers/media/platform/cadence/cdns-csi2tx.c | 4 +-
+ drivers/media/platform/coda/imx-vdoa.c | 3 +-
+ drivers/media/platform/davinci/vpbe_venc.c | 9 +-
+ drivers/media/platform/davinci/vpif.c | 5 +-
+ drivers/media/platform/davinci/vpif_capture.c | 21 +-
+ drivers/media/platform/davinci/vpss.c | 10 +-
+ drivers/media/platform/exynos-gsc/gsc-core.c | 3 +-
+ drivers/media/platform/exynos4-is/media-dev.c | 20 +-
+ drivers/media/platform/exynos4-is/mipi-csis.c | 4 +-
+ drivers/media/platform/imx-jpeg/mxc-jpeg.c | 36 +-
+ drivers/media/platform/imx-pxp.c | 4 +-
+ drivers/media/platform/marvell-ccic/cafe-driver.c | 9 +-
+ drivers/media/platform/marvell-ccic/mcam-core.c | 10 +-
+ drivers/media/platform/marvell-ccic/mmp-driver.c | 6 +-
+ drivers/media/platform/meson/ge2d/ge2d.c | 10 +-
+ drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 4 +-
+ drivers/media/platform/mtk-vcodec/Makefile | 3 +
+ drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 820 ++------
+ drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.h | 27 +-
+ .../media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 65 +-
+ .../platform/mtk-vcodec/mtk_vcodec_dec_stateful.c | 628 ++++++
+ .../platform/mtk-vcodec/mtk_vcodec_dec_stateless.c | 360 ++++
+ drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h | 59 +-
+ drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 148 +-
+ .../media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c | 75 +-
+ .../platform/mtk-vcodec/vdec/vdec_h264_req_if.c | 774 +++++++
+ drivers/media/platform/mtk-vcodec/vdec_drv_if.c | 3 +
+ drivers/media/platform/mtk-vcodec/vdec_drv_if.h | 1 +
+ drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h | 23 +-
+ drivers/media/platform/mtk-vcodec/vdec_vpu_if.c | 43 +-
+ drivers/media/platform/mtk-vcodec/vdec_vpu_if.h | 5 +
+ drivers/media/platform/mtk-vpu/mtk_vpu.c | 5 +-
+ drivers/media/platform/mx2_emmaprp.c | 4 +-
+ drivers/media/platform/omap/omap_vout.c | 18 +-
+ drivers/media/platform/omap/omap_vout_vrfb.c | 2 +-
+ drivers/media/platform/omap/omap_voutdef.h | 2 +-
+ drivers/media/platform/omap3isp/isp.c | 21 +-
+ drivers/media/platform/pxa_camera.c | 26 +-
+ drivers/media/platform/qcom/camss/camss-vfe-170.c | 9 +-
+ drivers/media/platform/qcom/camss/camss-vfe-4-1.c | 28 +-
+ drivers/media/platform/qcom/camss/camss-vfe-4-7.c | 18 +-
+ drivers/media/platform/qcom/camss/camss-vfe-4-8.c | 17 +-
+ drivers/media/platform/qcom/camss/camss-vfe.c | 4 +-
+ drivers/media/platform/qcom/camss/camss-vfe.h | 2 +-
+ drivers/media/platform/qcom/camss/camss.c | 18 +-
+ drivers/media/platform/qcom/venus/core.c | 58 +-
+ drivers/media/platform/qcom/venus/core.h | 2 +
+ drivers/media/platform/qcom/venus/firmware.c | 42 +-
+ drivers/media/platform/qcom/venus/helpers.c | 14 +-
+ drivers/media/platform/qcom/venus/hfi_cmds.c | 7 +
+ drivers/media/platform/qcom/venus/hfi_helper.h | 14 +
+ drivers/media/platform/qcom/venus/hfi_msgs.c | 7 +
+ .../media/platform/qcom/venus/hfi_plat_bufs_v6.c | 6 +-
+ drivers/media/platform/qcom/venus/hfi_platform.c | 13 -
+ drivers/media/platform/qcom/venus/hfi_platform.h | 2 -
+ .../media/platform/qcom/venus/hfi_platform_v6.c | 6 -
+ drivers/media/platform/qcom/venus/hfi_venus.c | 4 +
+ drivers/media/platform/qcom/venus/hfi_venus_io.h | 2 +
+ drivers/media/platform/qcom/venus/pm_helpers.c | 13 +-
+ drivers/media/platform/qcom/venus/vdec.c | 42 +-
+ drivers/media/platform/rcar-isp.c | 515 +++++
+ drivers/media/platform/rcar-vin/rcar-core.c | 1077 +++++-----
+ drivers/media/platform/rcar-vin/rcar-csi2.c | 241 ++-
+ drivers/media/platform/rcar-vin/rcar-dma.c | 40 +-
+ drivers/media/platform/rcar-vin/rcar-v4l2.c | 4 +
+ drivers/media/platform/rcar-vin/rcar-vin.h | 25 +-
+ drivers/media/platform/rcar_drif.c | 17 +-
+ drivers/media/platform/rcar_fdp1.c | 4 +-
+ drivers/media/platform/rcar_jpu.c | 4 +-
+ drivers/media/platform/renesas-ceu.c | 33 +-
+ drivers/media/platform/rockchip/rga/rga.c | 5 +-
+ .../platform/rockchip/rkisp1/rkisp1-capture.c | 9 +-
+ .../media/platform/rockchip/rkisp1/rkisp1-common.h | 44 +-
+ .../media/platform/rockchip/rkisp1/rkisp1-dev.c | 98 +-
+ .../media/platform/rockchip/rkisp1/rkisp1-isp.c | 29 +-
+ .../media/platform/rockchip/rkisp1/rkisp1-params.c | 557 ++++-
+ .../media/platform/rockchip/rkisp1/rkisp1-regs.h | 406 ++--
+ .../media/platform/rockchip/rkisp1/rkisp1-stats.c | 107 +-
+ drivers/media/platform/s3c-camif/camif-core.c | 6 +-
+ drivers/media/platform/s5p-g2d/g2d.c | 4 +-
+ drivers/media/platform/s5p-jpeg/jpeg-core.c | 5 +-
+ drivers/media/platform/s5p-mfc/s5p_mfc.c | 9 +-
+ drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 3 +-
+ .../media/platform/sti/c8sectpfe/c8sectpfe-core.c | 1 -
+ .../media/platform/sti/c8sectpfe/c8sectpfe-dvb.c | 1 -
+ drivers/media/platform/sti/hva/hva-hw.c | 4 +-
+ drivers/media/platform/stm32/stm32-dcmi.c | 37 +-
+ drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c | 16 +-
+ drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c | 33 +-
+ drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.h | 2 +-
+ .../media/platform/sunxi/sun6i-csi/sun6i_video.c | 8 +-
+ drivers/media/platform/sunxi/sun8i-di/sun8i-di.c | 4 +-
+ drivers/media/platform/ti-vpe/cal.c | 16 +-
+ drivers/media/platform/via-camera.c | 6 +-
+ drivers/media/platform/video-mux.c | 17 +-
+ drivers/media/platform/vsp1/vsp1_drm.c | 8 +-
+ drivers/media/platform/vsp1/vsp1_drv.c | 18 +-
+ drivers/media/platform/vsp1/vsp1_regs.h | 11 +-
+ drivers/media/platform/vsp1/vsp1_wpf.c | 2 +-
+ drivers/media/platform/xilinx/xilinx-vip.c | 4 +-
+ drivers/media/platform/xilinx/xilinx-vipp.c | 17 +-
+ drivers/media/radio/radio-wl1273.c | 2 +-
+ drivers/media/radio/si470x/radio-si470x-i2c.c | 2 +-
+ drivers/media/radio/si470x/radio-si470x-usb.c | 2 +-
+ drivers/media/rc/img-ir/img-ir-core.c | 4 +-
+ drivers/media/rc/imon.c | 2 +
+ drivers/media/rc/ir-hix5hd2.c | 4 +-
+ drivers/media/rc/ir_toy.c | 61 +-
+ drivers/media/rc/mceusb.c | 2 +
+ drivers/media/rc/meson-ir-tx.c | 1 -
+ drivers/media/rc/meson-ir.c | 4 +-
+ drivers/media/rc/mtk-cir.c | 4 +-
+ drivers/media/rc/st_rc.c | 5 +-
+ drivers/media/rc/streamzap.c | 1 +
+ drivers/media/rc/sunxi-cir.c | 4 +-
+ drivers/media/spi/cxd2880-spi.c | 2 +-
+ drivers/media/test-drivers/vidtv/vidtv_bridge.c | 1 +
+ drivers/media/test-drivers/vim2m.c | 5 -
+ drivers/media/test-drivers/vimc/vimc-scaler.c | 366 ++--
+ drivers/media/test-drivers/vivid/vivid-cec.c | 341 +--
+ drivers/media/test-drivers/vivid/vivid-cec.h | 9 +-
+ drivers/media/test-drivers/vivid/vivid-core.c | 52 +-
+ drivers/media/test-drivers/vivid/vivid-core.h | 23 +-
+ drivers/media/tuners/mxl5007t.c | 9 -
+ drivers/media/tuners/tuner-types.c | 4 +
+ drivers/media/usb/airspy/airspy.c | 5 +-
+ drivers/media/usb/dvb-usb-v2/mxl111sf.c | 16 +-
+ drivers/media/usb/dvb-usb/az6027.c | 1 +
+ drivers/media/usb/dvb-usb/dibusb-common.c | 2 +-
+ drivers/media/usb/em28xx/em28xx-cards.c | 12 +-
+ drivers/media/usb/em28xx/em28xx-core.c | 5 +-
+ drivers/media/usb/gspca/gspca.c | 2 +
+ drivers/media/usb/gspca/m5602/m5602_ov7660.h | 1 -
+ drivers/media/usb/gspca/sn9c20x.c | 22 +-
+ drivers/media/usb/pvrusb2/pvrusb2-ctrl.c | 25 +-
+ drivers/media/usb/pvrusb2/pvrusb2-v4l2.c | 4 -
+ drivers/media/usb/stkwebcam/stk-webcam.c | 11 +-
+ drivers/media/usb/tm6000/tm6000-video.c | 3 +-
+ drivers/media/usb/ttusb-dec/ttusb_dec.c | 10 +-
+ drivers/media/usb/uvc/uvc_ctrl.c | 260 ++-
+ drivers/media/usb/uvc/uvc_driver.c | 16 +-
+ drivers/media/usb/uvc/uvc_metadata.c | 2 +-
+ drivers/media/usb/uvc/uvc_v4l2.c | 103 +-
+ drivers/media/usb/uvc/uvc_video.c | 5 +
+ drivers/media/usb/uvc/uvcvideo.h | 17 +-
+ drivers/media/v4l2-core/v4l2-async.c | 168 +-
+ drivers/media/v4l2-core/v4l2-common.c | 3 +
+ drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 9 +-
+ drivers/media/v4l2-core/v4l2-ctrls-core.c | 6 +
+ drivers/media/v4l2-core/v4l2-ctrls-defs.c | 5 +
+ drivers/media/v4l2-core/v4l2-fwnode.c | 83 +-
+ drivers/media/v4l2-core/v4l2-ioctl.c | 77 +-
+ drivers/staging/media/atomisp/i2c/atomisp-lm3554.c | 37 +-
+ .../media/atomisp/i2c/ov5693/atomisp-ov5693.c | 2 +
+ drivers/staging/media/atomisp/pci/atomisp_csi2.c | 70 +-
+ drivers/staging/media/hantro/hantro_drv.c | 12 +-
+ drivers/staging/media/hantro/hantro_g1_h264_dec.c | 2 +-
+ drivers/staging/media/hantro/hantro_g1_regs.h | 2 +
+ drivers/staging/media/hantro/hantro_g1_vp8_dec.c | 3 +-
+ drivers/staging/media/hantro/hantro_g2_hevc_dec.c | 52 +
+ drivers/staging/media/hantro/hantro_hevc.c | 21 +
+ drivers/staging/media/hantro/hantro_hw.h | 4 +
+ drivers/staging/media/imx/TODO | 5 -
+ drivers/staging/media/imx/imx-media-csi.c | 18 +-
+ drivers/staging/media/imx/imx-media-dev-common.c | 9 +-
+ drivers/staging/media/imx/imx-media-dev.c | 6 +-
+ drivers/staging/media/imx/imx-media-of.c | 6 +-
+ drivers/staging/media/imx/imx6-mipi-csi2.c | 17 +-
+ drivers/staging/media/imx/imx7-media-csi.c | 24 +-
+ drivers/staging/media/imx/imx7-mipi-csis.c | 16 +-
+ drivers/staging/media/imx/imx8mq-mipi-csi2.c | 16 +-
+ .../staging/media/ipu3/include/uapi/intel-ipu3.h | 7 +-
+ drivers/staging/media/ipu3/ipu3-css-fw.c | 7 +-
+ drivers/staging/media/ipu3/ipu3-css-fw.h | 2 +-
+ drivers/staging/media/ipu3/ipu3-css.c | 19 +-
+ drivers/staging/media/ipu3/ipu3-css.h | 1 -
+ drivers/staging/media/ipu3/ipu3-v4l2.c | 13 +-
+ drivers/staging/media/ipu3/ipu3.h | 12 +
+ drivers/staging/media/meson/vdec/esparser.h | 6 +-
+ drivers/staging/media/meson/vdec/vdec.c | 7 +-
+ drivers/staging/media/meson/vdec/vdec.h | 16 +-
+ drivers/staging/media/meson/vdec/vdec_helpers.h | 3 +-
+ drivers/staging/media/rkvdec/rkvdec.c | 4 +-
+ drivers/staging/media/sunxi/cedrus/cedrus.c | 55 +-
+ drivers/staging/media/sunxi/cedrus/cedrus.h | 2 +
+ drivers/staging/media/sunxi/cedrus/cedrus_dec.c | 2 +
+ drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 70 +-
+ drivers/staging/media/sunxi/cedrus/cedrus_hw.c | 2 +-
+ drivers/staging/media/sunxi/cedrus/cedrus_regs.h | 2 +
+ drivers/staging/media/sunxi/cedrus/cedrus_video.c | 6 +-
+ drivers/staging/media/tegra-video/vi.c | 17 +-
+ include/media/hevc-ctrls.h | 11 +
+ include/media/i2c/mt9p031.h | 1 +
+ include/media/tuner.h | 1 +
+ include/media/v4l2-async.h | 105 +-
+ include/media/v4l2-dev.h | 3 +-
+ include/media/v4l2-fwnode.h | 12 +-
+ include/media/videobuf2-core.h | 59 +-
+ include/uapi/linux/v4l2-controls.h | 1 +
+ include/uapi/linux/videodev2.h | 31 +-
+ 314 files changed, 12209 insertions(+), 3936 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/media/i2c/aptina,mt9p031.yaml
+ create mode 100644 Documentation/devicetree/bindings/media/i2c/hynix,hi846.yaml
+ delete mode 100644 Documentation/devicetree/bindings/media/i2c/mt9p031.txt
+ create mode 100644 Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml
+ delete mode 100644 Documentation/devicetree/bindings/media/renesas,imr.txt
+ create mode 100644 Documentation/devicetree/bindings/media/renesas,imr.yaml
+ create mode 100644 Documentation/driver-api/media/drivers/rkisp1.rst
+ create mode 100644 drivers/media/i2c/hi846.c
+ create mode 100644 drivers/media/i2c/ov13b10.c
+ create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateful.c
+ create mode 100644 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c
+ create mode 100644 drivers/media/platform/mtk-vcodec/vdec/vdec_h264_req_if.c
+ create mode 100644 drivers/media/platform/rcar-isp.c
+Merging v4l-dvb-next/master (57c3b9f55ba8 media: venus: core: Add sdm660 DT compatible and resource struct)
+$ git merge -m Merge branch 'master' of git://linuxtv.org/mchehab/media-next.git v4l-dvb-next/master
+Removing drivers/media/rc/sir_ir.c
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ .../devicetree/bindings/media/i2c/adv7604.yaml | 13 +-
+ .../bindings/media/qcom,sdm660-venus.yaml | 186 +++++++++
+ MAINTAINERS | 2 +-
+ drivers/gpu/ipu-v3/ipu-csi.c | 31 +-
+ .../media/common/videobuf2/videobuf2-dma-contig.c | 3 +
+ drivers/media/i2c/Kconfig | 2 +
+ drivers/media/i2c/adv7604.c | 15 +-
+ drivers/media/i2c/tda1997x.c | 119 +++---
+ drivers/media/i2c/tda1997x_regs.h | 3 +
+ drivers/media/pci/ivtv/ivtvfb.c | 4 +-
+ drivers/media/pci/saa7134/saa7134-cards.c | 53 +++
+ drivers/media/pci/saa7134/saa7134-dvb.c | 29 ++
+ drivers/media/pci/saa7134/saa7134.h | 1 +
+ drivers/media/platform/allegro-dvt/allegro-core.c | 311 +++++++++++++--
+ drivers/media/platform/allegro-dvt/allegro-mail.c | 23 +-
+ drivers/media/platform/allegro-dvt/allegro-mail.h | 10 +-
+ drivers/media/platform/allegro-dvt/nal-h264.c | 74 ----
+ drivers/media/platform/allegro-dvt/nal-h264.h | 200 +++++++++-
+ drivers/media/platform/allegro-dvt/nal-hevc.c | 202 ++++++----
+ drivers/media/platform/allegro-dvt/nal-hevc.h | 189 ++++++++-
+ drivers/media/platform/aspeed-video.c | 100 +++++
+ drivers/media/platform/imx-jpeg/mxc-jpeg.c | 73 +++-
+ drivers/media/platform/imx-jpeg/mxc-jpeg.h | 2 +
+ drivers/media/platform/qcom/venus/core.c | 77 +++-
+ drivers/media/platform/qcom/venus/core.h | 7 +-
+ drivers/media/platform/qcom/venus/helpers.c | 67 +++-
+ drivers/media/platform/qcom/venus/helpers.h | 4 +
+ drivers/media/platform/qcom/venus/hfi.c | 48 ++-
+ drivers/media/platform/qcom/venus/vdec.c | 25 +-
+ drivers/media/platform/qcom/venus/venc.c | 116 +++++-
+ drivers/media/platform/rcar-vin/rcar-v4l2.c | 21 +
+ drivers/media/rc/Kconfig | 8 -
+ drivers/media/rc/Makefile | 1 -
+ drivers/media/rc/ir_toy.c | 2 +-
+ drivers/media/rc/ite-cir.c | 2 +-
+ drivers/media/rc/sir_ir.c | 438 ---------------------
+ drivers/media/test-drivers/vidtv/vidtv_bridge.c | 5 +-
+ drivers/media/usb/gspca/gl860/gl860-mi1320.c | 87 ++--
+ drivers/media/usb/gspca/gl860/gl860-ov9655.c | 169 +++++---
+ drivers/staging/media/imx/imx-media-csi.c | 5 +
+ drivers/staging/media/rkvdec/rkvdec-h264.c | 5 +-
+ drivers/staging/media/rkvdec/rkvdec.c | 40 +-
+ drivers/staging/media/sunxi/cedrus/cedrus.c | 1 +
+ drivers/staging/media/sunxi/cedrus/cedrus_h264.c | 113 +++---
+ drivers/staging/media/sunxi/cedrus/cedrus_h265.c | 30 +-
+ drivers/staging/media/sunxi/cedrus/cedrus_video.c | 1 +
+ include/uapi/linux/v4l2-controls.h | 5 +
+ 47 files changed, 2001 insertions(+), 921 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/media/qcom,sdm660-venus.yaml
+ delete mode 100644 drivers/media/rc/sir_ir.c
+Merging pm/linux-next (30c7771b41dc Merge branches 'pm-cpufreq', 'pm-sleep' and 'powercap' into linux-next)
+$ git merge -m Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git pm/linux-next
+Auto-merging include/linux/cpuhotplug.h
+Auto-merging drivers/pci/pci.h
+Auto-merging drivers/pci/pci.c
+Auto-merging drivers/acpi/scan.c
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ Documentation/ABI/testing/sysfs-class-thermal | 259 +++++++++++++++++++++
+ Documentation/driver-api/thermal/sysfs-api.rst | 225 +-----------------
+ Documentation/firmware-guide/acpi/osi.rst | 2 +-
+ MAINTAINERS | 22 +-
+ arch/x86/kernel/acpi/boot.c | 9 +
+ arch/x86/kernel/acpi/cstate.c | 15 ++
+ drivers/acpi/Kconfig | 2 +-
+ drivers/acpi/acpi_lpss.c | 13 +-
+ drivers/acpi/acpi_pnp.c | 2 -
+ drivers/acpi/acpica/acglobal.h | 2 +
+ drivers/acpi/acpica/hwesleep.c | 8 +-
+ drivers/acpi/acpica/hwsleep.c | 11 +-
+ drivers/acpi/acpica/hwxfsleep.c | 7 +
+ drivers/acpi/acpica/utosi.c | 1 +
+ drivers/acpi/battery.c | 2 +-
+ drivers/acpi/dock.c | 8 +-
+ drivers/acpi/glue.c | 59 ++++-
+ drivers/acpi/internal.h | 1 +
+ drivers/acpi/power.c | 97 ++++----
+ drivers/acpi/processor_idle.c | 3 +-
+ drivers/acpi/resource.c | 49 +++-
+ drivers/acpi/scan.c | 6 +
+ drivers/cpufreq/acpi-cpufreq.c | 3 +-
+ drivers/cpufreq/amd_freq_sensitivity.c | 3 +-
+ drivers/cpufreq/cpufreq.c | 19 +-
+ drivers/cpufreq/cpufreq_conservative.c | 6 +-
+ drivers/cpufreq/cpufreq_ondemand.c | 16 +-
+ drivers/cpufreq/intel_pstate.c | 117 +++++++++-
+ drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +-
+ drivers/cpufreq/powernv-cpufreq.c | 4 +-
+ drivers/cpufreq/s5pv210-cpufreq.c | 2 +-
+ drivers/cpuidle/sysfs.c | 5 +-
+ drivers/idle/intel_idle.c | 13 +-
+ drivers/pci/pci-acpi.c | 74 ++----
+ drivers/pci/pci-mid.c | 37 +--
+ drivers/pci/pci.c | 154 ++++++------
+ drivers/pci/pci.h | 96 ++++----
+ drivers/pnp/system.c | 2 +-
+ drivers/powercap/dtpm.c | 78 +++----
+ drivers/powercap/dtpm_cpu.c | 228 +++++++++---------
+ .../intel/int340x_thermal/int3400_thermal.c | 9 +-
+ drivers/thermal/intel/intel_powerclamp.c | 8 +-
+ include/acpi/acpi_bus.h | 1 -
+ include/acpi/acpixf.h | 2 +-
+ include/acpi/actbl2.h | 251 +++++++++++++++++++-
+ include/acpi/actbl3.h | 9 +-
+ include/acpi/actypes.h | 1 +
+ include/linux/cpufreq.h | 167 ++++++++++---
+ include/linux/cpuhotplug.h | 2 +-
+ include/linux/dtpm.h | 26 +--
+ include/linux/energy_model.h | 68 +++++-
+ include/linux/pci-acpi.h | 8 +
+ kernel/power/energy_model.c | 86 +++++--
+ kernel/power/swap.c | 21 +-
+ 54 files changed, 1498 insertions(+), 823 deletions(-)
+ create mode 100644 Documentation/ABI/testing/sysfs-class-thermal
+Merging cpufreq-arm/cpufreq/arm/linux-next (b3c08d1ad2bb cpufreq: Fix parameter in parse_perf_domain())
+$ git merge -m Merge branch 'cpufreq/arm/linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git cpufreq-arm/cpufreq/arm/linux-next
+Auto-merging include/linux/cpufreq.h
+Merge made by the 'recursive' strategy.
+ drivers/cpufreq/cppc_cpufreq.c | 2 --
+ drivers/cpufreq/s3c2440-cpufreq.c | 2 ++
+ drivers/cpufreq/tegra186-cpufreq.c | 4 ++++
+ drivers/cpufreq/tegra194-cpufreq.c | 8 +++++++-
+ include/linux/cpufreq.h | 2 +-
+ 5 files changed, 14 insertions(+), 4 deletions(-)
+Merging cpupower/cpupower (79a0dc5530a9 tools: cpupower: fix typo in cpupower-idle-set(1) manpage)
+$ git merge -m Merge branch 'cpupower' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git cpupower/cpupower
+Merge made by the 'recursive' strategy.
+ tools/power/cpupower/man/cpupower-idle-set.1 | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+Merging devfreq/devfreq-next (6b28c7d0781e PM / devfreq: Strengthen check for freq_table)
+$ git merge -m Merge branch 'devfreq-next' of git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git devfreq/devfreq-next
+Merge made by the 'recursive' strategy.
+ drivers/devfreq/devfreq.c | 2 +-
+ drivers/devfreq/event/exynos-ppmu.c | 12 +++++++-----
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+Merging opp/opp/linux-next (27ff8187f13e opp: Fix return in _opp_add_static_v2())
+$ git merge -m Merge branch 'opp/linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git opp/opp/linux-next
+Auto-merging drivers/devfreq/devfreq.c
+Merge made by the 'recursive' strategy.
+ drivers/devfreq/devfreq.c | 26 +++++++++
+ drivers/devfreq/governor.h | 3 ++
+ drivers/devfreq/tegra30-devfreq.c | 109 ++++++++++++++++----------------------
+ drivers/opp/core.c | 6 +--
+ drivers/opp/of.c | 50 +++++++++++++----
+ include/linux/pm_opp.h | 20 +++++--
+ 6 files changed, 136 insertions(+), 78 deletions(-)
+Merging thermal/thermal/linux-next (a67a46af4ad6 thermal/core: Deprecate changing cooling device state from userspace)
+$ git merge -m Merge branch 'thermal/linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git thermal/thermal/linux-next
+Merge made by the 'recursive' strategy.
+ .../bindings/thermal/qcom-spmi-adc-tm-hc.yaml | 149 +++++++++++++++++++++
+ .../bindings/thermal/rockchip-thermal.yaml | 23 ++--
+ .../thermal/socionext,uniphier-thermal.yaml | 1 +
+ drivers/thermal/gov_user_space.c | 9 ++
+ .../intel/int340x_thermal/int3401_thermal.c | 8 +-
+ .../int340x_thermal/processor_thermal_device.c | 36 +++--
+ .../int340x_thermal/processor_thermal_device.h | 1 +
+ .../int340x_thermal/processor_thermal_device_pci.c | 18 ++-
+ .../processor_thermal_device_pci_legacy.c | 8 +-
+ drivers/thermal/qcom/Kconfig | 2 +-
+ drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 41 +++++-
+ drivers/thermal/qcom/tsens.c | 29 ++--
+ drivers/thermal/rcar_gen3_thermal.c | 113 ++++++++++++----
+ drivers/thermal/rockchip_thermal.c | 2 +-
+ drivers/thermal/thermal_core.c | 22 ++-
+ drivers/thermal/thermal_mmio.c | 2 +-
+ drivers/thermal/thermal_netlink.c | 11 +-
+ drivers/thermal/thermal_netlink.h | 8 +-
+ drivers/thermal/thermal_sysfs.c | 3 +
+ drivers/thermal/uniphier_thermal.c | 4 +
+ 20 files changed, 408 insertions(+), 82 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm-hc.yaml
+Merging ieee1394/for-next (54b3bd99f094 firewire: nosy: switch from 'pci_' to 'dma_' API)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394.git ieee1394/for-next
+Already up to date.
+Merging dlm/next (ecd95673142e fs: dlm: avoid comms shutdown delay in release_lockspace)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git dlm/next
+Already up to date.
+Merging rdma/for-next (71ee1f127543 Merge brank 'mlx5_mkey' into rdma.git for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git rdma/for-next
+Auto-merging include/linux/mlx5/driver.h
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+Auto-merging drivers/infiniband/hw/mlx5/mr.c
+Auto-merging drivers/infiniband/hw/irdma/verbs.c
+Auto-merging drivers/infiniband/hw/irdma/uk.c
+Merge made by the 'recursive' strategy.
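The file summary printed after each "Merge made by ..." line is git's diffstat for what the merge brought in. git emits it automatically when the merge succeeds, and because merge leaves the pre-merge tip in ORIG_HEAD it can also be regenerated afterwards; a minimal sketch:

$ git diff -M --stat --summary ORIG_HEAD..HEAD

This mirrors the explicit "$ git diff -M --stat --summary HEAD^.." run later in this log, after the conflicted net-next merge had to be committed by hand.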
+ drivers/infiniband/core/cma.c | 34 +-
+ drivers/infiniband/core/cma_priv.h | 11 +-
+ drivers/infiniband/core/counters.c | 40 ++-
+ drivers/infiniband/core/device.c | 1 +
+ drivers/infiniband/core/iwpm_util.c | 2 +-
+ drivers/infiniband/core/nldev.c | 278 ++++++++++++---
+ drivers/infiniband/core/rw.c | 66 ++--
+ drivers/infiniband/core/sa_query.c | 1 -
+ drivers/infiniband/core/sysfs.c | 52 ++-
+ drivers/infiniband/core/verbs.c | 48 +++
+ drivers/infiniband/hw/bnxt_re/bnxt_re.h | 19 +-
+ drivers/infiniband/hw/bnxt_re/hw_counters.c | 378 +++++++++++++--------
+ drivers/infiniband/hw/bnxt_re/hw_counters.h | 30 +-
+ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 35 +-
+ drivers/infiniband/hw/bnxt_re/main.c | 13 +-
+ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 15 +-
+ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 6 +-
+ drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 2 +-
+ drivers/infiniband/hw/bnxt_re/qplib_res.c | 5 +-
+ drivers/infiniband/hw/bnxt_re/qplib_res.h | 9 +-
+ drivers/infiniband/hw/bnxt_re/qplib_sp.c | 51 +++
+ drivers/infiniband/hw/bnxt_re/qplib_sp.h | 28 ++
+ drivers/infiniband/hw/bnxt_re/roce_hsi.h | 85 +++++
+ drivers/infiniband/hw/cxgb4/cm.c | 1 -
+ drivers/infiniband/hw/cxgb4/device.c | 1 -
+ drivers/infiniband/hw/cxgb4/provider.c | 22 +-
+ drivers/infiniband/hw/efa/efa.h | 19 +-
+ drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 100 +++++-
+ drivers/infiniband/hw/efa/efa_admin_defs.h | 41 +++
+ drivers/infiniband/hw/efa/efa_com.c | 164 +++++++++
+ drivers/infiniband/hw/efa/efa_com.h | 38 ++-
+ drivers/infiniband/hw/efa/efa_com_cmd.c | 35 +-
+ drivers/infiniband/hw/efa/efa_com_cmd.h | 10 +-
+ drivers/infiniband/hw/efa/efa_main.c | 181 ++++++++--
+ drivers/infiniband/hw/efa/efa_regs_defs.h | 7 +-
+ drivers/infiniband/hw/efa/efa_verbs.c | 86 ++++-
+ drivers/infiniband/hw/hfi1/efivar.c | 10 +-
+ drivers/infiniband/hw/hfi1/ipoib.h | 76 +++--
+ drivers/infiniband/hw/hfi1/ipoib_tx.c | 314 ++++++++---------
+ drivers/infiniband/hw/hfi1/trace_tx.h | 71 +++-
+ drivers/infiniband/hw/hfi1/user_exp_rcv.c | 5 +-
+ drivers/infiniband/hw/hfi1/verbs.c | 53 +--
+ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 +-
+ drivers/infiniband/hw/irdma/cm.h | 8 -
+ drivers/infiniband/hw/irdma/ctrl.c | 38 ---
+ drivers/infiniband/hw/irdma/osdep.h | 1 -
+ drivers/infiniband/hw/irdma/protos.h | 2 -
+ drivers/infiniband/hw/irdma/type.h | 2 +-
+ drivers/infiniband/hw/irdma/uk.c | 57 ----
+ drivers/infiniband/hw/irdma/user.h | 4 +-
+ drivers/infiniband/hw/irdma/utils.c | 45 ---
+ drivers/infiniband/hw/irdma/verbs.c | 100 +++---
+ drivers/infiniband/hw/mlx4/alias_GUID.c | 4 +-
+ drivers/infiniband/hw/mlx4/main.c | 44 ++-
+ drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +-
+ drivers/infiniband/hw/mlx4/qp.c | 4 +-
+ drivers/infiniband/hw/mlx5/cmd.c | 26 ++
+ drivers/infiniband/hw/mlx5/cmd.h | 2 +
+ drivers/infiniband/hw/mlx5/counters.c | 282 ++++++++++++---
+ drivers/infiniband/hw/mlx5/devx.c | 13 +-
+ drivers/infiniband/hw/mlx5/devx.h | 2 +-
+ drivers/infiniband/hw/mlx5/fs.c | 187 ++++++++++
+ drivers/infiniband/hw/mlx5/main.c | 55 +--
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 59 +++-
+ drivers/infiniband/hw/mlx5/mr.c | 109 +++---
+ drivers/infiniband/hw/mlx5/odp.c | 78 ++---
+ drivers/infiniband/hw/mlx5/wr.c | 10 +-
+ drivers/infiniband/sw/rxe/rxe_av.c | 20 +-
+ drivers/infiniband/sw/rxe/rxe_comp.c | 57 ++--
+ drivers/infiniband/sw/rxe/rxe_cq.c | 28 +-
+ drivers/infiniband/sw/rxe/rxe_hw_counters.c | 42 +--
+ drivers/infiniband/sw/rxe/rxe_loc.h | 2 +
+ drivers/infiniband/sw/rxe/rxe_mr.c | 267 +++++++++++----
+ drivers/infiniband/sw/rxe/rxe_mw.c | 36 +-
+ drivers/infiniband/sw/rxe/rxe_opcode.h | 6 +-
+ drivers/infiniband/sw/rxe/rxe_param.h | 34 +-
+ drivers/infiniband/sw/rxe/rxe_pool.c | 4 +-
+ drivers/infiniband/sw/rxe/rxe_qp.c | 16 +-
+ drivers/infiniband/sw/rxe/rxe_queue.c | 30 +-
+ drivers/infiniband/sw/rxe/rxe_queue.h | 292 +++++++---------
+ drivers/infiniband/sw/rxe/rxe_req.c | 65 ++--
+ drivers/infiniband/sw/rxe/rxe_resp.c | 50 +--
+ drivers/infiniband/sw/rxe/rxe_srq.c | 3 +-
+ drivers/infiniband/sw/rxe/rxe_verbs.c | 139 +++-----
+ drivers/infiniband/sw/rxe/rxe_verbs.h | 60 ++--
+ drivers/infiniband/sw/siw/siw_cm.c | 4 +-
+ drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 -
+ drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 49 +--
+ drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 11 +-
+ drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 +
+ drivers/infiniband/ulp/rtrs/rtrs-clt.h | 13 +-
+ drivers/infiniband/ulp/rtrs/rtrs-pri.h | 2 +-
+ drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c | 3 +-
+ drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 2 +-
+ drivers/infiniband/ulp/rtrs/rtrs-srv.c | 6 +
+ drivers/infiniband/ulp/rtrs/rtrs-srv.h | 3 +-
+ drivers/infiniband/ulp/rtrs/rtrs.c | 31 +-
+ .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 6 +-
+ .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h | 2 +-
+ .../ethernet/mellanox/mlx5/core/diag/rsc_dump.c | 10 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +-
+ .../net/ethernet/mellanox/mlx5/core/en_common.c | 6 +-
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 +-
+ .../net/ethernet/mellanox/mlx5/core/fpga/conn.c | 10 +-
+ .../net/ethernet/mellanox/mlx5/core/fpga/core.h | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 54 ++-
+ drivers/net/ethernet/mellanox/mlx5/core/mr.c | 27 +-
+ .../mellanox/mlx5/core/steering/dr_icm_pool.c | 10 +-
+ .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 11 +-
+ .../mellanox/mlx5/core/steering/dr_types.h | 2 +-
+ drivers/net/ethernet/mellanox/mlx5/core/uar.c | 14 +-
+ drivers/vdpa/mlx5/core/mlx5_vdpa.h | 8 +-
+ drivers/vdpa/mlx5/core/mr.c | 8 +-
+ drivers/vdpa/mlx5/core/resources.c | 13 +-
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
+ include/linux/mlx5/device.h | 2 +
+ include/linux/mlx5/driver.h | 32 +-
+ include/linux/mlx5/fs.h | 2 +
+ include/linux/mlx5/mlx5_ifc.h | 26 +-
+ include/rdma/ib_hdrs.h | 1 +
+ include/rdma/ib_verbs.h | 65 ++--
+ include/rdma/rdma_counter.h | 2 +
+ include/uapi/rdma/efa-abi.h | 18 +-
+ include/uapi/rdma/rdma_netlink.h | 5 +
+ include/uapi/rdma/rdma_user_rxe.h | 10 +-
+ 127 files changed, 3411 insertions(+), 1835 deletions(-)
+Merging net-next/master (4d98bb0d7ec2 net: macb: Use mdio child node for MDIO bus if it exists)
+$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git net-next/master
+Auto-merging net/wireless/scan.c
+Auto-merging net/netfilter/ipvs/ip_vs_ctl.c
+Auto-merging net/mac80211/mesh.c
+Auto-merging net/core/skbuff.c
+Auto-merging kernel/bpf/core.c
+Removing include/linux/netfilter_ingress.h
+Auto-merging include/linux/mlx5/mlx5_ifc.h
+Auto-merging include/linux/mlx5/fs.h
+CONFLICT (content): Merge conflict in include/linux/mlx5/fs.h
+Auto-merging include/linux/mlx5/driver.h
+Auto-merging include/linux/mlx5/device.h
+Auto-merging include/linux/filter.h
+Auto-merging drivers/soc/fsl/dpio/qbman-portal.c
+Auto-merging drivers/soc/fsl/dpio/dpio-service.c
+Removing drivers/ptp/idt8a340_reg.h
+Removing drivers/net/wireless/mediatek/mt76/mt7921/eeprom.c
+Auto-merging drivers/net/ethernet/smsc/smc91x.c
+Auto-merging drivers/net/ethernet/microchip/lan743x_main.c
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/en.h
+Auto-merging drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+Auto-merging drivers/net/ethernet/chelsio/cxgb3/t3_hw.c
+Auto-merging drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+Auto-merging drivers/net/ethernet/chelsio/cxgb3/common.h
+Auto-merging drivers/infiniband/hw/mlx5/odp.c
+Auto-merging drivers/infiniband/hw/mlx4/qp.c
+Auto-merging drivers/infiniband/hw/mlx4/main.c
+Auto-merging MAINTAINERS
+Auto-merging Documentation/devicetree/bindings/vendor-prefixes.yaml
+Removing Documentation/devicetree/bindings/net/wireless/qca,ath9k.txt
+Removing Documentation/devicetree/bindings/net/lantiq,xrx200-net.txt
+Removing Documentation/devicetree/bindings/net/dsa/qca8k.txt
+Resolved 'include/linux/mlx5/fs.h' using previous resolution.
+Automatic merge failed; fix conflicts and then commit the result.
+$ git commit --no-edit -v -a
+[master a2a690667703] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
+$ git diff -M --stat --summary HEAD^..
+ Documentation/ABI/testing/sysfs-timecard | 174 +
+ Documentation/bpf/bpf_licensing.rst | 92 +
+ Documentation/bpf/btf.rst | 29 +-
+ Documentation/bpf/index.rst | 9 +
+ .../devicetree/bindings/net/asix,ax88796c.yaml | 73 +
+ .../devicetree/bindings/net/brcm,bcmgenet.txt | 3 +-
+ Documentation/devicetree/bindings/net/dsa/dsa.yaml | 12 +-
+ .../devicetree/bindings/net/dsa/nxp,sja1105.yaml | 43 +
+ .../devicetree/bindings/net/dsa/qca8k.txt | 215 -
+ .../devicetree/bindings/net/dsa/qca8k.yaml | 362 +
+ .../devicetree/bindings/net/dsa/realtek-smi.txt | 87 +
+ .../devicetree/bindings/net/lantiq,etop-xway.yaml | 69 +
+ .../devicetree/bindings/net/lantiq,xrx200-net.txt | 21 -
+ .../devicetree/bindings/net/lantiq,xrx200-net.yaml | 75 +
+ Documentation/devicetree/bindings/net/macb.txt | 4 +
+ .../devicetree/bindings/net/qcom,ipq8064-mdio.yaml | 5 +-
+ .../devicetree/bindings/net/renesas,ether.yaml | 17 +-
+ .../devicetree/bindings/net/renesas,etheravb.yaml | 3 +
+ .../bindings/net/socionext,uniphier-ave4.yaml | 1 +
+ .../bindings/net/wireless/mediatek,mt76.yaml | 5 +
+ .../devicetree/bindings/net/wireless/qca,ath9k.txt | 48 -
+ .../bindings/net/wireless/qca,ath9k.yaml | 90 +
+ .../devicetree/bindings/vendor-prefixes.yaml | 2 +
+ .../networking/devlink/devlink-region.rst | 4 +-
+ Documentation/networking/devlink/ice.rst | 4 +
+ Documentation/networking/devlink/index.rst | 1 +
+ Documentation/networking/devlink/iosm.rst | 162 +
+ Documentation/networking/ethtool-netlink.rst | 81 +-
+ Documentation/networking/ip-sysctl.rst | 8 -
+ Documentation/networking/ipvs-sysctl.rst | 11 +
+ Documentation/networking/mctp.rst | 59 +
+ MAINTAINERS | 19 +-
+ arch/alpha/include/uapi/asm/socket.h | 2 +
+ arch/m68k/emu/nfeth.c | 2 +-
+ arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | 2 +-
+ arch/mips/include/uapi/asm/socket.h | 2 +
+ arch/mips/lantiq/xway/dma.c | 57 +-
+ arch/parisc/include/uapi/asm/socket.h | 2 +
+ arch/sparc/include/uapi/asm/socket.h | 3 +
+ arch/x86/events/intel/core.c | 67 +-
+ arch/x86/events/intel/ds.c | 2 +-
+ arch/x86/events/intel/lbr.c | 20 +-
+ arch/x86/events/perf_event.h | 19 +
+ arch/x86/net/bpf_jit_comp.c | 53 +-
+ arch/xtensa/platforms/iss/network.c | 2 +-
+ drivers/base/property.c | 63 -
+ drivers/base/regmap/regmap-mdio.c | 6 +-
+ drivers/bcma/main.c | 2 +-
+ drivers/bluetooth/btintel.c | 239 +-
+ drivers/bluetooth/btintel.h | 11 +
+ drivers/bluetooth/btmrvl_main.c | 6 +-
+ drivers/bluetooth/btmtkuart.c | 13 +-
+ drivers/bluetooth/btrsi.c | 1 -
+ drivers/bluetooth/btrtl.c | 26 +-
+ drivers/bluetooth/btusb.c | 64 +-
+ drivers/bluetooth/hci_h5.c | 35 +-
+ drivers/bluetooth/hci_ldisc.c | 3 +
+ drivers/bluetooth/hci_qca.c | 5 +-
+ drivers/bluetooth/hci_vhci.c | 122 +
+ drivers/hsi/clients/ssi_protocol.c | 4 +-
+ drivers/infiniband/hw/mlx4/main.c | 2 +-
+ drivers/infiniband/hw/mlx4/qp.c | 2 +-
+ drivers/infiniband/hw/mlx5/odp.c | 1 +
+ drivers/infiniband/hw/qedr/main.c | 2 +-
+ drivers/net/appletalk/cops.c | 2 +-
+ drivers/net/appletalk/ltpc.c | 3 +-
+ drivers/net/arcnet/arc-rimi.c | 5 +-
+ drivers/net/arcnet/arcdevice.h | 5 +
+ drivers/net/arcnet/com20020-isa.c | 2 +-
+ drivers/net/arcnet/com20020-pci.c | 2 +-
+ drivers/net/arcnet/com20020.c | 4 +-
+ drivers/net/arcnet/com20020_cs.c | 2 +-
+ drivers/net/arcnet/com90io.c | 2 +-
+ drivers/net/arcnet/com90xx.c | 3 +-
+ drivers/net/bonding/bond_main.c | 2 +-
+ drivers/net/bonding/bond_sysfs.c | 4 +-
+ drivers/net/dsa/Kconfig | 1 +
+ drivers/net/dsa/Makefile | 2 +-
+ drivers/net/dsa/b53/b53_common.c | 59 +-
+ drivers/net/dsa/b53/b53_priv.h | 1 -
+ drivers/net/dsa/bcm_sf2.c | 4 +-
+ drivers/net/dsa/ocelot/felix.c | 4 +-
+ drivers/net/dsa/qca8k.c | 435 +-
+ drivers/net/dsa/qca8k.h | 35 +-
+ drivers/net/dsa/realtek-smi-core.c | 4 +
+ drivers/net/dsa/realtek-smi-core.h | 4 +-
+ drivers/net/dsa/rtl8365mb.c | 1982 +
+ drivers/net/dsa/rtl8366.c | 96 +-
+ drivers/net/dsa/rtl8366rb.c | 301 +-
+ drivers/net/dsa/sja1105/sja1105.h | 27 +-
+ drivers/net/dsa/sja1105/sja1105_clocking.c | 35 +-
+ drivers/net/dsa/sja1105/sja1105_main.c | 136 +-
+ drivers/net/dsa/sja1105/sja1105_vl.c | 15 +-
+ drivers/net/dsa/xrs700x/xrs700x_mdio.c | 12 +-
+ drivers/net/ethernet/3com/3c509.c | 2 +-
+ drivers/net/ethernet/3com/3c515.c | 5 +-
+ drivers/net/ethernet/3com/3c574_cs.c | 11 +-
+ drivers/net/ethernet/3com/3c589_cs.c | 10 +-
+ drivers/net/ethernet/3com/3c59x.c | 4 +-
+ drivers/net/ethernet/8390/apne.c | 3 +-
+ drivers/net/ethernet/8390/ax88796.c | 12 +-
+ drivers/net/ethernet/8390/axnet_cs.c | 7 +-
+ drivers/net/ethernet/8390/mcf8390.c | 3 +-
+ drivers/net/ethernet/8390/ne.c | 4 +-
+ drivers/net/ethernet/8390/ne2k-pci.c | 2 +-
+ drivers/net/ethernet/8390/pcnet_cs.c | 22 +-
+ drivers/net/ethernet/8390/stnic.c | 5 +-
+ drivers/net/ethernet/8390/zorro8390.c | 3 +-
+ drivers/net/ethernet/Kconfig | 1 +
+ drivers/net/ethernet/Makefile | 1 +
+ drivers/net/ethernet/actions/owl-emac.c | 6 +-
+ drivers/net/ethernet/adaptec/starfire.c | 14 +-
+ drivers/net/ethernet/aeroflex/greth.c | 8 +-
+ drivers/net/ethernet/agere/et131x.c | 4 +-
+ drivers/net/ethernet/alacritech/slicoss.c | 4 +-
+ drivers/net/ethernet/allwinner/sun4i-emac.c | 4 +-
+ drivers/net/ethernet/alteon/acenic.c | 20 +-
+ drivers/net/ethernet/altera/altera_tse_main.c | 4 +-
+ drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
+ drivers/net/ethernet/amd/Kconfig | 2 +-
+ drivers/net/ethernet/amd/amd8111e.c | 6 +-
+ drivers/net/ethernet/amd/atarilance.c | 4 +-
+ drivers/net/ethernet/amd/au1000_eth.c | 2 +-
+ drivers/net/ethernet/amd/nmclan_cs.c | 5 +-
+ drivers/net/ethernet/amd/pcnet32.c | 15 +-
+ drivers/net/ethernet/amd/sun3lance.c | 4 +-
+ drivers/net/ethernet/amd/sunlance.c | 4 +-
+ drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +-
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +-
+ drivers/net/ethernet/amd/xgbe/xgbe-main.c | 2 +-
+ drivers/net/ethernet/amd/xgbe/xgbe.h | 2 +-
+ drivers/net/ethernet/apm/xgene-v2/mac.c | 2 +-
+ drivers/net/ethernet/apm/xgene-v2/main.c | 2 +-
+ drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 2 +-
+ drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 2 +-
+ drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 2 +-
+ drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c | 2 +-
+ drivers/net/ethernet/apple/bmac.c | 15 +-
+ drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 6 +-
+ drivers/net/ethernet/aquantia/atlantic/aq_macsec.c | 2 +-
+ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 8 +-
+ .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 4 +-
+ .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 +-
+ .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h | 2 +-
+ .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 4 +-
+ .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 4 +-
+ .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c | 2 +-
+ drivers/net/ethernet/arc/Kconfig | 4 +-
+ drivers/net/ethernet/arc/emac_main.c | 4 +-
+ drivers/net/ethernet/arc/emac_mdio.c | 9 +-
+ drivers/net/ethernet/asix/Kconfig | 35 +
+ drivers/net/ethernet/asix/Makefile | 6 +
+ drivers/net/ethernet/asix/ax88796c_ioctl.c | 239 +
+ drivers/net/ethernet/asix/ax88796c_ioctl.h | 26 +
+ drivers/net/ethernet/asix/ax88796c_main.c | 1163 +
+ drivers/net/ethernet/asix/ax88796c_main.h | 568 +
+ drivers/net/ethernet/asix/ax88796c_spi.c | 115 +
+ drivers/net/ethernet/asix/ax88796c_spi.h | 69 +
+ drivers/net/ethernet/atheros/ag71xx.c | 4 +-
+ drivers/net/ethernet/atheros/alx/main.c | 4 +-
+ drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 12 +-
+ drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 10 +-
+ drivers/net/ethernet/atheros/atlx/atl1.c | 2 +-
+ drivers/net/ethernet/atheros/atlx/atl2.c | 4 +-
+ drivers/net/ethernet/atheros/atlx/atlx.c | 2 +-
+ drivers/net/ethernet/broadcom/b44.c | 12 +-
+ drivers/net/ethernet/broadcom/bcm4908_enet.c | 4 +-
+ drivers/net/ethernet/broadcom/bcm63xx_enet.c | 6 +-
+ drivers/net/ethernet/broadcom/bcmsysport.c | 6 +-
+ drivers/net/ethernet/broadcom/bgmac-bcma-mdio.c | 6 +-
+ drivers/net/ethernet/broadcom/bgmac-bcma.c | 37 +-
+ drivers/net/ethernet/broadcom/bgmac-platform.c | 2 +-
+ drivers/net/ethernet/broadcom/bgmac.c | 4 +-
+ drivers/net/ethernet/broadcom/bnx2.c | 6 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 22 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 3 +-
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 9 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 50 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h | 13 -
+ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 6 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 2 +-
+ drivers/net/ethernet/broadcom/genet/bcmgenet.c | 87 +-
+ drivers/net/ethernet/broadcom/genet/bcmgenet.h | 10 +-
+ drivers/net/ethernet/broadcom/genet/bcmmii.c | 202 +-
+ drivers/net/ethernet/broadcom/tg3.c | 60 +-
+ drivers/net/ethernet/brocade/bna/bnad.c | 5 +-
+ drivers/net/ethernet/cadence/macb.h | 7 +-
+ drivers/net/ethernet/cadence/macb_main.c | 31 +-
+
drivers/net/ethernet/cadence/macb_ptp.c | 13 +- + drivers/net/ethernet/calxeda/xgmac.c | 8 +- + drivers/net/ethernet/cavium/liquidio/lio_core.c | 3 +- + drivers/net/ethernet/cavium/liquidio/lio_main.c | 40 +- + drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 4 +- + drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 2 +- + drivers/net/ethernet/cavium/thunder/nic_main.c | 3 +- + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 11 +- + drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 9 +- + drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 2 +- + drivers/net/ethernet/chelsio/cxgb/gmac.h | 2 +- + drivers/net/ethernet/chelsio/cxgb/pm3393.c | 2 +- + drivers/net/ethernet/chelsio/cxgb/subr.c | 2 +- + drivers/net/ethernet/chelsio/cxgb/vsc7326.c | 4 +- + drivers/net/ethernet/chelsio/cxgb3/common.h | 2 +- + drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 +- + drivers/net/ethernet/chelsio/cxgb3/t3_hw.c | 4 +- + drivers/net/ethernet/chelsio/cxgb3/xgmac.c | 2 +- + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 +- + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- + drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- + drivers/net/ethernet/chelsio/cxgb4vf/adapter.h | 3 +- + .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 8 +- + .../chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- + .../chelsio/inline_crypto/chtls/chtls_cm.h | 2 +- + drivers/net/ethernet/cirrus/cs89x0.c | 13 +- + drivers/net/ethernet/cirrus/ep93xx_eth.c | 2 +- + drivers/net/ethernet/cirrus/mac89x0.c | 2 +- + drivers/net/ethernet/cisco/enic/enic_ethtool.c | 4 +- + drivers/net/ethernet/cisco/enic/enic_main.c | 9 +- + drivers/net/ethernet/cisco/enic/enic_pp.c | 2 +- + drivers/net/ethernet/cortina/gemini.c | 6 +- + drivers/net/ethernet/davicom/dm9000.c | 9 +- + drivers/net/ethernet/dec/tulip/de2104x.c | 15 +- + drivers/net/ethernet/dec/tulip/de4x5.c | 35 +- + drivers/net/ethernet/dec/tulip/dmfe.c | 9 +- + drivers/net/ethernet/dec/tulip/tulip_core.c | 45 +- + drivers/net/ethernet/dec/tulip/uli526x.c | 11 +- + drivers/net/ethernet/dec/tulip/winbond-840.c | 6 +- + drivers/net/ethernet/dec/tulip/xircom_cb.c | 4 +- + drivers/net/ethernet/dlink/dl2k.c | 5 +- + drivers/net/ethernet/dlink/sundance.c | 6 +- + drivers/net/ethernet/dnet.c | 8 +- + drivers/net/ethernet/ec_bhf.c | 4 +- + drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- + drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- + drivers/net/ethernet/emulex/benet/be_main.c | 7 +- + drivers/net/ethernet/ethoc.c | 28 +- + drivers/net/ethernet/ezchip/Kconfig | 2 +- + drivers/net/ethernet/ezchip/nps_enet.c | 4 +- + drivers/net/ethernet/faraday/ftgmac100.c | 9 +- + drivers/net/ethernet/fealnx.c | 8 +- + drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 6 +- + .../ethernet/freescale/dpaa2/dpaa2-eth-devlink.c | 21 +- + drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 24 +- + drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 7 +- + .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 58 + + drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 2 +- + .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 2 +- + drivers/net/ethernet/freescale/enetc/enetc.c | 332 +- + drivers/net/ethernet/freescale/enetc/enetc.h | 4 + + drivers/net/ethernet/freescale/enetc/enetc_hw.h | 6 +- + drivers/net/ethernet/freescale/enetc/enetc_pf.c | 24 +- + drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 6 +- + drivers/net/ethernet/freescale/enetc/enetc_qos.c | 18 +- + drivers/net/ethernet/freescale/enetc/enetc_vf.c | 16 +- + drivers/net/ethernet/freescale/fec_main.c | 7 +- + drivers/net/ethernet/freescale/fec_mpc52xx.c | 4 +- + 
drivers/net/ethernet/freescale/fman/fman_dtsec.c | 8 +- + drivers/net/ethernet/freescale/fman/fman_dtsec.h | 2 +- + drivers/net/ethernet/freescale/fman/fman_memac.c | 8 +- + drivers/net/ethernet/freescale/fman/fman_memac.h | 2 +- + drivers/net/ethernet/freescale/fman/fman_tgec.c | 8 +- + drivers/net/ethernet/freescale/fman/fman_tgec.h | 2 +- + drivers/net/ethernet/freescale/fman/mac.h | 2 +- + .../net/ethernet/freescale/fs_enet/fs_enet-main.c | 2 +- + drivers/net/ethernet/freescale/gianfar.c | 2 +- + drivers/net/ethernet/freescale/ucc_geth.c | 4 +- + drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 14 +- + drivers/net/ethernet/google/gve/gve.h | 31 +- + drivers/net/ethernet/google/gve/gve_adminq.c | 2 +- + drivers/net/ethernet/google/gve/gve_adminq.h | 1 + + drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +- + drivers/net/ethernet/google/gve/gve_main.c | 101 +- + drivers/net/ethernet/google/gve/gve_rx.c | 98 +- + drivers/net/ethernet/google/gve/gve_tx.c | 117 +- + drivers/net/ethernet/google/gve/gve_tx_dqo.c | 84 +- + drivers/net/ethernet/google/gve/gve_utils.c | 4 + + drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- + drivers/net/ethernet/hisilicon/hisi_femac.c | 6 +- + drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 4 +- + drivers/net/ethernet/hisilicon/hns/hnae.h | 4 +- + drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 7 +- + drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 2 +- + drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 2 +- + drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h | 5 +- + drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h | 2 +- + .../net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 2 +- + drivers/net/ethernet/hisilicon/hns/hns_enet.c | 4 +- + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 9 +- + drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 78 + + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +- + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 5 +- + .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 3 + + .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c | 18 +- + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 48 +- + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 + + .../hisilicon/hns3/hns3vf/hclgevf_devlink.c | 18 +- + .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- + drivers/net/ethernet/huawei/hinic/hinic_devlink.c | 4 +- + drivers/net/ethernet/huawei/hinic/hinic_devlink.h | 2 +- + drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c | 13 +- + drivers/net/ethernet/huawei/hinic/hinic_main.c | 12 +- + drivers/net/ethernet/i825xx/sun3_82586.c | 7 +- + drivers/net/ethernet/ibm/ehea/ehea_main.c | 4 +- + drivers/net/ethernet/ibm/emac/core.c | 14 +- + drivers/net/ethernet/ibm/ibmveth.c | 46 +- + drivers/net/ethernet/ibm/ibmvnic.c | 645 +- + drivers/net/ethernet/ibm/ibmvnic.h | 10 +- + drivers/net/ethernet/intel/Kconfig | 14 + + drivers/net/ethernet/intel/e100.c | 4 +- + drivers/net/ethernet/intel/e1000/e1000_main.c | 4 +- + drivers/net/ethernet/intel/e1000e/e1000.h | 1 + + drivers/net/ethernet/intel/e1000e/netdev.c | 5 +- + drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- + drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 4 +- + drivers/net/ethernet/intel/i40e/i40e.h | 2 +- + drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +- + drivers/net/ethernet/intel/i40e/i40e_xsk.c | 52 +- + drivers/net/ethernet/intel/iavf/iavf.h | 12 +- + drivers/net/ethernet/intel/iavf/iavf_main.c | 194 +- + drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 6 +- + drivers/net/ethernet/intel/ice/Makefile | 5 +- + 
drivers/net/ethernet/intel/ice/ice.h | 206 +- + drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 92 +- + drivers/net/ethernet/intel/ice/ice_arfs.c | 4 +- + drivers/net/ethernet/intel/ice/ice_base.c | 121 +- + drivers/net/ethernet/intel/ice/ice_base.h | 8 +- + drivers/net/ethernet/intel/ice/ice_common.c | 129 +- + drivers/net/ethernet/intel/ice/ice_common.h | 7 + + drivers/net/ethernet/intel/ice/ice_dcb.c | 225 +- + drivers/net/ethernet/intel/ice/ice_dcb.h | 18 + + drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 216 +- + drivers/net/ethernet/intel/ice/ice_dcb_lib.h | 32 +- + drivers/net/ethernet/intel/ice/ice_dcb_nl.c | 192 +- + drivers/net/ethernet/intel/ice/ice_devids.h | 2 + + drivers/net/ethernet/intel/ice/ice_devlink.c | 256 +- + drivers/net/ethernet/intel/ice/ice_devlink.h | 8 +- + drivers/net/ethernet/intel/ice/ice_eswitch.c | 649 + + drivers/net/ethernet/intel/ice/ice_eswitch.h | 83 + + drivers/net/ethernet/intel/ice/ice_ethtool.c | 235 +- + drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 4 +- + drivers/net/ethernet/intel/ice/ice_fdir.c | 2 +- + drivers/net/ethernet/intel/ice/ice_fdir.h | 2 +- + drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 275 +- + drivers/net/ethernet/intel/ice/ice_flex_pipe.h | 14 + + drivers/net/ethernet/intel/ice/ice_flex_type.h | 13 + + drivers/net/ethernet/intel/ice/ice_fltr.c | 80 + + drivers/net/ethernet/intel/ice/ice_fltr.h | 3 + + drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 1 + + drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 43 + + drivers/net/ethernet/intel/ice/ice_lib.c | 847 +- + drivers/net/ethernet/intel/ice/ice_lib.h | 36 +- + drivers/net/ethernet/intel/ice/ice_main.c | 1383 +- + drivers/net/ethernet/intel/ice/ice_protocol_type.h | 169 + + drivers/net/ethernet/intel/ice/ice_ptp.c | 372 +- + drivers/net/ethernet/intel/ice/ice_ptp.h | 24 +- + drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 151 + + drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 22 + + drivers/net/ethernet/intel/ice/ice_repr.c | 386 + + drivers/net/ethernet/intel/ice/ice_repr.h | 28 + + drivers/net/ethernet/intel/ice/ice_sched.c | 184 + + drivers/net/ethernet/intel/ice/ice_sched.h | 8 + + drivers/net/ethernet/intel/ice/ice_switch.c | 2531 +- + drivers/net/ethernet/intel/ice/ice_switch.h | 149 +- + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 1056 + + drivers/net/ethernet/intel/ice/ice_tc_lib.h | 152 + + drivers/net/ethernet/intel/ice/ice_trace.h | 28 +- + drivers/net/ethernet/intel/ice/ice_txrx.c | 326 +- + drivers/net/ethernet/intel/ice/ice_txrx.h | 147 +- + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 102 +- + drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 14 +- + drivers/net/ethernet/intel/ice/ice_type.h | 19 +- + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 403 +- + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 74 +- + drivers/net/ethernet/intel/ice/ice_xsk.c | 158 +- + drivers/net/ethernet/intel/ice/ice_xsk.h | 20 +- + drivers/net/ethernet/intel/igb/igb_main.c | 4 +- + drivers/net/ethernet/intel/igbvf/netdev.c | 8 +- + drivers/net/ethernet/intel/igc/igc_main.c | 4 +- + drivers/net/ethernet/intel/igc/igc_ptp.c | 2 +- + drivers/net/ethernet/intel/ixgb/ixgb_hw.c | 2 +- + drivers/net/ethernet/intel/ixgb/ixgb_hw.h | 2 +- + drivers/net/ethernet/intel/ixgb/ixgb_main.c | 10 +- + drivers/net/ethernet/intel/ixgbe/ixgbe.h | 23 +- + drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 9 +- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 54 +- + .../net/ethernet/intel/ixgbe/ixgbe_txrx_common.h | 3 +- + drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 +- + 
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +- + drivers/net/ethernet/jme.c | 4 +- + drivers/net/ethernet/korina.c | 4 +- + drivers/net/ethernet/lantiq_etop.c | 21 +- + drivers/net/ethernet/lantiq_xrx200.c | 87 +- + drivers/net/ethernet/litex/Kconfig | 2 +- + drivers/net/ethernet/litex/litex_liteeth.c | 2 +- + drivers/net/ethernet/marvell/mv643xx_eth.c | 16 +- + drivers/net/ethernet/marvell/mvneta.c | 19 +- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 13 +- + drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 2 +- + drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 11 +- + drivers/net/ethernet/marvell/octeontx2/af/common.h | 1 + + .../ethernet/marvell/octeontx2/af/lmac_common.h | 5 + + drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 138 +- + drivers/net/ethernet/marvell/octeontx2/af/npc.h | 16 +- + .../ethernet/marvell/octeontx2/af/npc_profile.h | 994 +- + drivers/net/ethernet/marvell/octeontx2/af/ptp.c | 133 +- + drivers/net/ethernet/marvell/octeontx2/af/ptp.h | 1 + + drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 17 + + drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 3 + + drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 76 +- + drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 19 +- + .../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 13 +- + .../net/ethernet/marvell/octeontx2/af/rvu_cn10k.c | 4 +- + .../net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 601 +- + .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 16 +- + .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 222 +- + .../net/ethernet/marvell/octeontx2/af/rvu_npc.c | 96 + + .../net/ethernet/marvell/octeontx2/af/rvu_reg.h | 4 + + .../net/ethernet/marvell/octeontx2/af/rvu_struct.h | 18 + + .../net/ethernet/marvell/octeontx2/nic/Makefile | 6 +- + drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c | 2 +- + .../ethernet/marvell/octeontx2/nic/otx2_common.c | 52 +- + .../ethernet/marvell/octeontx2/nic/otx2_common.h | 18 +- + .../ethernet/marvell/octeontx2/nic/otx2_devlink.c | 21 +- + .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 38 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 234 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 133 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 273 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 16 +- + .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 8 +- + .../ethernet/marvell/prestera/prestera_devlink.c | 35 +- + .../ethernet/marvell/prestera/prestera_devlink.h | 4 +- + .../net/ethernet/marvell/prestera/prestera_main.c | 17 +- + drivers/net/ethernet/marvell/pxa168_eth.c | 18 +- + drivers/net/ethernet/marvell/skge.c | 6 +- + drivers/net/ethernet/marvell/sky2.c | 97 +- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- + drivers/net/ethernet/mediatek/mtk_star_emac.c | 4 +- + drivers/net/ethernet/mellanox/mlx4/cmd.c | 6 +- + drivers/net/ethernet/mellanox/mlx4/cq.c | 3 +- + drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 8 + + drivers/net/ethernet/mellanox/mlx4/en_main.c | 1 - + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 40 +- + drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 + + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 15 + + drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4 +- + drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- + drivers/net/ethernet/mellanox/mlx4/main.c | 12 +- + drivers/net/ethernet/mellanox/mlx4/mcg.c | 2 +- + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 + + drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h | 4 +- + drivers/net/ethernet/mellanox/mlx5/core/Makefile | 6 +- + 
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 20 +- + drivers/net/ethernet/mellanox/mlx5/core/dev.c | 14 +- + drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 30 +- + .../mellanox/mlx5/core/diag/fs_tracepoint.c | 3 + + .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 1 - + drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- + .../net/ethernet/mellanox/mlx5/core/en/devlink.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 8 +- + .../net/ethernet/mellanox/mlx5/core/en/health.h | 1 - + drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 102 +- + drivers/net/ethernet/mellanox/mlx5/core/en/qos.h | 9 + + .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 20 +- + .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 7 +- + .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 7 +- + drivers/net/ethernet/mellanox/mlx5/core/en/rss.c | 27 +- + .../net/ethernet/mellanox/mlx5/core/en/tc/sample.c | 21 +- + .../net/ethernet/mellanox/mlx5/core/en/tc/sample.h | 27 + + drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 15 +- + .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 12 +- + .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 1 + + .../ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 9 + + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 +- + drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 4 +- + .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 6 +- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 118 +- + .../net/ethernet/mellanox/mlx5/core/en_selftest.c | 92 +- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 398 +- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 9 +- + .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 231 +- + .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 1 + + .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 +- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 10 +- + .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 28 +- + .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 66 +- + drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 4 + + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 72 + + drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 12 +- + .../net/ethernet/mellanox/mlx5/core/fs_counters.c | 3 +- + drivers/net/ethernet/mellanox/mlx5/core/fw.c | 15 +- + drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 17 +- + drivers/net/ethernet/mellanox/mlx5/core/health.c | 21 +- + .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 30 + + .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 5 +- + .../ethernet/mellanox/mlx5/core/{ => lag}/lag.c | 102 +- + .../ethernet/mellanox/mlx5/core/{ => lag}/lag.h | 9 +- + .../mellanox/mlx5/core/{lag_mp.c => lag/mp.c} | 4 +- + .../mellanox/mlx5/core/{lag_mp.h => lag/mp.h} | 0 + .../net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 611 + + .../net/ethernet/mellanox/mlx5/core/lag/port_sel.h | 52 + + .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 4 + + .../net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h | 2 + + drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 162 + + drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 41 + + drivers/net/ethernet/mellanox/mlx5/core/main.c | 87 +- + drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 2 - + .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 16 +- + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 36 +- + .../ethernet/mellanox/mlx5/core/sf/dev/driver.c | 7 +- + .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2 +- + .../mellanox/mlx5/core/steering/dr_action.c | 27 +- + 
.../ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 6 +- + .../mellanox/mlx5/core/steering/dr_domain.c | 212 +- + .../ethernet/mellanox/mlx5/core/steering/dr_fw.c | 2 +- + .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 4 +- + .../mellanox/mlx5/core/steering/dr_ste_v0.c | 13 +- + .../mellanox/mlx5/core/steering/dr_ste_v1.c | 20 +- + .../mellanox/mlx5/core/steering/dr_types.h | 47 +- + .../ethernet/mellanox/mlx5/core/steering/fs_dr.c | 17 +- + .../ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 2 +- + drivers/net/ethernet/mellanox/mlx5/core/vport.c | 21 +- + .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 2 +- + drivers/net/ethernet/mellanox/mlxfw/mlxfw.h | 2 +- + drivers/net/ethernet/mellanox/mlxsw/core.c | 90 +- + drivers/net/ethernet/mellanox/mlxsw/core.h | 2 - + drivers/net/ethernet/mellanox/mlxsw/core_env.c | 372 +- + drivers/net/ethernet/mellanox/mlxsw/core_env.h | 23 + + drivers/net/ethernet/mellanox/mlxsw/item.h | 56 +- + drivers/net/ethernet/mellanox/mlxsw/minimal.c | 66 +- + drivers/net/ethernet/mellanox/mlxsw/reg.h | 351 +- + drivers/net/ethernet/mellanox/mlxsw/resources.h | 6 - + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 350 +- + drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 9 +- + .../net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c | 1 + + .../net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 2 +- + .../net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 45 + + .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 432 +- + .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h | 27 +- + .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 540 +- + .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 315 +- + .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 6 +- + .../net/ethernet/mellanox/mlxsw/spectrum_span.c | 16 + + .../net/ethernet/mellanox/mlxsw/spectrum_span.h | 1 + + drivers/net/ethernet/micrel/ks8842.c | 15 +- + drivers/net/ethernet/micrel/ks8851.h | 2 +- + drivers/net/ethernet/micrel/ks8851_common.c | 14 +- + drivers/net/ethernet/micrel/ks8851_par.c | 4 +- + drivers/net/ethernet/micrel/ks8851_spi.c | 4 +- + drivers/net/ethernet/micrel/ksz884x.c | 16 +- + drivers/net/ethernet/microchip/enc28j60.c | 7 +- + drivers/net/ethernet/microchip/encx24j600.c | 7 +- + drivers/net/ethernet/microchip/lan743x_main.c | 4 +- + drivers/net/ethernet/microchip/lan743x_main.h | 1 + + drivers/net/ethernet/microchip/lan743x_ptp.c | 91 +- + .../net/ethernet/microchip/sparx5/sparx5_main.c | 3 +- + .../net/ethernet/microchip/sparx5/sparx5_netdev.c | 6 +- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 4 +- + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- + drivers/net/ethernet/moxa/moxart_ether.c | 2 +- + drivers/net/ethernet/mscc/Kconfig | 2 +- + drivers/net/ethernet/mscc/ocelot.c | 274 +- + drivers/net/ethernet/mscc/ocelot.h | 1 + + drivers/net/ethernet/mscc/ocelot_flower.c | 125 +- + drivers/net/ethernet/mscc/ocelot_mrp.c | 8 +- + drivers/net/ethernet/mscc/ocelot_net.c | 17 +- + drivers/net/ethernet/mscc/ocelot_vsc7514.c | 9 +- + drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 9 +- + drivers/net/ethernet/natsemi/natsemi.c | 6 +- + drivers/net/ethernet/natsemi/ns83820.c | 11 +- + drivers/net/ethernet/neterion/s2io.c | 6 +- + drivers/net/ethernet/neterion/s2io.h | 2 +- + drivers/net/ethernet/neterion/vxge/vxge-main.c | 6 +- + drivers/net/ethernet/netronome/nfp/abm/main.c | 2 +- + drivers/net/ethernet/netronome/nfp/abm/qdisc.c | 2 +- + drivers/net/ethernet/netronome/nfp/devlink_param.c | 9 +- + .../ethernet/netronome/nfp/flower/tunnel_conf.c | 6 +- + 
drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 11 +- + drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 3 +- + .../net/ethernet/netronome/nfp/nfp_netvf_main.c | 2 +- + drivers/net/ethernet/ni/nixge.c | 2 +- + drivers/net/ethernet/nvidia/forcedeth.c | 51 +- + drivers/net/ethernet/nxp/lpc_eth.c | 10 +- + .../net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 4 +- + drivers/net/ethernet/packetengines/hamachi.c | 5 +- + drivers/net/ethernet/packetengines/yellowfin.c | 6 +- + drivers/net/ethernet/pasemi/pasemi_mac.c | 4 +- + drivers/net/ethernet/pensando/ionic/ionic.h | 8 +- + .../net/ethernet/pensando/ionic/ionic_debugfs.c | 48 +- + drivers/net/ethernet/pensando/ionic/ionic_dev.c | 1 - + drivers/net/ethernet/pensando/ionic/ionic_dev.h | 4 - + .../net/ethernet/pensando/ionic/ionic_devlink.c | 10 +- + .../net/ethernet/pensando/ionic/ionic_ethtool.c | 38 - + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 264 +- + drivers/net/ethernet/pensando/ionic/ionic_lif.h | 49 +- + drivers/net/ethernet/pensando/ionic/ionic_main.c | 92 +- + drivers/net/ethernet/pensando/ionic/ionic_phc.c | 8 +- + .../net/ethernet/pensando/ionic/ionic_rx_filter.c | 241 +- + .../net/ethernet/pensando/ionic/ionic_rx_filter.h | 2 + + drivers/net/ethernet/pensando/ionic/ionic_stats.c | 121 - + drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 14 - + .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 6 +- + drivers/net/ethernet/qlogic/qed/qed.h | 44 +- + drivers/net/ethernet/qlogic/qed/qed_cxt.c | 16 +- + drivers/net/ethernet/qlogic/qed/qed_cxt.h | 143 +- + drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h | 1491 + + drivers/net/ethernet/qlogic/qed/qed_dcbx.h | 11 +- + drivers/net/ethernet/qlogic/qed/qed_debug.c | 1389 +- + drivers/net/ethernet/qlogic/qed/qed_debug.h | 7 +- + drivers/net/ethernet/qlogic/qed/qed_dev.c | 124 +- + drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 347 +- + drivers/net/ethernet/qlogic/qed/qed_devlink.c | 12 +- + drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 25 +- + drivers/net/ethernet/qlogic/qed/qed_hsi.h | 12643 ++--- + drivers/net/ethernet/qlogic/qed/qed_hw.h | 222 +- + .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 405 +- + drivers/net/ethernet/qlogic/qed/qed_init_ops.c | 98 +- + drivers/net/ethernet/qlogic/qed/qed_init_ops.h | 60 +- + drivers/net/ethernet/qlogic/qed/qed_int.c | 4 +- + drivers/net/ethernet/qlogic/qed/qed_int.h | 286 +- + drivers/net/ethernet/qlogic/qed/qed_iro_hsi.h | 500 + + drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 15 +- + drivers/net/ethernet/qlogic/qed/qed_iscsi.h | 9 +- + drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 2 + + drivers/net/ethernet/qlogic/qed/qed_l2.c | 43 +- + drivers/net/ethernet/qlogic/qed/qed_l2.h | 135 +- + drivers/net/ethernet/qlogic/qed/qed_ll2.c | 167 +- + drivers/net/ethernet/qlogic/qed/qed_ll2.h | 131 +- + drivers/net/ethernet/qlogic/qed/qed_main.c | 23 +- + drivers/net/ethernet/qlogic/qed/qed_mcp.c | 66 +- + drivers/net/ethernet/qlogic/qed/qed_mcp.h | 765 +- + drivers/net/ethernet/qlogic/qed/qed_mfw_hsi.h | 2474 + + drivers/net/ethernet/qlogic/qed/qed_ooo.c | 20 +- + drivers/net/ethernet/qlogic/qed/qed_rdma.c | 9 +- + drivers/net/ethernet/qlogic/qed/qed_rdma.h | 7 +- + drivers/net/ethernet/qlogic/qed/qed_reg_addr.h | 95 +- + drivers/net/ethernet/qlogic/qed/qed_roce.c | 1 - + drivers/net/ethernet/qlogic/qed/qed_selftest.h | 30 +- + drivers/net/ethernet/qlogic/qed/qed_sp.h | 223 +- + drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 10 +- + drivers/net/ethernet/qlogic/qed/qed_spq.c | 63 +- + drivers/net/ethernet/qlogic/qed/qed_sriov.c | 201 +- + 
drivers/net/ethernet/qlogic/qed/qed_sriov.h | 138 +- + drivers/net/ethernet/qlogic/qed/qed_vf.c | 13 +- + drivers/net/ethernet/qlogic/qed/qed_vf.h | 311 +- + drivers/net/ethernet/qlogic/qede/qede_filter.c | 53 +- + drivers/net/ethernet/qlogic/qede/qede_main.c | 21 +- + drivers/net/ethernet/qlogic/qla3xxx.c | 12 +- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 4 +- + drivers/net/ethernet/qualcomm/emac/emac-mac.c | 2 +- + drivers/net/ethernet/qualcomm/emac/emac.c | 5 +- + drivers/net/ethernet/qualcomm/qca_spi.c | 2 +- + drivers/net/ethernet/qualcomm/qca_uart.c | 2 +- + drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- + drivers/net/ethernet/rdc/r6040.c | 24 +- + drivers/net/ethernet/realtek/8139cp.c | 7 +- + drivers/net/ethernet/realtek/8139too.c | 7 +- + drivers/net/ethernet/realtek/atp.c | 4 +- + drivers/net/ethernet/realtek/r8169.h | 2 +- + drivers/net/ethernet/realtek/r8169_main.c | 44 +- + drivers/net/ethernet/realtek/r8169_phy_config.c | 59 - + drivers/net/ethernet/renesas/ravb.h | 52 +- + drivers/net/ethernet/renesas/ravb_main.c | 728 +- + drivers/net/ethernet/renesas/sh_eth.c | 18 +- + drivers/net/ethernet/rocker/rocker_main.c | 10 +- + drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h | 2 +- + drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c | 3 +- + drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 9 +- + .../net/ethernet/samsung/sxgbe/sxgbe_platform.c | 2 +- + drivers/net/ethernet/seeq/sgiseeq.c | 4 +- + drivers/net/ethernet/sfc/ef10.c | 4 +- + drivers/net/ethernet/sfc/ef100_nic.c | 2 +- + drivers/net/ethernet/sfc/ef10_sriov.c | 4 +- + drivers/net/ethernet/sfc/ef10_sriov.h | 6 +- + drivers/net/ethernet/sfc/efx.c | 2 +- + drivers/net/ethernet/sfc/efx_common.c | 4 +- + drivers/net/ethernet/sfc/ethtool_common.c | 10 +- + drivers/net/ethernet/sfc/falcon/efx.c | 6 +- + drivers/net/ethernet/sfc/net_driver.h | 2 +- + drivers/net/ethernet/sfc/siena_sriov.c | 2 +- + drivers/net/ethernet/sfc/siena_sriov.h | 2 +- + drivers/net/ethernet/sgi/ioc3-eth.c | 4 +- + drivers/net/ethernet/sgi/meth.c | 2 +- + drivers/net/ethernet/silan/sc92031.c | 14 +- + drivers/net/ethernet/sis/sis190.c | 10 +- + drivers/net/ethernet/sis/sis900.c | 19 +- + drivers/net/ethernet/smsc/epic100.c | 4 +- + drivers/net/ethernet/smsc/smc911x.c | 4 +- + drivers/net/ethernet/smsc/smc91c92_cs.c | 15 +- + drivers/net/ethernet/smsc/smc91x.c | 4 +- + drivers/net/ethernet/smsc/smsc911x.c | 22 +- + drivers/net/ethernet/smsc/smsc9420.c | 26 +- + drivers/net/ethernet/socionext/netsec.c | 46 +- + drivers/net/ethernet/socionext/sni_ave.c | 17 +- + drivers/net/ethernet/stmicro/stmmac/common.h | 4 +- + drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 2 +- + .../net/ethernet/stmicro/stmmac/dwmac-visconti.c | 7 +- + .../net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- + .../net/ethernet/stmicro/stmmac/dwmac100_core.c | 2 +- + drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- + drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c | 2 +- + drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +- + .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 3 +- + drivers/net/ethernet/stmicro/stmmac/hwif.h | 3 +- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 +- + .../net/ethernet/stmicro/stmmac/stmmac_selftests.c | 8 +- + drivers/net/ethernet/sun/cassini.c | 7 +- + drivers/net/ethernet/sun/ldmvsw.c | 7 +- + drivers/net/ethernet/sun/niu.c | 46 +- + drivers/net/ethernet/sun/sunbmac.c | 6 +- + drivers/net/ethernet/sun/sungem.c | 15 +- + drivers/net/ethernet/sun/sunhme.c | 23 +- + drivers/net/ethernet/sun/sunqe.c | 4 +- + 
drivers/net/ethernet/sun/sunvnet.c | 4 +- + drivers/net/ethernet/synopsys/dwc-xlgmac-common.c | 2 +- + drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c | 2 +- + drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- + drivers/net/ethernet/synopsys/dwc-xlgmac.h | 2 +- + drivers/net/ethernet/tehuti/tehuti.c | 8 +- + drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 2 +- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 26 +- + drivers/net/ethernet/ti/cpmac.c | 2 +- + drivers/net/ethernet/ti/cpsw.c | 6 +- + drivers/net/ethernet/ti/cpsw_new.c | 17 +- + drivers/net/ethernet/ti/cpts.c | 6 +- + drivers/net/ethernet/ti/davinci_emac.c | 8 +- + drivers/net/ethernet/ti/netcp_core.c | 8 +- + drivers/net/ethernet/ti/tlan.c | 14 +- + drivers/net/ethernet/toshiba/ps3_gelic_net.c | 2 +- + drivers/net/ethernet/toshiba/spider_net.c | 2 +- + drivers/net/ethernet/toshiba/tc35815.c | 11 +- + drivers/net/ethernet/via/via-rhine.c | 4 +- + drivers/net/ethernet/via/via-velocity.c | 4 +- + drivers/net/ethernet/wiznet/w5100-spi.c | 4 +- + drivers/net/ethernet/wiznet/w5100.c | 11 +- + drivers/net/ethernet/wiznet/w5100.h | 2 +- + drivers/net/ethernet/wiznet/w5300.c | 4 +- + drivers/net/ethernet/xilinx/ll_temac_main.c | 4 +- + drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- + drivers/net/ethernet/xilinx/xilinx_emaclite.c | 11 +- + drivers/net/ethernet/xircom/xirc2ps_cs.c | 14 +- + drivers/net/ethernet/xscale/ixp4xx_eth.c | 7 +- + drivers/net/fddi/defxx.c | 12 +- + drivers/net/fddi/defza.c | 2 +- + drivers/net/fddi/skfp/h/smc.h | 2 +- + drivers/net/fddi/skfp/skfddi.c | 9 +- + drivers/net/fddi/skfp/smtinit.c | 4 +- + drivers/net/fjes/fjes_hw.c | 3 +- + drivers/net/fjes/fjes_hw.h | 2 +- + drivers/net/fjes/fjes_main.c | 14 +- + drivers/net/gtp.c | 2 +- + drivers/net/hamradio/6pack.c | 6 +- + drivers/net/hamradio/baycom_epp.c | 4 +- + drivers/net/hamradio/bpqether.c | 7 +- + drivers/net/hamradio/dmascc.c | 5 +- + drivers/net/hamradio/hdlcdrv.c | 4 +- + drivers/net/hamradio/mkiss.c | 6 +- + drivers/net/hamradio/scc.c | 7 +- + drivers/net/hamradio/yam.c | 4 +- + drivers/net/hippi/rrunner.c | 6 +- + drivers/net/hyperv/netvsc_drv.c | 6 +- + drivers/net/ieee802154/ca8210.c | 2 - + drivers/net/ifb.c | 3 + + drivers/net/ipvlan/ipvlan_main.c | 4 +- + drivers/net/ipvlan/ipvtap.c | 2 +- + drivers/net/macsec.c | 2 +- + drivers/net/macvlan.c | 4 +- + drivers/net/macvtap.c | 2 +- + drivers/net/net_failover.c | 3 +- + drivers/net/netdevsim/dev.c | 14 +- + drivers/net/netdevsim/ethtool.c | 28 + + drivers/net/netdevsim/health.c | 32 - + drivers/net/netdevsim/netdevsim.h | 1 + + drivers/net/ntb_netdev.c | 2 +- + drivers/net/phy/at803x.c | 196 +- + drivers/net/phy/bcm7xxx.c | 203 + + drivers/net/phy/broadcom.c | 106 +- + drivers/net/phy/dp83867.c | 19 + + drivers/net/phy/marvell10g.c | 107 +- + drivers/net/phy/mdio_bus.c | 28 + + drivers/net/phy/micrel.c | 107 +- + drivers/net/phy/mscc/mscc_main.c | 2 +- + drivers/net/phy/phy_device.c | 10 + + drivers/net/phy/phylink.c | 104 +- + drivers/net/phy/realtek.c | 8 + + drivers/net/plip/plip.c | 8 +- + drivers/net/ppp/ppp_generic.c | 2 +- + drivers/net/rionet.c | 14 +- + drivers/net/sb1000.c | 12 +- + drivers/net/team/team.c | 2 +- + drivers/net/usb/aqc111.c | 4 +- + drivers/net/usb/asix_common.c | 2 +- + drivers/net/usb/asix_devices.c | 2 +- + drivers/net/usb/ax88172a.c | 2 +- + drivers/net/usb/ax88179_178a.c | 12 +- + drivers/net/usb/catc.c | 2 +- + drivers/net/usb/cdc-phonet.c | 4 +- + drivers/net/usb/ch9200.c | 4 +- + drivers/net/usb/cx82310_eth.c | 5 +- + drivers/net/usb/dm9601.c | 7 +- + 
drivers/net/usb/ipheth.c | 2 +- + drivers/net/usb/kalmia.c | 2 +- + drivers/net/usb/kaweth.c | 3 +- + drivers/net/usb/lan78xx.c | 4 +- + drivers/net/usb/mcs7830.c | 9 +- + drivers/net/usb/pegasus.c | 2 +- + drivers/net/usb/qmi_wwan.c | 7 +- + drivers/net/usb/r8152.c | 4 +- + drivers/net/usb/rndis_host.c | 2 +- + drivers/net/usb/rtl8150.c | 4 +- + drivers/net/usb/sierra_net.c | 6 +- + drivers/net/usb/smsc75xx.c | 9 +- + drivers/net/usb/smsc95xx.c | 9 +- + drivers/net/usb/sr9700.c | 9 +- + drivers/net/usb/sr9800.c | 7 +- + drivers/net/usb/usbnet.c | 6 +- + drivers/net/virtio_net.c | 40 +- + drivers/net/vmxnet3/vmxnet3_drv.c | 4 +- + drivers/net/wan/hdlc_fr.c | 4 +- + drivers/net/wan/lapbether.c | 2 +- + drivers/net/wireless/ath/ar5523/ar5523.c | 3 +- + drivers/net/wireless/ath/ath10k/core.c | 5 +- + drivers/net/wireless/ath/ath10k/mac.c | 37 +- + drivers/net/wireless/ath/ath10k/qmi.c | 3 +- + drivers/net/wireless/ath/ath10k/sdio.c | 6 +- + drivers/net/wireless/ath/ath10k/snoc.c | 77 + + drivers/net/wireless/ath/ath10k/snoc.h | 5 + + drivers/net/wireless/ath/ath10k/wmi.c | 4 + + drivers/net/wireless/ath/ath10k/wmi.h | 3 + + drivers/net/wireless/ath/ath11k/core.c | 68 +- + drivers/net/wireless/ath/ath11k/core.h | 49 +- + drivers/net/wireless/ath/ath11k/dbring.c | 16 +- + drivers/net/wireless/ath/ath11k/debugfs.c | 27 +- + drivers/net/wireless/ath/ath11k/debugfs.h | 4 + + .../net/wireless/ath/ath11k/debugfs_htt_stats.c | 4344 +- + .../net/wireless/ath/ath11k/debugfs_htt_stats.h | 226 + + drivers/net/wireless/ath/ath11k/debugfs_sta.c | 8 +- + drivers/net/wireless/ath/ath11k/dp.c | 10 +- + drivers/net/wireless/ath/ath11k/dp.h | 9 + + drivers/net/wireless/ath/ath11k/dp_rx.c | 253 +- + drivers/net/wireless/ath/ath11k/dp_tx.c | 36 +- + drivers/net/wireless/ath/ath11k/dp_tx.h | 2 +- + drivers/net/wireless/ath/ath11k/hal_desc.h | 2 + + drivers/net/wireless/ath/ath11k/hw.c | 45 + + drivers/net/wireless/ath/ath11k/hw.h | 15 +- + drivers/net/wireless/ath/ath11k/mac.c | 1445 +- + drivers/net/wireless/ath/ath11k/mac.h | 3 + + drivers/net/wireless/ath/ath11k/pci.c | 45 +- + drivers/net/wireless/ath/ath11k/peer.c | 11 + + drivers/net/wireless/ath/ath11k/qmi.c | 349 +- + drivers/net/wireless/ath/ath11k/qmi.h | 18 +- + drivers/net/wireless/ath/ath11k/reg.c | 18 +- + drivers/net/wireless/ath/ath11k/reg.h | 2 +- + drivers/net/wireless/ath/ath11k/spectral.c | 42 +- + drivers/net/wireless/ath/ath11k/trace.h | 11 +- + drivers/net/wireless/ath/ath11k/wmi.c | 162 +- + drivers/net/wireless/ath/ath11k/wmi.h | 107 +- + drivers/net/wireless/ath/ath5k/sysfs.c | 8 +- + drivers/net/wireless/ath/ath6kl/cfg80211.c | 9 +- + .../net/wireless/ath/ath9k/ath9k_pci_owl_loader.c | 105 +- + drivers/net/wireless/ath/ath9k/debug.c | 57 +- + drivers/net/wireless/ath/ath9k/debug.h | 1 + + drivers/net/wireless/ath/ath9k/eeprom.c | 12 +- + drivers/net/wireless/ath/ath9k/hw.h | 2 + + drivers/net/wireless/ath/ath9k/init.c | 58 + + drivers/net/wireless/ath/ath9k/main.c | 4 +- + drivers/net/wireless/ath/dfs_pattern_detector.c | 10 +- + drivers/net/wireless/ath/spectral_common.h | 1 - + drivers/net/wireless/ath/wcn36xx/debug.c | 2 +- + drivers/net/wireless/ath/wcn36xx/hal.h | 6 +- + drivers/net/wireless/ath/wcn36xx/main.c | 11 +- + drivers/net/wireless/ath/wcn36xx/smd.c | 99 +- + drivers/net/wireless/ath/wcn36xx/smd.h | 3 + + drivers/net/wireless/ath/wil6210/cfg80211.c | 10 +- + drivers/net/wireless/ath/wil6210/main.c | 6 +- + drivers/net/wireless/ath/wil6210/wil6210.h | 2 +- + drivers/net/wireless/ath/wil6210/wmi.c | 2 +- + 
drivers/net/wireless/atmel/atmel.c | 19 +- + drivers/net/wireless/broadcom/b43/phy_g.c | 2 +- + drivers/net/wireless/broadcom/b43legacy/radio.c | 2 +- + .../broadcom/brcm80211/brcmfmac/cfg80211.c | 12 +- + .../wireless/broadcom/brcm80211/brcmfmac/core.c | 6 +- + .../net/wireless/broadcom/brcm80211/brcmfmac/dmi.c | 10 + + .../net/wireless/broadcom/brcm80211/brcmfmac/of.c | 2 +- + .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 4 +- + drivers/net/wireless/cisco/airo.c | 27 +- + drivers/net/wireless/intel/ipw2x00/ipw2100.c | 4 +- + drivers/net/wireless/intel/ipw2x00/ipw2200.c | 12 +- + drivers/net/wireless/intel/ipw2x00/ipw2200.h | 2 +- + drivers/net/wireless/intel/iwlegacy/3945-mac.c | 1 - + drivers/net/wireless/intel/iwlegacy/4965-mac.c | 1 - + drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +- + drivers/net/wireless/intel/iwlwifi/dvm/main.c | 1 - + drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 13 +- + drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 5 +- + drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 6 +- + drivers/net/wireless/intersil/hostap/hostap_hw.c | 5 +- + drivers/net/wireless/intersil/hostap/hostap_main.c | 4 +- + drivers/net/wireless/intersil/orinoco/main.c | 2 +- + drivers/net/wireless/mac80211_hwsim.c | 163 +- + drivers/net/wireless/marvell/libertas/cmd.c | 5 +- + drivers/net/wireless/marvell/libertas/main.c | 4 +- + drivers/net/wireless/marvell/mwifiex/11n.c | 7 +- + drivers/net/wireless/marvell/mwifiex/cfg80211.c | 384 +- + drivers/net/wireless/marvell/mwifiex/cmdevt.c | 21 + + drivers/net/wireless/marvell/mwifiex/main.c | 22 +- + drivers/net/wireless/marvell/mwifiex/main.h | 1 + + drivers/net/wireless/marvell/mwifiex/pcie.c | 36 +- + drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 4 + + drivers/net/wireless/marvell/mwifiex/uap_event.c | 3 +- + drivers/net/wireless/marvell/mwl8k.c | 2 +- + drivers/net/wireless/mediatek/mt76/Makefile | 2 +- + drivers/net/wireless/mediatek/mt76/debugfs.c | 22 +- + drivers/net/wireless/mediatek/mt76/eeprom.c | 14 + + drivers/net/wireless/mediatek/mt76/mac80211.c | 242 +- + drivers/net/wireless/mediatek/mt76/mcu.c | 8 +- + drivers/net/wireless/mediatek/mt76/mt76.h | 126 +- + drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 11 +- + drivers/net/wireless/mediatek/mt76/mt7603/main.c | 3 + + drivers/net/wireless/mediatek/mt76/mt7603/pci.c | 2 +- + drivers/net/wireless/mediatek/mt76/mt7615/Makefile | 2 +- + .../net/wireless/mediatek/mt76/mt7615/debugfs.c | 29 +- + drivers/net/wireless/mediatek/mt76/mt7615/init.c | 6 +- + drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 62 +- + drivers/net/wireless/mediatek/mt76/mt7615/main.c | 10 +- + drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 68 +- + drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 20 +- + drivers/net/wireless/mediatek/mt76/mt7615/pci.c | 4 +- + .../net/wireless/mediatek/mt76/mt7615/pci_mac.c | 5 +- + drivers/net/wireless/mediatek/mt76/mt7615/sdio.c | 296 +- + .../net/wireless/mediatek/mt76/mt7615/sdio_mcu.c | 11 +- + .../net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 2 +- + drivers/net/wireless/mediatek/mt76/mt76_connac.h | 7 +- + .../net/wireless/mediatek/mt76/mt76_connac_mcu.c | 355 +- + .../net/wireless/mediatek/mt76/mt76_connac_mcu.h | 37 +- + drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 2 +- + drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 4 +- + drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 15 +- + drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 12 +- + drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 3 + + 
drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 5 +- + .../net/wireless/mediatek/mt76/mt7915/debugfs.c | 381 +- + drivers/net/wireless/mediatek/mt76/mt7915/init.c | 170 +- + drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 646 +- + drivers/net/wireless/mediatek/mt76/mt7915/mac.h | 11 +- + drivers/net/wireless/mediatek/mt76/mt7915/main.c | 334 +- + drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 1046 +- + drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 105 +- + drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 3 +- + drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h | 144 +- + drivers/net/wireless/mediatek/mt76/mt7915/pci.c | 5 +- + drivers/net/wireless/mediatek/mt76/mt7915/regs.h | 149 +- + .../net/wireless/mediatek/mt76/mt7915/testmode.c | 23 + + .../net/wireless/mediatek/mt76/mt7915/testmode.h | 6 + + drivers/net/wireless/mediatek/mt76/mt7921/Kconfig | 18 +- + drivers/net/wireless/mediatek/mt76/mt7921/Makefile | 7 +- + .../net/wireless/mediatek/mt76/mt7921/debugfs.c | 99 +- + drivers/net/wireless/mediatek/mt76/mt7921/dma.c | 74 +- + drivers/net/wireless/mediatek/mt76/mt7921/eeprom.c | 100 - + drivers/net/wireless/mediatek/mt76/mt7921/init.c | 93 +- + drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 776 +- + drivers/net/wireless/mediatek/mt76/mt7921/mac.h | 32 + + drivers/net/wireless/mediatek/mt76/mt7921/main.c | 328 +- + drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 448 +- + drivers/net/wireless/mediatek/mt76/mt7921/mcu.h | 63 +- + drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 179 +- + drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 66 +- + .../net/wireless/mediatek/mt76/mt7921/pci_mac.c | 348 + + .../net/wireless/mediatek/mt76/mt7921/pci_mcu.c | 115 + + drivers/net/wireless/mediatek/mt76/mt7921/regs.h | 58 +- + drivers/net/wireless/mediatek/mt76/mt7921/sdio.c | 317 + + .../net/wireless/mediatek/mt76/mt7921/sdio_mac.c | 220 + + .../net/wireless/mediatek/mt76/mt7921/sdio_mcu.c | 135 + + .../net/wireless/mediatek/mt76/mt7921/testmode.c | 197 + + drivers/net/wireless/mediatek/mt76/sdio.c | 303 +- + .../net/wireless/mediatek/mt76/{mt7615 => }/sdio.h | 33 +- + .../mediatek/mt76/{mt7615 => }/sdio_txrx.c | 134 +- + drivers/net/wireless/mediatek/mt76/testmode.c | 4 +- + drivers/net/wireless/mediatek/mt76/testmode.h | 7 + + drivers/net/wireless/mediatek/mt76/tx.c | 84 +- + drivers/net/wireless/mediatek/mt76/usb.c | 2 +- + drivers/net/wireless/mediatek/mt7601u/dma.c | 2 +- + drivers/net/wireless/microchip/wilc1000/cfg80211.c | 11 +- + drivers/net/wireless/microchip/wilc1000/hif.c | 31 +- + drivers/net/wireless/microchip/wilc1000/hif.h | 1 + + drivers/net/wireless/microchip/wilc1000/netdev.c | 14 +- + drivers/net/wireless/microchip/wilc1000/netdev.h | 5 +- + drivers/net/wireless/microchip/wilc1000/sdio.c | 1 + + drivers/net/wireless/microchip/wilc1000/spi.c | 91 +- + drivers/net/wireless/microchip/wilc1000/wlan.c | 134 +- + drivers/net/wireless/microchip/wilc1000/wlan.h | 5 +- + drivers/net/wireless/microchip/wilc1000/wlan_cfg.c | 1 + + drivers/net/wireless/microchip/wilc1000/wlan_if.h | 7 +- + drivers/net/wireless/quantenna/qtnfmac/core.c | 6 +- + drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c | 2 - + drivers/net/wireless/ralink/rt2x00/rt2800usb.c | 1 - + drivers/net/wireless/ray_cs.c | 2 +- + drivers/net/wireless/realtek/Kconfig | 1 + + drivers/net/wireless/realtek/Makefile | 1 + + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 6 +- + .../net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h | 2 + + drivers/net/wireless/realtek/rtlwifi/pci.c | 1 - + 
.../net/wireless/realtek/rtlwifi/rtl8192ee/hw.c | 2 +- + drivers/net/wireless/realtek/rtw88/debug.c | 46 +- + drivers/net/wireless/realtek/rtw88/debug.h | 1 + + drivers/net/wireless/realtek/rtw88/fw.c | 54 +- + drivers/net/wireless/realtek/rtw88/fw.h | 24 + + drivers/net/wireless/realtek/rtw88/main.c | 22 +- + drivers/net/wireless/realtek/rtw88/main.h | 49 +- + drivers/net/wireless/realtek/rtw88/phy.c | 119 +- + drivers/net/wireless/realtek/rtw88/phy.h | 2 + + drivers/net/wireless/realtek/rtw88/reg.h | 6 + + drivers/net/wireless/realtek/rtw88/regd.c | 753 +- + drivers/net/wireless/realtek/rtw88/regd.h | 8 +- + drivers/net/wireless/realtek/rtw88/rtw8821c.c | 19 +- + drivers/net/wireless/realtek/rtw88/rtw8822b.c | 46 +- + drivers/net/wireless/realtek/rtw88/rtw8822b.h | 8 + + drivers/net/wireless/realtek/rtw88/rtw8822c.c | 47 + + drivers/net/wireless/realtek/rtw88/rtw8822c.h | 3 + + drivers/net/wireless/realtek/rtw89/Kconfig | 50 + + drivers/net/wireless/realtek/rtw89/Makefile | 25 + + drivers/net/wireless/realtek/rtw89/cam.c | 695 + + drivers/net/wireless/realtek/rtw89/cam.h | 165 + + drivers/net/wireless/realtek/rtw89/coex.c | 5716 +++ + drivers/net/wireless/realtek/rtw89/coex.h | 181 + + drivers/net/wireless/realtek/rtw89/core.c | 2502 + + drivers/net/wireless/realtek/rtw89/core.h | 3384 ++ + drivers/net/wireless/realtek/rtw89/debug.c | 2489 + + drivers/net/wireless/realtek/rtw89/debug.h | 77 + + drivers/net/wireless/realtek/rtw89/efuse.c | 188 + + drivers/net/wireless/realtek/rtw89/efuse.h | 13 + + drivers/net/wireless/realtek/rtw89/fw.c | 1641 + + drivers/net/wireless/realtek/rtw89/fw.h | 1378 + + drivers/net/wireless/realtek/rtw89/mac.c | 3836 ++ + drivers/net/wireless/realtek/rtw89/mac.h | 860 + + drivers/net/wireless/realtek/rtw89/mac80211.c | 676 + + drivers/net/wireless/realtek/rtw89/pci.c | 3060 ++ + drivers/net/wireless/realtek/rtw89/pci.h | 635 + + drivers/net/wireless/realtek/rtw89/phy.c | 2868 ++ + drivers/net/wireless/realtek/rtw89/phy.h | 311 + + drivers/net/wireless/realtek/rtw89/ps.c | 150 + + drivers/net/wireless/realtek/rtw89/ps.h | 16 + + drivers/net/wireless/realtek/rtw89/reg.h | 2159 + + drivers/net/wireless/realtek/rtw89/regd.c | 353 + + drivers/net/wireless/realtek/rtw89/rtw8852a.c | 2036 + + drivers/net/wireless/realtek/rtw89/rtw8852a.h | 109 + + drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c | 3911 ++ + drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.h | 24 + + .../wireless/realtek/rtw89/rtw8852a_rfk_table.c | 1607 + + .../wireless/realtek/rtw89/rtw8852a_rfk_table.h | 133 + + .../net/wireless/realtek/rtw89/rtw8852a_table.c | 48725 +++++++++++++++++++ + .../net/wireless/realtek/rtw89/rtw8852a_table.h | 28 + + drivers/net/wireless/realtek/rtw89/sar.c | 190 + + drivers/net/wireless/realtek/rtw89/sar.h | 26 + + drivers/net/wireless/realtek/rtw89/ser.c | 491 + + drivers/net/wireless/realtek/rtw89/ser.h | 15 + + drivers/net/wireless/realtek/rtw89/txrx.h | 358 + + drivers/net/wireless/realtek/rtw89/util.h | 17 + + drivers/net/wireless/rndis_wlan.c | 2 - + drivers/net/wireless/rsi/rsi_91x_core.c | 2 + + drivers/net/wireless/rsi/rsi_91x_hal.c | 10 +- + drivers/net/wireless/rsi/rsi_91x_mac80211.c | 74 +- + drivers/net/wireless/rsi/rsi_91x_main.c | 17 +- + drivers/net/wireless/rsi/rsi_91x_mgmt.c | 24 +- + drivers/net/wireless/rsi/rsi_91x_sdio.c | 5 +- + drivers/net/wireless/rsi/rsi_91x_usb.c | 5 +- + drivers/net/wireless/rsi/rsi_hal.h | 11 + + drivers/net/wireless/rsi/rsi_main.h | 15 +- + drivers/net/wireless/st/cw1200/bh.c | 2 - + drivers/net/wireless/wl3501_cs.c | 3 +- + 
drivers/net/wireless/zydas/zd1201.c | 9 +- + drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 1 - + drivers/net/wwan/Kconfig | 1 + + drivers/net/wwan/iosm/Makefile | 5 +- + drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c | 6 +- + drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.h | 1 + + drivers/net/wwan/iosm/iosm_ipc_coredump.c | 125 + + drivers/net/wwan/iosm/iosm_ipc_coredump.h | 59 + + drivers/net/wwan/iosm/iosm_ipc_devlink.c | 321 + + drivers/net/wwan/iosm/iosm_ipc_devlink.h | 205 + + drivers/net/wwan/iosm/iosm_ipc_flash.c | 594 + + drivers/net/wwan/iosm/iosm_ipc_flash.h | 229 + + drivers/net/wwan/iosm/iosm_ipc_imem.c | 107 +- + drivers/net/wwan/iosm/iosm_ipc_imem.h | 18 +- + drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 317 + + drivers/net/wwan/iosm/iosm_ipc_imem_ops.h | 49 +- + drivers/net/xen-netback/interface.c | 6 +- + drivers/net/xen-netback/netback.c | 2 +- + drivers/net/xen-netfront.c | 4 +- + drivers/nfc/fdp/i2c.c | 1 - + drivers/nfc/microread/i2c.c | 4 - + drivers/nfc/microread/mei.c | 6 +- + drivers/nfc/nfcmrvl/fw_dnld.c | 4 +- + drivers/nfc/pn533/i2c.c | 6 +- + drivers/nfc/pn533/pn533.c | 6 +- + drivers/nfc/pn533/pn533.h | 4 +- + drivers/nfc/pn533/uart.c | 4 +- + drivers/nfc/pn533/usb.c | 2 +- + drivers/nfc/pn544/mei.c | 8 +- + drivers/nfc/s3fwrn5/firmware.c | 29 +- + drivers/nfc/s3fwrn5/nci.c | 18 +- + drivers/nfc/st-nci/i2c.c | 4 - + drivers/nfc/st-nci/ndlc.c | 4 - + drivers/nfc/st-nci/se.c | 6 - + drivers/nfc/st-nci/spi.c | 4 - + drivers/nfc/st21nfca/i2c.c | 4 - + drivers/nfc/st21nfca/se.c | 4 - + drivers/nfc/trf7970a.c | 8 - + drivers/of/Kconfig | 4 - + drivers/of/Makefile | 1 - + drivers/pcmcia/pcmcia_cis.c | 5 +- + drivers/phy/broadcom/phy-bcm-ns-usb3.c | 2 +- + drivers/phy/broadcom/phy-bcm-ns2-pcie.c | 6 +- + drivers/ptp/idt8a340_reg.h | 720 - + drivers/ptp/ptp_clockmatrix.c | 1588 +- + drivers/ptp/ptp_clockmatrix.h | 109 +- + drivers/ptp/ptp_ocp.c | 1354 +- + drivers/s390/net/ctcm_fsms.c | 60 +- + drivers/s390/net/ctcm_main.c | 38 +- + drivers/s390/net/ctcm_mpc.c | 8 +- + drivers/s390/net/fsm.c | 2 +- + drivers/s390/net/ism_drv.c | 2 +- + drivers/s390/net/lcs.c | 123 +- + drivers/s390/net/netiucv.c | 104 +- + drivers/s390/net/qeth_core_main.c | 4 +- + drivers/s390/net/qeth_l2_main.c | 6 +- + drivers/s390/net/qeth_l3_main.c | 3 +- + drivers/scsi/qedf/drv_fcoe_fw_funcs.c | 8 +- + drivers/scsi/qedf/drv_fcoe_fw_funcs.h | 2 +- + drivers/scsi/qedf/qedf.h | 4 +- + drivers/scsi/qedf/qedf_els.c | 2 +- + drivers/scsi/qedf/qedf_io.c | 12 +- + drivers/scsi/qedf/qedf_main.c | 10 +- + drivers/scsi/qedi/qedi_debugfs.c | 4 +- + drivers/scsi/qedi/qedi_fw.c | 40 +- + drivers/scsi/qedi/qedi_fw_api.c | 22 +- + drivers/scsi/qedi/qedi_fw_iscsi.h | 2 +- + drivers/scsi/qedi/qedi_iscsi.h | 2 +- + drivers/scsi/qedi/qedi_main.c | 11 +- + drivers/soc/fsl/Kconfig | 1 + + drivers/soc/fsl/dpio/dpio-cmd.h | 3 + + drivers/soc/fsl/dpio/dpio-driver.c | 1 + + drivers/soc/fsl/dpio/dpio-service.c | 117 + + drivers/soc/fsl/dpio/dpio.c | 1 + + drivers/soc/fsl/dpio/dpio.h | 2 + + drivers/soc/fsl/dpio/qbman-portal.c | 58 + + drivers/soc/fsl/dpio/qbman-portal.h | 13 + + drivers/staging/qlge/qlge_main.c | 12 +- + drivers/usb/gadget/function/f_phonet.c | 5 +- + include/linux/bpf.h | 7 +- + include/linux/brcmphy.h | 11 + + include/linux/dsa/8021q.h | 5 +- + include/linux/dsa/ocelot.h | 4 +- + include/linux/dsa/sja1105.h | 1 - + include/linux/etherdevice.h | 37 +- + include/linux/ethtool.h | 23 + + include/linux/filter.h | 7 +- + include/linux/ieee80211.h | 39 + + include/linux/mdio.h | 26 + + include/linux/mfd/idt8a340_reg.h | 31 +- + 
include/linux/micrel_phy.h | 1 + + include/linux/mlx4/device.h | 2 +- + include/linux/mlx4/driver.h | 22 - + include/linux/mlx5/device.h | 19 +- + include/linux/mlx5/driver.h | 27 +- + include/linux/mlx5/eq.h | 1 - + include/linux/mlx5/eswitch.h | 9 + + include/linux/mlx5/fs.h | 9 + + include/linux/mlx5/mlx5_ifc.h | 338 +- + include/linux/mm_types.h | 13 +- + include/linux/netdevice.h | 17 +- + include/linux/netfilter_arp/arp_tables.h | 5 +- + include/linux/netfilter_bridge/ebtables.h | 5 +- + include/linux/netfilter_ingress.h | 58 - + include/linux/netfilter_ipv4/ip_tables.h | 6 +- + include/linux/netfilter_ipv6/ip6_tables.h | 5 +- + include/linux/netfilter_netdev.h | 146 + + include/linux/netlink.h | 4 - + include/linux/of_net.h | 8 +- + include/linux/perf_event.h | 23 + + include/linux/phylink.h | 1 + + include/linux/platform_data/brcmfmac.h | 2 +- + include/linux/property.h | 5 +- + include/linux/qed/common_hsi.h | 141 +- + include/linux/qed/eth_common.h | 1 + + include/linux/qed/fcoe_common.h | 362 +- + include/linux/qed/iscsi_common.h | 360 +- + include/linux/qed/nvmetcp_common.h | 18 +- + include/linux/qed/qed_chain.h | 97 +- + include/linux/qed/qed_eth_if.h | 23 +- + include/linux/qed/qed_if.h | 265 +- + include/linux/qed/qed_iscsi_if.h | 2 +- + include/linux/qed/qed_ll2_if.h | 42 +- + include/linux/qed/qed_nvmetcp_if.h | 17 + + include/linux/qed/qed_rdma_if.h | 3 +- + include/linux/qed/rdma_common.h | 1 + + include/linux/skbuff.h | 4 + + include/linux/soc/marvell/octeontx2/asm.h | 15 + + include/linux/socket.h | 1 + + include/linux/u64_stats_sync.h | 10 + + include/net/act_api.h | 10 +- + include/net/ax25.h | 13 +- + include/net/bluetooth/bluetooth.h | 90 + + include/net/bluetooth/hci.h | 117 + + include/net/bluetooth/hci_core.h | 75 +- + include/net/cfg80211.h | 79 +- + include/net/codel.h | 5 + + include/net/codel_impl.h | 18 +- + include/net/datalink.h | 2 +- + include/net/devlink.h | 108 +- + include/net/dn.h | 2 +- + include/net/dsa.h | 43 +- + include/net/gen_stats.h | 59 +- + include/net/inet_connection_sock.h | 2 +- + include/net/inet_ecn.h | 17 + + include/net/ioam6.h | 3 +- + include/net/ip.h | 6 +- + include/net/ip_vs.h | 11 + + include/net/llc.h | 2 +- + include/net/llc_if.h | 3 +- + include/net/mac80211.h | 11 + + include/net/mctp.h | 56 +- + include/net/mctpdevice.h | 5 + + include/net/mptcp.h | 4 + + include/net/ndisc.h | 2 +- + include/net/neighbour.h | 34 +- + include/net/netfilter/xt_rateest.h | 2 +- + include/net/page_pool.h | 12 +- + include/net/pkt_cls.h | 6 +- + include/net/rose.h | 8 +- + include/net/sch_generic.h | 84 +- + include/net/sock.h | 70 +- + include/net/tcp.h | 41 +- + include/net/tls.h | 3 +- + include/net/xdp.h | 8 +- + include/net/xdp_sock_drv.h | 22 + + include/net/xsk_buff_pool.h | 48 +- + include/soc/fsl/dpaa2-io.h | 9 + + include/soc/mscc/ocelot.h | 24 +- + include/soc/mscc/ocelot_vcap.h | 10 + + include/trace/events/devlink.h | 72 +- + include/trace/events/mctp.h | 75 + + include/uapi/asm-generic/socket.h | 2 + + include/uapi/linux/bpf.h | 42 +- + include/uapi/linux/btf.h | 55 +- + include/uapi/linux/devlink.h | 2 + + include/uapi/linux/ethtool.h | 29 + + include/uapi/linux/ethtool_netlink.h | 17 + + include/uapi/linux/if_ether.h | 1 + + include/uapi/linux/ioam6_iptunnel.h | 29 + + include/uapi/linux/mptcp.h | 35 + + include/uapi/linux/neighbour.h | 35 +- + include/uapi/linux/netfilter.h | 1 + + include/uapi/linux/nl80211-vnd-intel.h | 29 + + include/uapi/linux/nl80211.h | 115 +- + include/uapi/linux/pkt_sched.h | 2 + + 
include/uapi/linux/smc.h | 44 +- + include/uapi/linux/tls.h | 30 + + include/uapi/linux/vm_sockets.h | 13 +- + kernel/bpf/arraymap.c | 7 +- + kernel/bpf/btf.c | 128 + + kernel/bpf/core.c | 5 + + kernel/bpf/hashtab.c | 13 +- + kernel/bpf/helpers.c | 11 +- + kernel/bpf/trampoline.c | 3 +- + kernel/bpf/verifier.c | 123 +- + kernel/events/core.c | 2 + + kernel/trace/bpf_trace.c | 84 +- + lib/test_bpf.c | 17182 ++++--- + net/802/hippi.c | 2 +- + net/802/p8022.c | 2 +- + net/802/psnap.c | 2 +- + net/8021q/vlan_dev.c | 6 +- + net/Kconfig | 2 +- + net/atm/br2684.c | 2 +- + net/atm/lec.c | 3 +- + net/ax25/af_ax25.c | 2 +- + net/ax25/ax25_dev.c | 2 +- + net/ax25/ax25_iface.c | 6 +- + net/ax25/ax25_in.c | 4 +- + net/ax25/ax25_out.c | 2 +- + net/batman-adv/bridge_loop_avoidance.c | 14 +- + net/batman-adv/multicast.c | 2 +- + net/batman-adv/routing.c | 3 +- + net/batman-adv/soft-interface.c | 2 +- + net/batman-adv/tp_meter.c | 2 +- + net/batman-adv/tvlv.c | 4 +- + net/batman-adv/tvlv.h | 4 +- + net/bluetooth/Makefile | 3 +- + net/bluetooth/eir.c | 335 + + net/bluetooth/eir.h | 72 + + net/bluetooth/hci_codec.c | 238 + + net/bluetooth/hci_codec.h | 7 + + net/bluetooth/hci_conn.c | 168 +- + net/bluetooth/hci_core.c | 320 +- + net/bluetooth/hci_debugfs.c | 123 + + net/bluetooth/hci_debugfs.h | 5 + + net/bluetooth/hci_event.c | 135 +- + net/bluetooth/hci_request.c | 478 +- + net/bluetooth/hci_request.h | 25 +- + net/bluetooth/hci_sock.c | 214 +- + net/bluetooth/l2cap_core.c | 2 +- + net/bluetooth/l2cap_sock.c | 10 +- + net/bluetooth/mgmt.c | 445 +- + net/bluetooth/msft.c | 172 +- + net/bluetooth/msft.h | 9 + + net/bluetooth/rfcomm/core.c | 50 +- + net/bluetooth/rfcomm/sock.c | 46 +- + net/bluetooth/sco.c | 209 +- + net/bpf/test_run.c | 22 +- + net/bridge/br.c | 4 +- + net/bridge/br_fdb.c | 6 +- + net/bridge/br_if.c | 2 +- + net/bridge/br_ioctl.c | 10 +- + net/bridge/br_mdb.c | 4 +- + net/bridge/br_netfilter_hooks.c | 2 +- + net/bridge/br_netlink.c | 4 +- + net/bridge/br_stp_if.c | 2 +- + net/bridge/netfilter/ebtable_broute.c | 2 +- + net/bridge/netfilter/ebtable_filter.c | 13 +- + net/bridge/netfilter/ebtable_nat.c | 12 +- + net/bridge/netfilter/ebtables.c | 6 +- + net/core/Makefile | 1 + + net/core/dev.c | 87 +- + net/core/devlink.c | 635 +- + net/core/filter.c | 21 + + net/core/flow_dissector.c | 3 +- + net/core/gen_estimator.c | 52 +- + net/core/gen_stats.c | 186 +- + net/core/neighbour.c | 204 +- + net/core/net-sysfs.c | 57 +- + net/core/net_namespace.c | 4 + + {drivers/of => net/core}/of_net.c | 25 + + net/core/page_pool.c | 10 +- + net/core/rtnetlink.c | 9 +- + net/core/skbuff.c | 27 +- + net/core/sock.c | 104 +- + net/core/stream.c | 5 +- + net/dccp/dccp.h | 2 +- + net/dccp/proto.c | 14 +- + net/dsa/Kconfig | 20 +- + net/dsa/Makefile | 3 +- + net/dsa/dsa.c | 22 +- + net/dsa/dsa2.c | 76 +- + net/dsa/port.c | 21 +- + net/dsa/slave.c | 40 +- + net/dsa/switch.c | 169 +- + net/dsa/tag_8021q.c | 114 +- + net/dsa/tag_ksz.c | 1 - + net/dsa/tag_ocelot.c | 39 + + net/dsa/tag_ocelot_8021q.c | 2 +- + net/dsa/tag_rtl4_a.c | 2 +- + net/dsa/tag_rtl8_4.c | 178 + + net/dsa/tag_sja1105.c | 9 +- + net/ethernet/eth.c | 102 +- + net/ethtool/Makefile | 2 +- + net/ethtool/ioctl.c | 16 +- + net/ethtool/module.c | 180 + + net/ethtool/netlink.c | 19 + + net/ethtool/netlink.h | 4 + + net/hsr/hsr_device.c | 2 +- + net/hsr/hsr_main.c | 2 +- + net/ieee802154/6lowpan/core.c | 2 +- + net/ipv4/af_inet.c | 12 +- + net/ipv4/cipso_ipv4.c | 2 +- + net/ipv4/datagram.c | 1 - + net/ipv4/fib_notifier.c | 1 - + 
net/ipv4/inet_connection_sock.c | 4 +- + net/ipv4/inet_hashtables.c | 2 +- + net/ipv4/ip_gre.c | 2 +- + net/ipv4/ip_tunnel.c | 2 +- + net/ipv4/ip_vti.c | 2 +- + net/ipv4/ipip.c | 2 +- + net/ipv4/netfilter/arp_tables.c | 7 +- + net/ipv4/netfilter/arptable_filter.c | 10 +- + net/ipv4/netfilter/ip_tables.c | 7 +- + net/ipv4/netfilter/iptable_filter.c | 9 +- + net/ipv4/netfilter/iptable_mangle.c | 8 +- + net/ipv4/netfilter/iptable_nat.c | 15 +- + net/ipv4/netfilter/iptable_raw.c | 10 +- + net/ipv4/netfilter/iptable_security.c | 9 +- + net/ipv4/proc.c | 2 +- + net/ipv4/route.c | 8 - + net/ipv4/syncookies.c | 2 - + net/ipv4/sysctl_net_ipv4.c | 21 - + net/ipv4/tcp.c | 76 +- + net/ipv4/tcp_fastopen.c | 6 - + net/ipv4/tcp_input.c | 37 +- + net/ipv4/tcp_ipv4.c | 6 - + net/ipv4/tcp_minisocks.c | 7 - + net/ipv4/tcp_nv.c | 1 - + net/ipv4/tcp_output.c | 5 +- + net/ipv4/tcp_rate.c | 6 + + net/ipv4/udp_tunnel_core.c | 3 - + net/ipv6/Kconfig | 6 +- + net/ipv6/Makefile | 11 +- + net/ipv6/addrconf.c | 7 +- + net/ipv6/exthdrs.c | 2 +- + net/ipv6/ila/ila_xlat.c | 6 +- + net/ipv6/ioam6.c | 11 +- + net/ipv6/ioam6_iptunnel.c | 300 +- + net/ipv6/ip6_gre.c | 4 +- + net/ipv6/ip6_tunnel.c | 2 +- + net/ipv6/ip6_vti.c | 2 +- + net/ipv6/ndisc.c | 4 +- + net/ipv6/netfilter/ip6_tables.c | 6 +- + net/ipv6/netfilter/ip6table_filter.c | 10 +- + net/ipv6/netfilter/ip6table_mangle.c | 8 +- + net/ipv6/netfilter/ip6table_nat.c | 15 +- + net/ipv6/netfilter/ip6table_raw.c | 10 +- + net/ipv6/netfilter/ip6table_security.c | 9 +- + net/ipv6/seg6.c | 8 +- + net/ipv6/seg6_hmac.c | 4 +- + net/ipv6/sit.c | 4 +- + net/ipv6/tcp_ipv6.c | 6 - + net/llc/llc_c_ac.c | 2 +- + net/llc/llc_if.c | 2 +- + net/llc/llc_output.c | 2 +- + net/llc/llc_proc.c | 2 +- + net/mac80211/agg-rx.c | 14 +- + net/mac80211/cfg.c | 38 + + net/mac80211/debugfs_sta.c | 123 +- + net/mac80211/fils_aead.c | 22 +- + net/mac80211/ibss.c | 33 +- + net/mac80211/ieee80211_i.h | 35 +- + net/mac80211/iface.c | 39 +- + net/mac80211/mesh.c | 87 +- + net/mac80211/mesh_hwmp.c | 44 +- + net/mac80211/mesh_plink.c | 11 +- + net/mac80211/mesh_sync.c | 26 +- + net/mac80211/mlme.c | 355 +- + net/mac80211/pm.c | 4 + + net/mac80211/rx.c | 12 +- + net/mac80211/s1g.c | 8 +- + net/mac80211/scan.c | 16 +- + net/mac80211/sta_info.c | 3 + + net/mac80211/tdls.c | 63 +- + net/mac80211/tx.c | 206 +- + net/mac80211/util.c | 40 +- + net/mac802154/iface.c | 17 +- + net/mctp/Kconfig | 5 + + net/mctp/Makefile | 3 + + net/mctp/af_mctp.c | 66 +- + net/mctp/device.c | 53 +- + net/mctp/neigh.c | 4 +- + net/mctp/route.c | 199 +- + net/mctp/test/route-test.c | 544 + + net/mctp/test/utils.c | 67 + + net/mctp/test/utils.h | 20 + + net/mptcp/mib.c | 17 +- + net/mptcp/mptcp_diag.c | 26 +- + net/mptcp/options.c | 15 +- + net/mptcp/pm_netlink.c | 9 +- + net/mptcp/protocol.c | 212 +- + net/mptcp/protocol.h | 4 - + net/mptcp/sockopt.c | 279 + + net/netfilter/Kconfig | 11 + + net/netfilter/core.c | 38 +- + net/netfilter/ipvs/ip_vs_core.c | 166 +- + net/netfilter/ipvs/ip_vs_ctl.c | 8 + + net/netfilter/ipvs/ip_vs_est.c | 5 + + net/netfilter/nfnetlink_hook.c | 16 +- + net/netfilter/nft_chain_filter.c | 4 +- + net/netfilter/nft_dynset.c | 11 +- + net/netfilter/xt_RATEEST.c | 7 +- + net/netlink/af_netlink.c | 23 +- + net/netrom/af_netrom.c | 4 +- + net/netrom/nr_dev.c | 8 +- + net/netrom/nr_route.c | 4 +- + net/nfc/hci/command.c | 16 - + net/nfc/hci/llc_shdlc.c | 35 +- + net/nfc/llcp_commands.c | 8 - + net/nfc/llcp_core.c | 5 +- + net/nfc/nci/core.c | 4 - + net/nfc/nci/hci.c | 4 - + net/nfc/nci/ntf.c | 9 - + 
net/nfc/nci/uart.c | 16 +- + net/packet/af_packet.c | 35 + + net/qrtr/Makefile | 3 +- + net/qrtr/{qrtr.c => af_qrtr.c} | 0 + net/rose/af_rose.c | 5 +- + net/rose/rose_dev.c | 8 +- + net/rose/rose_link.c | 8 +- + net/rose/rose_route.c | 10 +- + net/rxrpc/rtt.c | 2 +- + net/sched/act_api.c | 21 +- + net/sched/act_bpf.c | 2 +- + net/sched/act_ife.c | 4 +- + net/sched/act_mpls.c | 2 +- + net/sched/act_police.c | 4 +- + net/sched/act_sample.c | 2 +- + net/sched/act_simple.c | 3 +- + net/sched/act_skbedit.c | 2 +- + net/sched/act_skbmod.c | 2 +- + net/sched/sch_api.c | 25 +- + net/sched/sch_atm.c | 6 +- + net/sched/sch_cbq.c | 15 +- + net/sched/sch_drr.c | 13 +- + net/sched/sch_ets.c | 17 +- + net/sched/sch_fq_codel.c | 20 +- + net/sched/sch_generic.c | 46 +- + net/sched/sch_gred.c | 15 +- + net/sched/sch_hfsc.c | 11 +- + net/sched/sch_htb.c | 43 +- + net/sched/sch_mq.c | 31 +- + net/sched/sch_mqprio.c | 64 +- + net/sched/sch_multiq.c | 3 +- + net/sched/sch_netem.c | 2 +- + net/sched/sch_prio.c | 4 +- + net/sched/sch_qfq.c | 13 +- + net/sched/sch_taprio.c | 2 +- + net/sched/sch_tbf.c | 16 + + net/smc/af_smc.c | 447 +- + net/smc/smc.h | 23 +- + net/smc/smc_clc.c | 463 +- + net/smc/smc_clc.h | 72 +- + net/smc/smc_core.c | 183 +- + net/smc/smc_core.h | 51 +- + net/smc/smc_ib.c | 160 +- + net/smc/smc_ib.h | 16 +- + net/smc/smc_ism.c | 16 +- + net/smc/smc_ism.h | 2 +- + net/smc/smc_llc.c | 623 +- + net/smc/smc_llc.h | 12 +- + net/smc/smc_netlink.c | 47 +- + net/smc/smc_netlink.h | 2 + + net/smc/smc_pnet.c | 41 +- + net/smc/smc_wr.c | 237 +- + net/smc/smc_wr.h | 8 + + net/tipc/bearer.c | 4 +- + net/tipc/bearer.h | 2 +- + net/tipc/eth_media.c | 2 +- + net/tipc/ib_media.c | 2 +- + net/tls/tls_main.c | 46 + + net/tls/tls_sw.c | 54 +- + net/vmw_vsock/af_vsock.c | 80 +- + net/wireless/Makefile | 4 +- + net/wireless/core.c | 10 + + net/wireless/nl80211.c | 452 +- + net/wireless/rdev-ops.h | 14 + + net/wireless/scan.c | 59 +- + net/wireless/trace.h | 31 + + net/wireless/util.c | 2 + + net/xdp/xsk.c | 15 - + net/xdp/xsk_buff_pool.c | 132 +- + net/xdp/xsk_queue.h | 12 +- + samples/bpf/xdp_router_ipv4_user.c | 39 +- + tools/bpf/bpftool/Makefile | 3 + + tools/bpf/bpftool/btf.c | 12 + + tools/bpf/bpftool/feature.c | 1 + + tools/bpf/bpftool/gen.c | 36 +- + tools/bpf/resolve_btfids/Makefile | 5 +- + tools/include/uapi/linux/bpf.h | 42 +- + tools/include/uapi/linux/btf.h | 55 +- + tools/lib/bpf/.gitignore | 1 - + tools/lib/bpf/Makefile | 39 +- + tools/lib/bpf/bpf_helpers.h | 51 +- + tools/lib/bpf/btf.c | 84 +- + tools/lib/bpf/btf.h | 87 + + tools/lib/bpf/btf_dump.c | 3 + + tools/lib/bpf/gen_loader.c | 7 +- + tools/lib/bpf/libbpf.c | 1091 +- + tools/lib/bpf/libbpf.h | 108 +- + tools/lib/bpf/libbpf.map | 5 + + tools/lib/bpf/libbpf_common.h | 24 + + tools/lib/bpf/libbpf_internal.h | 34 +- + tools/lib/bpf/libbpf_legacy.h | 9 + + tools/lib/bpf/libbpf_version.h | 9 + + tools/lib/bpf/skel_internal.h | 6 +- + tools/lib/bpf/xsk.c | 4 +- + tools/testing/selftests/bpf/.gitignore | 5 +- + tools/testing/selftests/bpf/Makefile | 7 +- + tools/testing/selftests/bpf/README.rst | 27 + + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 19 +- + tools/testing/selftests/bpf/btf_helpers.c | 7 +- + .../selftests/bpf/prog_tests/attach_probe.c | 24 +- + tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +- + tools/testing/selftests/bpf/prog_tests/btf.c | 441 +- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 27 +- + tools/testing/selftests/bpf/prog_tests/btf_tag.c | 20 + + tools/testing/selftests/bpf/prog_tests/btf_write.c | 21 + + 
.../testing/selftests/bpf/prog_tests/core_reloc.c | 17 +- + .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 43 +- + .../selftests/bpf/prog_tests/flow_dissector.c | 4 +- + .../selftests/bpf/prog_tests/get_branch_snapshot.c | 99 + + .../selftests/bpf/prog_tests/module_attach.c | 39 - + .../testing/selftests/bpf/prog_tests/probe_user.c | 4 +- + .../selftests/bpf/prog_tests/reference_tracking.c | 52 +- + tools/testing/selftests/bpf/prog_tests/sk_assign.c | 2 +- + tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 6 + + tools/testing/selftests/bpf/prog_tests/skeleton.c | 6 + + .../selftests/bpf/prog_tests/sockopt_multi.c | 30 +- + tools/testing/selftests/bpf/prog_tests/tailcalls.c | 83 +- + .../testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +- + .../selftests/bpf/prog_tests/trace_printk.c | 24 +- + .../selftests/bpf/prog_tests/trace_vprintk.c | 68 + + tools/testing/selftests/bpf/prog_tests/xdpwall.c | 15 + + tools/testing/selftests/bpf/progs/bpf_cubic.c | 12 +- + tools/testing/selftests/bpf/progs/bpf_flow.c | 3 +- + .../bpf/progs/cg_storage_multi_isolated.c | 4 +- + .../selftests/bpf/progs/cg_storage_multi_shared.c | 4 +- + .../selftests/bpf/progs/for_each_array_map_elem.c | 2 +- + .../selftests/bpf/progs/for_each_hash_map_elem.c | 2 +- + .../selftests/bpf/progs/get_branch_snapshot.c | 40 + + tools/testing/selftests/bpf/progs/kfree_skb.c | 4 +- + .../testing/selftests/bpf/progs/kfunc_call_test.c | 4 +- + .../selftests/bpf/progs/kfunc_call_test_subprog.c | 2 +- + .../selftests/bpf/progs/perf_event_stackmap.c | 4 +- + tools/testing/selftests/bpf/progs/skb_pkt_end.c | 2 +- + .../selftests/bpf/progs/sockmap_verdict_prog.c | 12 +- + tools/testing/selftests/bpf/progs/sockopt_multi.c | 5 +- + tools/testing/selftests/bpf/progs/tag.c | 49 + + tools/testing/selftests/bpf/progs/tailcall1.c | 7 +- + tools/testing/selftests/bpf/progs/tailcall2.c | 23 +- + tools/testing/selftests/bpf/progs/tailcall3.c | 7 +- + tools/testing/selftests/bpf/progs/tailcall4.c | 7 +- + tools/testing/selftests/bpf/progs/tailcall5.c | 7 +- + tools/testing/selftests/bpf/progs/tailcall6.c | 34 + + .../selftests/bpf/progs/tailcall_bpf2bpf1.c | 7 +- + .../selftests/bpf/progs/tailcall_bpf2bpf2.c | 7 +- + .../selftests/bpf/progs/tailcall_bpf2bpf3.c | 11 +- + .../selftests/bpf/progs/tailcall_bpf2bpf4.c | 15 +- + .../selftests/bpf/progs/test_btf_map_in_map.c | 14 +- + .../selftests/bpf/progs/test_btf_skc_cls_ingress.c | 2 +- + .../testing/selftests/bpf/progs/test_cgroup_link.c | 4 +- + tools/testing/selftests/bpf/progs/test_check_mtu.c | 12 +- + .../selftests/bpf/progs/test_cls_redirect.c | 2 +- + .../testing/selftests/bpf/progs/test_global_data.c | 2 +- + .../selftests/bpf/progs/test_global_func1.c | 2 +- + .../selftests/bpf/progs/test_global_func3.c | 2 +- + .../selftests/bpf/progs/test_global_func5.c | 2 +- + .../selftests/bpf/progs/test_global_func6.c | 2 +- + .../selftests/bpf/progs/test_global_func7.c | 2 +- + .../testing/selftests/bpf/progs/test_map_in_map.c | 12 +- + .../selftests/bpf/progs/test_map_in_map_invalid.c | 2 +- + .../bpf/progs/test_misc_tcp_hdr_options.c | 2 +- + .../selftests/bpf/progs/test_pe_preserve_elems.c | 8 +- + .../testing/selftests/bpf/progs/test_perf_buffer.c | 4 +- + .../testing/selftests/bpf/progs/test_pkt_access.c | 2 +- + .../selftests/bpf/progs/test_pkt_md_access.c | 4 +- + .../testing/selftests/bpf/progs/test_probe_user.c | 28 +- + .../bpf/progs/test_select_reuseport_kern.c | 4 +- + tools/testing/selftests/bpf/progs/test_sk_assign.c | 3 +- + tools/testing/selftests/bpf/progs/test_sk_lookup.c | 44 +- 
+ .../selftests/bpf/progs/test_sk_lookup_kern.c | 37 +- + tools/testing/selftests/bpf/progs/test_skb_ctx.c | 6 + + .../testing/selftests/bpf/progs/test_skb_helpers.c | 2 +- + .../selftests/bpf/progs/test_sockmap_listen.c | 2 +- + .../bpf/progs/test_sockmap_skb_verdict_attach.c | 2 +- + .../selftests/bpf/progs/test_sockmap_update.c | 2 +- + .../selftests/bpf/progs/test_stacktrace_build_id.c | 4 +- + .../selftests/bpf/progs/test_stacktrace_map.c | 4 +- + tools/testing/selftests/bpf/progs/test_tc_bpf.c | 2 +- + tools/testing/selftests/bpf/progs/test_tc_neigh.c | 6 +- + .../selftests/bpf/progs/test_tc_neigh_fib.c | 6 +- + tools/testing/selftests/bpf/progs/test_tc_peer.c | 10 +- + .../bpf/progs/test_tcp_check_syncookie_kern.c | 4 +- + .../selftests/bpf/progs/test_tcp_hdr_options.c | 2 +- + .../selftests/bpf/progs/test_tcpnotify_kern.c | 4 +- + tools/testing/selftests/bpf/progs/test_xdp.c | 2 +- + .../bpf/progs/test_xdp_adjust_tail_grow.c | 2 +- + .../bpf/progs/test_xdp_adjust_tail_shrink.c | 4 +- + .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 4 +- + .../selftests/bpf/progs/test_xdp_devmap_helpers.c | 2 +- + tools/testing/selftests/bpf/progs/test_xdp_link.c | 2 +- + tools/testing/selftests/bpf/progs/test_xdp_loop.c | 2 +- + .../selftests/bpf/progs/test_xdp_noinline.c | 4 +- + .../bpf/progs/test_xdp_with_cpumap_helpers.c | 4 +- + .../bpf/progs/test_xdp_with_devmap_helpers.c | 4 +- + tools/testing/selftests/bpf/progs/trace_vprintk.c | 33 + + tools/testing/selftests/bpf/progs/xdp_dummy.c | 2 +- + .../selftests/bpf/progs/xdp_redirect_multi_kern.c | 4 +- + tools/testing/selftests/bpf/progs/xdping_kern.c | 4 +- + tools/testing/selftests/bpf/progs/xdpwall.c | 365 + + tools/testing/selftests/bpf/test_bpftool.py | 22 +- + tools/testing/selftests/bpf/test_btf.h | 3 + + tools/testing/selftests/bpf/test_progs.c | 39 + + tools/testing/selftests/bpf/test_progs.h | 2 + + .../selftests/bpf/test_tcp_check_syncookie.sh | 4 +- + tools/testing/selftests/bpf/test_tunnel.sh | 5 +- + tools/testing/selftests/bpf/test_xdp_meta.sh | 5 +- + tools/testing/selftests/bpf/test_xdp_redirect.sh | 4 +- + .../selftests/bpf/test_xdp_redirect_multi.sh | 2 +- + tools/testing/selftests/bpf/test_xdp_veth.sh | 4 +- + tools/testing/selftests/bpf/test_xdp_vlan.sh | 7 +- + tools/testing/selftests/bpf/trace_helpers.c | 37 + + tools/testing/selftests/bpf/trace_helpers.h | 5 + + tools/testing/selftests/bpf/verifier/ctx_skb.c | 60 + + tools/testing/selftests/bpf/verifier/jit.c | 22 +- + tools/testing/selftests/bpf/verifier/spill_fill.c | 161 + + tools/testing/selftests/bpf/xdping.c | 5 +- + tools/testing/selftests/bpf/xdpxceiver.c | 961 +- + tools/testing/selftests/bpf/xdpxceiver.h | 75 +- + .../drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh | 50 +- + .../selftests/drivers/net/mlxsw/sch_offload.sh | 276 + + .../selftests/drivers/net/mlxsw/sch_red_core.sh | 127 +- + .../selftests/drivers/net/mlxsw/sch_red_ets.sh | 64 +- + .../selftests/drivers/net/mlxsw/sch_red_root.sh | 8 + + .../mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh | 250 + + .../drivers/net/netdevsim/ethtool-common.sh | 2 +- + .../drivers/net/netdevsim/tc-mq-visibility.sh | 77 + + .../drivers/net/ocelot/tc_flower_chains.sh | 50 +- + tools/testing/selftests/net/fib_nexthops.sh | 1 + + .../net/forwarding/forwarding.config.sample | 4 + + .../selftests/net/forwarding/ip6gre_flat.sh | 65 + + .../selftests/net/forwarding/ip6gre_flat_key.sh | 65 + + .../selftests/net/forwarding/ip6gre_flat_keys.sh | 65 + + .../selftests/net/forwarding/ip6gre_hier.sh | 65 + + 
.../selftests/net/forwarding/ip6gre_hier_key.sh | 65 + + .../selftests/net/forwarding/ip6gre_hier_keys.sh | 65 + + .../testing/selftests/net/forwarding/ip6gre_lib.sh | 438 + + .../testing/selftests/net/forwarding/tc_common.sh | 10 + + tools/testing/selftests/net/ioam6.sh | 208 +- + tools/testing/selftests/net/mptcp/.gitignore | 1 + + tools/testing/selftests/net/mptcp/Makefile | 2 +- + tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +- + tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 683 + + tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 31 +- + tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 +- + tools/testing/selftests/net/tls.c | 28 +- + 1726 files changed, 182115 insertions(+), 37888 deletions(-) + create mode 100644 Documentation/ABI/testing/sysfs-timecard + create mode 100644 Documentation/bpf/bpf_licensing.rst + create mode 100644 Documentation/devicetree/bindings/net/asix,ax88796c.yaml + delete mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.txt + create mode 100644 Documentation/devicetree/bindings/net/dsa/qca8k.yaml + create mode 100644 Documentation/devicetree/bindings/net/lantiq,etop-xway.yaml + delete mode 100644 Documentation/devicetree/bindings/net/lantiq,xrx200-net.txt + create mode 100644 Documentation/devicetree/bindings/net/lantiq,xrx200-net.yaml + delete mode 100644 Documentation/devicetree/bindings/net/wireless/qca,ath9k.txt + create mode 100644 Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml + create mode 100644 Documentation/networking/devlink/iosm.rst + create mode 100644 drivers/net/dsa/rtl8365mb.c + create mode 100644 drivers/net/ethernet/asix/Kconfig + create mode 100644 drivers/net/ethernet/asix/Makefile + create mode 100644 drivers/net/ethernet/asix/ax88796c_ioctl.c + create mode 100644 drivers/net/ethernet/asix/ax88796c_ioctl.h + create mode 100644 drivers/net/ethernet/asix/ax88796c_main.c + create mode 100644 drivers/net/ethernet/asix/ax88796c_main.h + create mode 100644 drivers/net/ethernet/asix/ax88796c_spi.c + create mode 100644 drivers/net/ethernet/asix/ax88796c_spi.h + create mode 100644 drivers/net/ethernet/intel/ice/ice_eswitch.c + create mode 100644 drivers/net/ethernet/intel/ice/ice_eswitch.h + create mode 100644 drivers/net/ethernet/intel/ice/ice_repr.c + create mode 100644 drivers/net/ethernet/intel/ice/ice_repr.h + create mode 100644 drivers/net/ethernet/intel/ice/ice_tc_lib.c + create mode 100644 drivers/net/ethernet/intel/ice/ice_tc_lib.h + rename drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.c (92%) + rename drivers/net/ethernet/mellanox/mlx5/core/{ => lag}/lag.h (89%) + rename drivers/net/ethernet/mellanox/mlx5/core/{lag_mp.c => lag/mp.c} (99%) + rename drivers/net/ethernet/mellanox/mlx5/core/{lag_mp.h => lag/mp.h} (100%) + create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c + create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h + create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c + create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h + create mode 100644 drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h + create mode 100644 drivers/net/ethernet/qlogic/qed/qed_iro_hsi.h + create mode 100644 drivers/net/ethernet/qlogic/qed/qed_mfw_hsi.h + delete mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/eeprom.c + create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c + create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c + create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/sdio.c 
+ create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c + create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c + create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/testmode.c + rename drivers/net/wireless/mediatek/mt76/{mt7615 => }/sdio.h (72%) + rename drivers/net/wireless/mediatek/mt76/{mt7615 => }/sdio_txrx.c (67%) + create mode 100644 drivers/net/wireless/realtek/rtw89/Kconfig + create mode 100644 drivers/net/wireless/realtek/rtw89/Makefile + create mode 100644 drivers/net/wireless/realtek/rtw89/cam.c + create mode 100644 drivers/net/wireless/realtek/rtw89/cam.h + create mode 100644 drivers/net/wireless/realtek/rtw89/coex.c + create mode 100644 drivers/net/wireless/realtek/rtw89/coex.h + create mode 100644 drivers/net/wireless/realtek/rtw89/core.c + create mode 100644 drivers/net/wireless/realtek/rtw89/core.h + create mode 100644 drivers/net/wireless/realtek/rtw89/debug.c + create mode 100644 drivers/net/wireless/realtek/rtw89/debug.h + create mode 100644 drivers/net/wireless/realtek/rtw89/efuse.c + create mode 100644 drivers/net/wireless/realtek/rtw89/efuse.h + create mode 100644 drivers/net/wireless/realtek/rtw89/fw.c + create mode 100644 drivers/net/wireless/realtek/rtw89/fw.h + create mode 100644 drivers/net/wireless/realtek/rtw89/mac.c + create mode 100644 drivers/net/wireless/realtek/rtw89/mac.h + create mode 100644 drivers/net/wireless/realtek/rtw89/mac80211.c + create mode 100644 drivers/net/wireless/realtek/rtw89/pci.c + create mode 100644 drivers/net/wireless/realtek/rtw89/pci.h + create mode 100644 drivers/net/wireless/realtek/rtw89/phy.c + create mode 100644 drivers/net/wireless/realtek/rtw89/phy.h + create mode 100644 drivers/net/wireless/realtek/rtw89/ps.c + create mode 100644 drivers/net/wireless/realtek/rtw89/ps.h + create mode 100644 drivers/net/wireless/realtek/rtw89/reg.h + create mode 100644 drivers/net/wireless/realtek/rtw89/regd.c + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a.c + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a.h + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.h + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_rfk_table.c + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_rfk_table.h + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_table.c + create mode 100644 drivers/net/wireless/realtek/rtw89/rtw8852a_table.h + create mode 100644 drivers/net/wireless/realtek/rtw89/sar.c + create mode 100644 drivers/net/wireless/realtek/rtw89/sar.h + create mode 100644 drivers/net/wireless/realtek/rtw89/ser.c + create mode 100644 drivers/net/wireless/realtek/rtw89/ser.h + create mode 100644 drivers/net/wireless/realtek/rtw89/txrx.h + create mode 100644 drivers/net/wireless/realtek/rtw89/util.h + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_coredump.c + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_coredump.h + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_devlink.c + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_devlink.h + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_flash.c + create mode 100644 drivers/net/wwan/iosm/iosm_ipc_flash.h + delete mode 100644 drivers/ptp/idt8a340_reg.h + delete mode 100644 include/linux/netfilter_ingress.h + create mode 100644 include/linux/netfilter_netdev.h + create mode 100644 include/trace/events/mctp.h + create mode 100644 net/bluetooth/eir.c + create mode 100644 net/bluetooth/eir.h + create 
mode 100644 net/bluetooth/hci_codec.c + create mode 100644 net/bluetooth/hci_codec.h + rename {drivers/of => net/core}/of_net.c (85%) + create mode 100644 net/dsa/tag_rtl8_4.c + create mode 100644 net/ethtool/module.c + create mode 100644 net/mctp/test/route-test.c + create mode 100644 net/mctp/test/utils.c + create mode 100644 net/mctp/test/utils.h + rename net/qrtr/{qrtr.c => af_qrtr.c} (100%) + create mode 100644 tools/lib/bpf/libbpf_version.h + create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_tag.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/trace_vprintk.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/xdpwall.c + create mode 100644 tools/testing/selftests/bpf/progs/get_branch_snapshot.c + create mode 100644 tools/testing/selftests/bpf/progs/tag.c + create mode 100644 tools/testing/selftests/bpf/progs/tailcall6.c + create mode 100644 tools/testing/selftests/bpf/progs/trace_vprintk.c + create mode 100644 tools/testing/selftests/bpf/progs/xdpwall.c + create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh + create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh + create mode 100755 tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh + create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh + create mode 100644 tools/testing/selftests/net/forwarding/ip6gre_lib.sh + create mode 100644 tools/testing/selftests/net/mptcp/mptcp_sockopt.c +Merging bpf-next/for-next (c825f5fee19c libbpf: Fix BTF header parsing checks) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git bpf-next/for-next +Auto-merging tools/lib/bpf/libbpf.c +Auto-merging samples/bpf/Makefile +Auto-merging kernel/bpf/syscall.c +Auto-merging kernel/bpf/core.c +Removing arch/mips/net/ebpf_jit.c +Removing arch/mips/net/bpf_jit_asm.S +Removing arch/mips/net/bpf_jit.h +Removing arch/mips/net/bpf_jit.c +Auto-merging arch/mips/Kconfig +Auto-merging arch/arm/net/bpf_jit_32.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ Documentation/bpf/btf.rst | 28 +- + .../bpf/libbpf/libbpf_naming_convention.rst | 40 + + MAINTAINERS | 1 + + arch/arm/net/bpf_jit_32.c | 5 - + arch/mips/Kconfig | 15 +- + arch/mips/include/asm/uasm.h | 5 + + arch/mips/mm/uasm-mips.c | 4 +- + arch/mips/mm/uasm.c | 3 +- + arch/mips/net/Makefile | 9 +- + arch/mips/net/bpf_jit.c | 1299 ------------- + arch/mips/net/bpf_jit.h | 81 - + arch/mips/net/bpf_jit_asm.S | 285 --- + arch/mips/net/bpf_jit_comp.c | 1034 +++++++++++ + arch/mips/net/bpf_jit_comp.h | 235 +++ + arch/mips/net/bpf_jit_comp32.c | 1899 +++++++++++++++++++ + arch/mips/net/bpf_jit_comp64.c | 1060 +++++++++++ + arch/mips/net/ebpf_jit.c | 1938 -------------------- + arch/x86/net/bpf_jit_comp.c | 130 +- + include/linux/bpf.h | 10 +- + include/linux/bpf_verifier.h | 2 + + include/linux/bpfptr.h | 1 + + include/linux/btf.h | 39 + + include/trace/bpf_probe.h | 19 +- + include/uapi/linux/bpf.h | 8 + + include/uapi/linux/btf.h | 8 +- + kernel/bpf/btf.c | 103 +- + kernel/bpf/core.c | 4 + + kernel/bpf/preload/.gitignore | 4 +- + kernel/bpf/preload/Makefile | 26 +- + kernel/bpf/preload/iterators/Makefile | 38 +- + kernel/bpf/syscall.c | 8 +- + kernel/bpf/verifier.c | 227 ++- + kernel/trace/bpf_trace.c | 2 + + lib/test_bpf.c | 120 +- + net/bpf/test_run.c | 28 +- + net/core/filter.c | 23 + + net/ipv4/bpf_tcp_ca.c | 36 +- + net/ipv4/tcp_bbr.c | 28 +- + net/ipv4/tcp_cubic.c | 26 +- + net/ipv4/tcp_dctcp.c | 26 +- + samples/bpf/.gitignore | 4 + + samples/bpf/Makefile | 47 +- + samples/bpf/xdp1_user.c | 2 +- + samples/bpf/xdp_redirect_cpu_user.c | 6 +- + samples/bpf/xdp_sample_pkts_user.c | 2 +- + scripts/Makefile.modfinal | 1 + + scripts/bpf_doc.py | 2 + + tools/bpf/bpftool/Makefile | 56 +- + tools/bpf/bpftool/btf.c | 18 +- + tools/bpf/bpftool/gen.c | 161 +- + tools/bpf/bpftool/iter.c | 2 +- + tools/bpf/bpftool/map_perf_ring.c | 1 - + tools/bpf/bpftool/prog.c | 19 +- + tools/bpf/resolve_btfids/Makefile | 16 +- + tools/bpf/resolve_btfids/main.c | 36 +- + tools/bpf/runqslower/Makefile | 22 +- + tools/include/uapi/linux/bpf.h | 8 + + tools/include/uapi/linux/btf.h | 8 +- + tools/lib/bpf/Makefile | 35 +- + tools/lib/bpf/bpf.c | 1 + + tools/lib/bpf/bpf_gen_internal.h | 16 +- + tools/lib/bpf/btf.c | 315 ++-- + tools/lib/bpf/btf.h | 39 +- + tools/lib/bpf/btf_dump.c | 56 +- + tools/lib/bpf/gen_loader.c | 314 +++- + tools/lib/bpf/libbpf.c | 1144 +++++++----- + tools/lib/bpf/libbpf.h | 36 +- + tools/lib/bpf/libbpf.map | 9 +- + tools/lib/bpf/libbpf_internal.h | 15 +- + tools/lib/bpf/libbpf_legacy.h | 3 + + tools/lib/bpf/linker.c | 29 +- + tools/perf/util/bpf-event.c | 2 +- + tools/testing/selftests/bpf/Makefile | 35 +- + tools/testing/selftests/bpf/README.rst | 4 +- + .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 15 + + .../selftests/bpf/bpf_testmod/bpf_testmod.c | 33 +- + .../selftests/bpf/bpf_testmod/bpf_testmod.h | 5 + + tools/testing/selftests/bpf/btf_helpers.c | 12 +- + tools/testing/selftests/bpf/cgroup_helpers.c | 5 +- + tools/testing/selftests/bpf/cgroup_helpers.h | 2 +- + tools/testing/selftests/bpf/flow_dissector_load.c | 18 +- + tools/testing/selftests/bpf/flow_dissector_load.h | 10 +- + tools/testing/selftests/bpf/prog_tests/atomics.c | 1 + + .../selftests/bpf/prog_tests/bpf_iter_setsockopt.c | 2 +- + .../testing/selftests/bpf/prog_tests/bpf_obj_id.c | 2 +- + tools/testing/selftests/bpf/prog_tests/btf.c | 247 ++- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 10 +- + .../testing/selftests/bpf/prog_tests/btf_endian.c | 12 +- + tools/testing/selftests/bpf/prog_tests/btf_split.c | 2 
+- + tools/testing/selftests/bpf/prog_tests/btf_write.c | 159 +- + .../selftests/bpf/prog_tests/cg_storage_multi.c | 2 +- + .../bpf/prog_tests/cgroup_attach_autodetach.c | 2 +- + .../selftests/bpf/prog_tests/cgroup_attach_multi.c | 2 +- + .../bpf/prog_tests/cgroup_attach_override.c | 2 +- + .../testing/selftests/bpf/prog_tests/cgroup_link.c | 2 +- + .../testing/selftests/bpf/prog_tests/cgroup_v1v2.c | 2 +- + tools/testing/selftests/bpf/prog_tests/check_mtu.c | 2 +- + .../selftests/bpf/prog_tests/core_autosize.c | 4 +- + .../testing/selftests/bpf/prog_tests/core_reloc.c | 4 +- + .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 9 +- + .../bpf/prog_tests/flow_dissector_load_bytes.c | 2 +- + .../bpf/prog_tests/flow_dissector_reattach.c | 2 +- + .../selftests/bpf/prog_tests/get_branch_snapshot.c | 36 +- + .../testing/selftests/bpf/prog_tests/global_data.c | 11 +- + .../selftests/bpf/prog_tests/global_data_init.c | 2 +- + tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 5 +- + .../selftests/bpf/prog_tests/ksyms_module.c | 29 +- + .../selftests/bpf/prog_tests/ksyms_module_libbpf.c | 28 + + .../selftests/bpf/prog_tests/migrate_reuseport.c | 2 +- + .../selftests/bpf/prog_tests/modify_return.c | 3 +- + .../selftests/bpf/prog_tests/module_attach.c | 35 + + .../selftests/bpf/prog_tests/ns_current_pid_tgid.c | 3 +- + .../testing/selftests/bpf/prog_tests/perf_buffer.c | 24 +- + tools/testing/selftests/bpf/prog_tests/perf_link.c | 3 +- + .../testing/selftests/bpf/prog_tests/probe_user.c | 3 +- + .../bpf/prog_tests/raw_tp_writable_test_run.c | 3 +- + .../testing/selftests/bpf/prog_tests/rdonly_maps.c | 2 +- + tools/testing/selftests/bpf/prog_tests/recursion.c | 10 +- + .../selftests/bpf/prog_tests/resolve_btfids.c | 4 +- + .../selftests/bpf/prog_tests/select_reuseport.c | 4 +- + .../bpf/prog_tests/send_signal_sched_switch.c | 3 +- + .../selftests/bpf/prog_tests/sk_storage_tracing.c | 2 +- + .../selftests/bpf/prog_tests/skc_to_unix_sock.c | 54 + + tools/testing/selftests/bpf/prog_tests/skeleton.c | 29 + + .../selftests/bpf/prog_tests/snprintf_btf.c | 2 +- + .../testing/selftests/bpf/prog_tests/sock_fields.c | 2 +- + .../selftests/bpf/prog_tests/sockmap_listen.c | 2 +- + tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 2 +- + tools/testing/selftests/bpf/prog_tests/timer.c | 3 +- + tools/testing/selftests/bpf/prog_tests/timer_mim.c | 2 +- + .../selftests/bpf/prog_tests/tp_attach_query.c | 2 +- + .../selftests/bpf/prog_tests/trace_printk.c | 2 +- + .../selftests/bpf/prog_tests/trace_vprintk.c | 2 +- + .../selftests/bpf/prog_tests/trampoline_count.c | 3 +- + .../testing/selftests/bpf/prog_tests/verif_stats.c | 28 + + .../testing/selftests/bpf/prog_tests/xdp_attach.c | 2 +- + .../testing/selftests/bpf/prog_tests/xdp_bonding.c | 2 +- + .../selftests/bpf/prog_tests/xdp_cpumap_attach.c | 2 +- + .../selftests/bpf/prog_tests/xdp_devmap_attach.c | 2 +- + tools/testing/selftests/bpf/prog_tests/xdp_info.c | 2 +- + tools/testing/selftests/bpf/prog_tests/xdp_link.c | 2 +- + tools/testing/selftests/bpf/progs/atomics.c | 16 + + .../bpf/progs/btf_dump_test_case_bitfields.c | 10 +- + .../bpf/progs/btf_dump_test_case_packing.c | 4 +- + .../bpf/progs/btf_dump_test_case_padding.c | 2 +- + .../bpf/progs/btf_dump_test_case_syntax.c | 2 +- + .../bpf/progs/cgroup_skb_sk_lookup_kern.c | 1 - + .../testing/selftests/bpf/progs/connect4_dropper.c | 2 +- + tools/testing/selftests/bpf/progs/connect4_prog.c | 2 - + tools/testing/selftests/bpf/progs/connect6_prog.c | 2 - + .../selftests/bpf/progs/connect_force_port4.c | 1 - + 
.../selftests/bpf/progs/connect_force_port6.c | 1 - + tools/testing/selftests/bpf/progs/dev_cgroup.c | 1 - + tools/testing/selftests/bpf/progs/fexit_sleep.c | 4 +- + .../selftests/bpf/progs/get_cgroup_id_kern.c | 1 - + tools/testing/selftests/bpf/progs/map_ptr_kern.c | 1 - + tools/testing/selftests/bpf/progs/netcnt_prog.c | 1 - + tools/testing/selftests/bpf/progs/recursion.c | 9 +- + tools/testing/selftests/bpf/progs/sendmsg4_prog.c | 2 - + tools/testing/selftests/bpf/progs/sendmsg6_prog.c | 2 - + .../selftests/bpf/progs/sockmap_parse_prog.c | 2 - + .../selftests/bpf/progs/sockmap_tcp_msg_prog.c | 2 - + .../selftests/bpf/progs/sockmap_verdict_prog.c | 2 - + .../testing/selftests/bpf/progs/sockopt_inherit.c | 1 - + tools/testing/selftests/bpf/progs/tag.c | 15 +- + tools/testing/selftests/bpf/progs/tcp_rtt.c | 1 - + tools/testing/selftests/bpf/progs/test_btf_haskv.c | 2 - + tools/testing/selftests/bpf/progs/test_btf_newkv.c | 2 - + tools/testing/selftests/bpf/progs/test_btf_nokv.c | 2 - + .../selftests/bpf/progs/test_enable_stats.c | 2 +- + .../selftests/bpf/progs/test_ksyms_module.c | 46 +- + tools/testing/selftests/bpf/progs/test_l4lb.c | 2 - + .../testing/selftests/bpf/progs/test_map_in_map.c | 1 - + .../selftests/bpf/progs/test_module_attach.c | 14 + + .../testing/selftests/bpf/progs/test_perf_buffer.c | 18 +- + tools/testing/selftests/bpf/progs/test_pinning.c | 2 - + .../selftests/bpf/progs/test_pinning_invalid.c | 2 - + .../testing/selftests/bpf/progs/test_pkt_access.c | 1 - + .../selftests/bpf/progs/test_queue_stack_map.h | 2 - + .../bpf/progs/test_select_reuseport_kern.c | 2 - + tools/testing/selftests/bpf/progs/test_sk_lookup.c | 1 - + .../selftests/bpf/progs/test_skb_cgroup_id_kern.c | 2 - + tools/testing/selftests/bpf/progs/test_skb_ctx.c | 1 - + .../selftests/bpf/progs/test_skc_to_unix_sock.c | 40 + + tools/testing/selftests/bpf/progs/test_skeleton.c | 18 + + .../selftests/bpf/progs/test_sockmap_kern.h | 1 - + .../selftests/bpf/progs/test_sockmap_listen.c | 1 - + .../selftests/bpf/progs/test_stacktrace_build_id.c | 1 - + .../testing/selftests/bpf/progs/test_tcp_estats.c | 1 - + .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 1 - + .../selftests/bpf/progs/test_tcpnotify_kern.c | 2 - + .../testing/selftests/bpf/progs/test_tracepoint.c | 1 - + .../testing/selftests/bpf/progs/test_tunnel_kern.c | 2 - + tools/testing/selftests/bpf/progs/test_xdp.c | 2 - + tools/testing/selftests/bpf/progs/test_xdp_loop.c | 2 - + .../selftests/bpf/progs/test_xdp_redirect.c | 2 - + tools/testing/selftests/bpf/test_bpftool_build.sh | 4 + + tools/testing/selftests/bpf/test_btf.h | 4 +- + tools/testing/selftests/bpf/test_flow_dissector.sh | 10 +- + tools/testing/selftests/bpf/test_progs.c | 671 ++++++- + tools/testing/selftests/bpf/test_progs.h | 38 +- + tools/testing/selftests/bpf/test_verifier.c | 12 +- + .../selftests/bpf/verifier/atomic_cmpxchg.c | 38 + + .../testing/selftests/bpf/verifier/atomic_fetch.c | 57 + + .../selftests/bpf/verifier/atomic_invalid.c | 25 + + tools/testing/selftests/bpf/verifier/calls.c | 23 + + tools/testing/selftests/bpf/verifier/jit.c | 47 + + tools/testing/selftests/bpf/xdping.c | 2 +- + 208 files changed, 8380 insertions(+), 5062 deletions(-) + delete mode 100644 arch/mips/net/bpf_jit.c + delete mode 100644 arch/mips/net/bpf_jit.h + delete mode 100644 arch/mips/net/bpf_jit_asm.S + create mode 100644 arch/mips/net/bpf_jit_comp.c + create mode 100644 arch/mips/net/bpf_jit_comp.h + create mode 100644 arch/mips/net/bpf_jit_comp32.c + create mode 100644 
arch/mips/net/bpf_jit_comp64.c + delete mode 100644 arch/mips/net/ebpf_jit.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c + create mode 100644 tools/testing/selftests/bpf/prog_tests/verif_stats.c + create mode 100644 tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c + create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch.c + create mode 100644 tools/testing/selftests/bpf/verifier/atomic_invalid.c +Merging ipsec-next/master (83688aec17bf net/ipv4/xfrm4_tunnel.c: remove superfluous header files from xfrm4_tunnel.c) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git ipsec-next/master +Merge made by the 'recursive' strategy. + net/ipv4/xfrm4_tunnel.c | 2 -- + 1 file changed, 2 deletions(-) +Merging mlx5-next/mlx5-next (60dd57c74794 Merge brank 'mlx5_mkey' into rdma.git for-next) +$ git merge -m Merge branch 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux.git mlx5-next/mlx5-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging netfilter-next/master (241eb3f3ee42 netfilter: ebtables: use array_size() helper in copy_{from,to}_user()) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git netfilter-next/master +Auto-merging net/bridge/netfilter/ebtables.c +Merge made by the 'recursive' strategy. + net/bridge/netfilter/ebtables.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) +Merging ipvs-next/master (ffdd33dd9c12 netfilter: core: Fix clang warnings about unused static inlines) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git ipvs-next/master +Already up to date. +Merging wireless-drivers-next/master (753453afacc0 mt76: mt7615: mt7622: fix ibss and meshpoint) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git wireless-drivers-next/master +Merge made by the 'recursive' strategy. + drivers/net/wireless/mediatek/mt76/mt7615/main.c | 4 ++-- + drivers/net/wireless/mediatek/mt76/mt7921/Kconfig | 1 + + drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) +Merging bluetooth/master (f33b0068cdaf Bluetooth: vhci: Fix checking of msft_opcode) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git bluetooth/master +Merge made by the 'recursive' strategy. + drivers/bluetooth/btintel.c | 22 +++++--- + drivers/bluetooth/btsdio.c | 2 + + drivers/bluetooth/btusb.c | 7 ++- + drivers/bluetooth/hci_vhci.c | 120 +++++++++++++++++++++++++++++++++++++++--- + drivers/bluetooth/virtio_bt.c | 3 ++ + net/bluetooth/hci_codec.c | 18 ++++++- + net/bluetooth/hci_core.c | 1 + + net/bluetooth/hci_event.c | 16 +++--- + net/bluetooth/hci_sock.c | 11 ++-- + net/bluetooth/hci_sysfs.c | 2 + + net/bluetooth/l2cap_sock.c | 19 +++++++ + net/bluetooth/mgmt.c | 15 +++--- + 12 files changed, 199 insertions(+), 37 deletions(-) +Merging mac80211-next/master (47b068247aa7 net: liquidio: Make use of the helper macro kthread_run()) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git mac80211-next/master +Already up to date. 
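Each merge recorded in this log follows the same mechanical pattern visible in the logged commands: merge the remote-tracking branch with an explicit -m message naming the branch and its URL, print the resulting diffstat when a merge commit was made, and, when the automatic merge fails (as in the drm merge further below), resolve the conflicts and commit with "git commit --no-edit -v -a". The following is a minimal POSIX-sh sketch of that loop, not the actual linux-next machinery: the inline tree list is illustrative only (linux-next drives the real sequence from its own control files), and it assumes the remotes have already been added and fetched.

#!/bin/sh
# Sketch of the per-tree merge loop implied by the logged commands.
# ASSUMPTION: remotes are already added and fetched; the list below is
# illustrative, not the real linux-next control file.
trees="
ipsec-next|master|git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
netfilter-next|master|git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
"
for t in $trees; do
	name=${t%%|*}; rest=${t#*|}
	branch=${rest%%|*}; url=${rest#*|}
	before=$(git rev-parse HEAD)
	echo "Merging $name/$branch"
	# Same form as the logged "$ git merge" lines: message first,
	# then the remote-tracking branch.
	if ! git merge -m "Merge branch '$branch' of $url" "$name/$branch"; then
		# Mirrors the conflicted drm merge below: fix conflicts,
		# then commit the merge as logged.
		echo "Automatic merge failed; resolve conflicts, then: git commit --no-edit -v -a"
		exit 1
	fi
	# "Already up to date." creates no commit, so only print a
	# diffstat when HEAD actually moved (cf. the logged
	# "$ git diff -M --stat --summary HEAD^.." after real merges).
	[ "$(git rev-parse HEAD)" = "$before" ] ||
		git diff -M --stat --summary "$before.."
done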
+Merging mtd/mtd/next (c13de2386c78 mtd: core: don't remove debugfs directory if device is in use) +$ git merge -m Merge branch 'mtd/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/mtd/next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 3 +-- + drivers/mtd/chips/Kconfig | 2 ++ + drivers/mtd/devices/block2mtd.c | 29 +++++++++++++++++++++-------- + drivers/mtd/maps/Kconfig | 2 +- + drivers/mtd/mtdcore.c | 4 ++-- + drivers/mtd/mtdswap.c | 1 - + 6 files changed, 27 insertions(+), 14 deletions(-) +Merging nand/nand/next (fc9e18f9e987 mtd: rawnand: arasan: Prevent an unsupported configuration) +$ git merge -m Merge branch 'nand/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git nand/nand/next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 8 ++++++++ + drivers/mtd/nand/ecc-sw-hamming.c | 7 ++++--- + drivers/mtd/nand/onenand/Kconfig | 9 +++++---- + drivers/mtd/nand/raw/ams-delta.c | 12 +++++++++--- + drivers/mtd/nand/raw/arasan-nand-controller.c | 15 +++++++++++++++ + drivers/mtd/nand/raw/atmel/pmecc.c | 7 ++----- + drivers/mtd/nand/raw/au1550nd.c | 12 +++++++++--- + drivers/mtd/nand/raw/brcmnand/bcm6368_nand.c | 5 +---- + drivers/mtd/nand/raw/cs553x_nand.c | 12 +----------- + drivers/mtd/nand/raw/denali_dt.c | 7 ++----- + drivers/mtd/nand/raw/fsmc_nand.c | 4 +++- + drivers/mtd/nand/raw/gpio.c | 15 ++++++++++----- + drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 4 +--- + drivers/mtd/nand/raw/hisi504_nand.c | 7 ++----- + drivers/mtd/nand/raw/intel-nand-controller.c | 5 +++++ + drivers/mtd/nand/raw/lpc32xx_slc.c | 15 +-------------- + drivers/mtd/nand/raw/mpc5121_nfc.c | 12 +++++++++--- + drivers/mtd/nand/raw/mtk_ecc.c | 4 +--- + drivers/mtd/nand/raw/mtk_nand.c | 4 +--- + drivers/mtd/nand/raw/nand_hynix.c | 14 ++++++++++++++ + drivers/mtd/nand/raw/nand_ids.c | 4 ++++ + drivers/mtd/nand/raw/ndfc.c | 12 +----------- + drivers/mtd/nand/raw/omap_elm.c | 5 ++--- + drivers/mtd/nand/raw/orion_nand.c | 12 +++++++++--- + drivers/mtd/nand/raw/oxnas_nand.c | 4 +--- + drivers/mtd/nand/raw/pasemi_nand.c | 12 +++++++++--- + drivers/mtd/nand/raw/plat_nand.c | 16 ++++++++++------ + drivers/mtd/nand/raw/sharpsl.c | 12 +----------- + drivers/mtd/nand/raw/socrates_nand.c | 12 +++++++++--- + drivers/mtd/nand/raw/stm32_fmc2_nand.c | 8 ++------ + drivers/mtd/nand/raw/tegra_nand.c | 4 +--- + drivers/mtd/nand/raw/tmio_nand.c | 8 +++----- + drivers/mtd/nand/raw/txx9ndfmc.c | 9 +++------ + drivers/mtd/nand/raw/vf610_nfc.c | 4 +--- + drivers/mtd/nand/raw/xway_nand.c | 16 ++++++++++------ + include/linux/mtd/mtd.h | 2 -- + 36 files changed, 169 insertions(+), 149 deletions(-) +Merging spi-nor/spi-nor/next (df872ab1ffe4 mtd: spi-nor: nxp-spifi: Make use of the helper function devm_platform_ioremap_resource_byname()) +$ git merge -m Merge branch 'spi-nor/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git spi-nor/spi-nor/next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ MAINTAINERS | 1 + + drivers/mtd/spi-nor/controllers/hisi-sfc.c | 8 ++------ + drivers/mtd/spi-nor/controllers/nxp-spifi.c | 7 ++----- + drivers/mtd/spi-nor/micron-st.c | 4 +++- + 4 files changed, 8 insertions(+), 12 deletions(-) +Merging crypto/master (3ae88f676aa6 crypto: tcrypt - fix skcipher multi-buffer tests for 1420B blocks) +$ git merge -m Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git crypto/master +Auto-merging drivers/crypto/qat/qat_common/adf_common_drv.h +Auto-merging drivers/crypto/hisilicon/qm.c +Merge made by the 'recursive' strategy. + arch/arm64/crypto/Kconfig | 6 - + arch/arm64/crypto/aes-ce-ccm-core.S | 24 +-- + arch/arm64/crypto/aes-ce-ccm-glue.c | 203 ++++++------------ + arch/arm64/crypto/aes-glue.c | 102 ++------- + arch/arm64/crypto/aes-neonbs-glue.c | 122 +---------- + arch/arm64/crypto/ghash-ce-glue.c | 209 +++++------------- + arch/x86/crypto/aesni-intel_glue.c | 2 +- + crypto/Kconfig | 2 +- + crypto/algapi.c | 73 +++++-- + crypto/api.c | 53 ++++- + crypto/drbg.c | 2 +- + crypto/internal.h | 10 + + crypto/jitterentropy.c | 24 +-- + crypto/tcrypt.c | 5 +- + crypto/testmgr.c | 4 +- + drivers/char/hw_random/Kconfig | 12 +- + drivers/char/hw_random/ixp4xx-rng.c | 4 +- + drivers/char/hw_random/meson-rng.c | 5 +- + drivers/char/hw_random/mtk-rng.c | 9 +- + drivers/char/hw_random/s390-trng.c | 4 +- + drivers/crypto/caam/caampkc.c | 19 +- + drivers/crypto/caam/regs.h | 3 + + drivers/crypto/ccp/sev-dev.c | 2 +- + drivers/crypto/ccree/cc_driver.c | 3 +- + drivers/crypto/hisilicon/qm.c | 74 ++++++- + drivers/crypto/hisilicon/zip/zip_main.c | 2 +- + drivers/crypto/img-hash.c | 7 +- + drivers/crypto/marvell/cesa/cesa.c | 1 - + drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c | 1 + + drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 35 ++- + drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h | 10 + + drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 89 +------- + drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h | 13 +- + drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 87 +------- + drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.h | 12 -- + drivers/crypto/qat/qat_common/adf_accel_devices.h | 29 ++- + drivers/crypto/qat/qat_common/adf_common_drv.h | 9 +- + drivers/crypto/qat/qat_common/adf_gen2_hw_data.c | 98 +++++++++ + drivers/crypto/qat/qat_common/adf_gen2_hw_data.h | 27 +++ + drivers/crypto/qat/qat_common/adf_init.c | 5 + + drivers/crypto/qat/qat_common/adf_isr.c | 190 +++++++--------- + drivers/crypto/qat/qat_common/adf_pf2vf_msg.c | 238 ++++++++++++--------- + drivers/crypto/qat/qat_common/adf_pf2vf_msg.h | 9 - + drivers/crypto/qat/qat_common/adf_vf2pf_msg.c | 4 +- + drivers/crypto/qat/qat_common/adf_vf_isr.c | 30 +-- + .../crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 123 +++++------ + .../crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h | 14 +- + lib/crypto/sm4.c | 4 +- + 48 files changed, 899 insertions(+), 1114 deletions(-) +Merging drm/drm-next (6f2f7c83303d Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next) +$ git merge -m Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git drm/drm-next +Removing include/linux/seqno-fence.h +Removing drivers/gpu/drm/zte/zx_vou_regs.h +Removing drivers/gpu/drm/zte/zx_vou.h +Removing drivers/gpu/drm/zte/zx_vou.c +Removing drivers/gpu/drm/zte/zx_vga_regs.h +Removing drivers/gpu/drm/zte/zx_vga.c +Removing drivers/gpu/drm/zte/zx_tvenc_regs.h +Removing drivers/gpu/drm/zte/zx_tvenc.c +Removing 
drivers/gpu/drm/zte/zx_plane_regs.h +Removing drivers/gpu/drm/zte/zx_plane.h +Removing drivers/gpu/drm/zte/zx_plane.c +Removing drivers/gpu/drm/zte/zx_hdmi_regs.h +Removing drivers/gpu/drm/zte/zx_hdmi.c +Removing drivers/gpu/drm/zte/zx_drm_drv.h +Removing drivers/gpu/drm/zte/zx_drm_drv.c +Removing drivers/gpu/drm/zte/zx_common_regs.h +Removing drivers/gpu/drm/zte/Makefile +Removing drivers/gpu/drm/zte/Kconfig +Auto-merging drivers/gpu/drm/vc4/vc4_hdmi.c +Auto-merging drivers/gpu/drm/ttm/ttm_bo_util.c +Auto-merging drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +Auto-merging drivers/gpu/drm/rcar-du/rcar_lvds.c +Auto-merging drivers/gpu/drm/rcar-du/rcar_du_encoder.c +Auto-merging drivers/gpu/drm/r128/ati_pcigart.c +Auto-merging drivers/gpu/drm/panel/Kconfig +Auto-merging drivers/gpu/drm/nouveau/nouveau_bo.c +Auto-merging drivers/gpu/drm/msm/msm_gem_submit.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/msm/msm_gem_submit.c +Auto-merging drivers/gpu/drm/msm/Kconfig +Removing drivers/gpu/drm/i915/intel_sideband.c +Auto-merging drivers/gpu/drm/i915/intel_pm.c +Auto-merging drivers/gpu/drm/i915/intel_dram.c +Auto-merging drivers/gpu/drm/i915/i915_request.c +Auto-merging drivers/gpu/drm/i915/i915_reg.h +Auto-merging drivers/gpu/drm/i915/gvt/scheduler.c +Auto-merging drivers/gpu/drm/i915/gt/intel_rps.c +Auto-merging drivers/gpu/drm/i915/gt/intel_context.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gt/intel_context.c +Removing drivers/gpu/drm/i915/gt/debugfs_gt_pm.h +Removing drivers/gpu/drm/i915/gt/debugfs_gt.c +Removing drivers/gpu/drm/i915/gt/debugfs_engines.h +Removing drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +Auto-merging drivers/gpu/drm/i915/gem/i915_gem_ttm.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gem/i915_gem_ttm.c +Auto-merging drivers/gpu/drm/i915/gem/i915_gem_context.c +Auto-merging drivers/gpu/drm/i915/display/intel_dmc.c +Auto-merging drivers/gpu/drm/i915/display/intel_display.c +Auto-merging drivers/gpu/drm/i915/display/intel_ddi.c +Auto-merging drivers/gpu/drm/i915/display/intel_bw.c +Auto-merging drivers/gpu/drm/i915/display/intel_bios.c +Auto-merging drivers/gpu/drm/i915/display/intel_audio.c +Auto-merging drivers/gpu/drm/i915/display/intel_acpi.c +Auto-merging drivers/gpu/drm/i915/display/icl_dsi.c +Auto-merging drivers/gpu/drm/drm_panel_orientation_quirks.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/drm_panel_orientation_quirks.c +Auto-merging drivers/gpu/drm/drm_edid.c +Auto-merging drivers/gpu/drm/amd/display/include/dal_asic_id.h +Auto-merging drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +Auto-merging drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dio_link_encoder.c +Auto-merging drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +Auto-merging drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +Auto-merging drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +Auto-merging drivers/gpu/drm/amd/amdkfd/kfd_svm.c +Auto-merging drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +CONFLICT (content): Merge conflict in drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +Auto-merging drivers/gpu/drm/amd/amdkfd/kfd_device.c +Auto-merging drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +Auto-merging drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +Auto-merging drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +Auto-merging drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu.h +Removing drivers/dma-buf/seqno-fence.c +Auto-merging arch/arm64/configs/defconfig +Auto-merging 
arch/arm/configs/shmobile_defconfig +Auto-merging arch/arm/configs/multi_v7_defconfig +Auto-merging arch/arm/configs/imx_v6_v7_defconfig +Auto-merging MAINTAINERS +Removing Documentation/gpu/rfc/i915_parallel_execbuf.h +Resolved 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.c' using previous resolution. +Resolved 'drivers/gpu/drm/drm_panel_orientation_quirks.c' using previous resolution. +Resolved 'drivers/gpu/drm/i915/gem/i915_gem_ttm.c' using previous resolution. +Resolved 'drivers/gpu/drm/i915/gt/intel_context.c' using previous resolution. +Resolved 'drivers/gpu/drm/msm/msm_gem_submit.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 5bc9b1b943d9] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git +$ git diff -M --stat --summary HEAD^.. + .../bindings/display/panel/boe,tv101wum-nl6.yaml | 7 + + .../bindings/display/panel/panel-edp.yaml | 188 + + .../bindings/display/panel/samsung,s6d27a1.yaml | 98 + + .../devicetree/bindings/display/renesas,du.yaml | 51 + + Documentation/driver-api/dma-buf.rst | 6 - + Documentation/gpu/drm-kms-helpers.rst | 12 + + Documentation/gpu/drm-mm.rst | 84 +- + Documentation/gpu/i915.rst | 35 +- + Documentation/gpu/rfc/i915_parallel_execbuf.h | 122 - + Documentation/gpu/rfc/i915_scheduler.rst | 4 +- + Documentation/gpu/todo.rst | 17 - + Documentation/locking/ww-mutex-design.rst | 2 +- + MAINTAINERS | 14 + + arch/arm/configs/at91_dt_defconfig | 1 + + arch/arm/configs/exynos_defconfig | 1 + + arch/arm/configs/imx_v6_v7_defconfig | 1 + + arch/arm/configs/lpc32xx_defconfig | 1 + + arch/arm/configs/multi_v5_defconfig | 1 + + arch/arm/configs/multi_v7_defconfig | 1 + + arch/arm/configs/omap2plus_defconfig | 1 + + arch/arm/configs/qcom_defconfig | 1 + + arch/arm/configs/realview_defconfig | 1 + + arch/arm/configs/sama5_defconfig | 1 + + arch/arm/configs/shmobile_defconfig | 1 + + arch/arm/configs/sunxi_defconfig | 1 + + arch/arm/configs/tegra_defconfig | 1 + + arch/arm/configs/versatile_defconfig | 1 + + arch/arm/configs/vexpress_defconfig | 1 + + arch/arm64/configs/defconfig | 1 + + arch/x86/include/asm/mce.h | 2 +- + arch/x86/kernel/cpu/mce/amd.c | 3 +- + drivers/dma-buf/Makefile | 2 +- + drivers/dma-buf/dma-buf.c | 153 +- + drivers/dma-buf/dma-fence.c | 13 +- + drivers/dma-buf/dma-resv.c | 442 +- + drivers/dma-buf/heaps/system_heap.c | 5 +- + drivers/dma-buf/seqno-fence.c | 71 - + drivers/gpu/drm/Kconfig | 11 +- + drivers/gpu/drm/Makefile | 1 - + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 143 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 8 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 256 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 6 - + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 118 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 843 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 663 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 12 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 + + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +- + 
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 23 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 8 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 57 - + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 - + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 730 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 46 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 385 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 25 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 18 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 19 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 173 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h | 51 + + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 119 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 43 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 164 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 + + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 +- + drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 4 +- + drivers/gpu/drm/amd/amdgpu/athub_v2_0.c | 7 +- + drivers/gpu/drm/amd/amdgpu/athub_v2_1.c | 9 +- + drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 31 + + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 380 +- + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 227 +- + drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 6 +- + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 91 +- + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 136 +- + drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 15 +- + drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 20 - + drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 20 + + drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 40 +- + drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 9 +- + drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 73 +- + drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 6 +- + drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 13 +- + drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 34 +- + drivers/gpu/drm/amd/amdgpu/nv.c | 95 +- + drivers/gpu/drm/amd/amdgpu/nv.h | 2 + + drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 22 +- + drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 93 +- + drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 14 +- + drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 14 +- + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 100 +- + drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 32 +- + drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 59 +- + drivers/gpu/drm/amd/amdgpu/soc15.c | 167 +- + drivers/gpu/drm/amd/amdgpu/soc15.h | 4 +- + drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 46 +- + drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 34 + + drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 24 +- + drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 24 +- + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 24 +- + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 11 +- + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 43 +- + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 23 +- + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 32 +- + drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 52 +- + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 5 +- + drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 13 +- + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 27 +- + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 34 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 259 +- + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 +- + 
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +-
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 823 +-
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 103 +-
+ .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 11 +-
+ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 16 +-
+ .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 64 +-
+ .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 +-
+ drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/Makefile | 3 +-
+ drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 10 +
+ .../gpu/drm/amd/display/dc/bios/command_table2.c | 14 +-
+ .../amd/display/dc/bios/command_table_helper2.c | 1 +
+ drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 55 +-
+ drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 9 +
+ drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 5 +
+ .../amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 12 +-
+ .../amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c | 258 +
+ .../amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.h | 34 +
+ .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 16 +-
+ .../drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 4 +-
+ .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 6 +-
+ drivers/gpu/drm/amd/display/dc/core/dc.c | 303 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link.c | 700 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 9 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 1435 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dpcd.c | 11 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c | 953 +
+ .../gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c | 493 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c | 361 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 146 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 8 +
+ drivers/gpu/drm/amd/display/dc/dc.h | 79 +-
+ drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 297 +
+ drivers/gpu/drm/amd/display/dc/dc_dsc.h | 11 +-
+ drivers/gpu/drm/amd/display/dc/dc_link.h | 19 +-
+ drivers/gpu/drm/amd/display/dc/dc_types.h | 22 +
+ drivers/gpu/drm/amd/display/dc/dce/dce_abm.h | 16 +
+ drivers/gpu/drm/amd/display/dc/dce/dce_aux.c | 15 +-
+ .../gpu/drm/amd/display/dc/dce/dce_clock_source.h | 9 +
+ drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h | 39 +
+ .../drm/amd/display/dc/dce/dce_stream_encoder.c | 2 +
+ drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 21 +
+ .../amd/display/dc/dce110/dce110_hw_sequencer.c | 150 +-
+ .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 153 +-
+ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.h | 33 +-
+ .../drm/amd/display/dc/dcn10/dcn10_link_encoder.c | 9 +
+ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 2 +-
+ .../gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 2 +-
+ .../amd/display/dc/dcn10/dcn10_stream_encoder.c | 31 +
+ .../amd/display/dc/dcn10/dcn10_stream_encoder.h | 2 +
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 45 +-
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 5 +
+ .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 38 +-
+ .../amd/display/dc/dcn20/dcn20_stream_encoder.c | 17 +-
+ .../amd/display/dc/dcn20/dcn20_stream_encoder.h | 1 +
+ drivers/gpu/drm/amd/display/dc/dcn201/Makefile | 36 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c | 84 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h | 37 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c | 316 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h | 83 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c | 107 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h | 45 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c | 150 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h | 132 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c | 630 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h | 46 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_init.c | 131 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_init.h | 33 +
+ .../amd/display/dc/dcn201/dcn201_link_encoder.c | 209 +
+ .../amd/display/dc/dcn201/dcn201_link_encoder.h | 59 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c | 125 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.h | 86 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c | 72 +
+ drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h | 74 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_optc.c | 203 +
+ .../gpu/drm/amd/display/dc/dcn201/dcn201_optc.h | 74 +
+ .../drm/amd/display/dc/dcn201/dcn201_resource.c | 1307 ++
+ .../drm/amd/display/dc/dcn201/dcn201_resource.h | 50 +
+ .../gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.c | 24 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_afmt.h | 24 +
+ .../amd/display/dc/dcn30/dcn30_dio_link_encoder.c | 4 +
+ .../display/dc/dcn30/dcn30_dio_stream_encoder.c | 18 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 59 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 6 -
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 39 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c | 1 +
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 17 +-
+ .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 8 +-
+ .../gpu/drm/amd/display/dc/dcn30/dcn30_resource.h | 7 +
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.c | 200 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_vpg.h | 15 +-
+ .../drm/amd/display/dc/dcn301/dcn301_resource.c | 102 +-
+ .../drm/amd/display/dc/dcn302/dcn302_resource.c | 8 +-
+ .../drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 4 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.c | 92 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.h | 126 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c | 173 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h | 115 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c | 162 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h | 18 +
+ .../amd/display/dc/dcn31/dcn31_dio_link_encoder.c | 136 +-
+ .../display/dc/dcn31/dcn31_hpo_dp_link_encoder.c | 616 +
+ .../display/dc/dcn31/dcn31_hpo_dp_link_encoder.h | 222 +
+ .../display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c | 752 +
+ .../display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h | 241 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 69 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h | 2 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 3 +-
+ .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 325 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c | 87 +
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h | 162 +
+ drivers/gpu/drm/amd/display/dc/dm_cp_psp.h | 1 +
+ drivers/gpu/drm/amd/display/dc/dm_helpers.h | 11 +
+ drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 +-
+ .../dc/dml/{dcn2x/dcn2x.c => dcn20/dcn20_fpu.c} | 2 +-
+ .../dc/dml/{dcn2x/dcn2x.h => dcn20/dcn20_fpu.h} | 6 +-
+ .../display/dc/dml/dcn20/display_rq_dlg_calc_20.c | 158 +-
+ .../display/dc/dml/dcn20/display_rq_dlg_calc_20.h | 4 +-
+ .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 156 +-
+ .../dc/dml/dcn20/display_rq_dlg_calc_20v2.h | 4 +-
+ .../amd/display/dc/dml/dcn21/display_mode_vba_21.c | 236 +-
+ .../display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 156 +-
+ .../display/dc/dml/dcn21/display_rq_dlg_calc_21.h | 4 +-
+ .../display/dc/dml/dcn30/display_rq_dlg_calc_30.c | 132 +-
+ .../display/dc/dml/dcn30/display_rq_dlg_calc_30.h | 4 +-
+ .../display/dc/dml/dcn31/display_rq_dlg_calc_31.c | 166 +-
+ .../display/dc/dml/dcn31/display_rq_dlg_calc_31.h | 4 +-
+ .../drm/amd/display/dc/dml/display_mode_enums.h | 4 +-
+ .../gpu/drm/amd/display/dc/dml/display_mode_lib.c | 1 +
+ .../gpu/drm/amd/display/dc/dml/display_mode_lib.h | 5 +-
+ .../amd/display/dc/dml/display_rq_dlg_helpers.c | 256 +-
+ .../amd/display/dc/dml/display_rq_dlg_helpers.h | 20 +-
+ .../amd/display/dc/dml/dml1_display_rq_dlg_calc.c | 246 +-
+ .../amd/display/dc/dml/dml1_display_rq_dlg_calc.h | 10 +-
+ drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 195 +-
+ drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c | 1 +
+ drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c | 1 +
+ drivers/gpu/drm/amd/display/dc/inc/core_types.h | 36 +-
+ drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h | 1 +
+ drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h | 29 +-
+ drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h | 99 +
+ drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 1 +
+ .../drm/amd/display/dc/inc/hw/clk_mgr_internal.h | 13 +
+ drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 23 +
+ drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 12 +
+ drivers/gpu/drm/amd/display/dc/inc/hw/dwb.h | 5 +-
+ drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 4 +
+ .../gpu/drm/amd/display/dc/inc/hw/link_encoder.h | 97 +
+ drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 1 +
+ .../gpu/drm/amd/display/dc/inc/hw/stream_encoder.h | 87 +-
+ .../drm/amd/display/dc/inc/hw/timing_generator.h | 3 +
+ drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h | 2 +-
+ .../drm/amd/display/dc/inc/hw_sequencer_private.h | 7 +
+ drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h | 26 +-
+ drivers/gpu/drm/amd/display/dc/inc/link_hwss.h | 1 +
+ drivers/gpu/drm/amd/display/dc/inc/resource.h | 19 +
+ drivers/gpu/drm/amd/display/dc/irq/Makefile | 10 +
+ .../amd/display/dc/irq/dcn20/irq_service_dcn20.c | 25 +
+ .../amd/display/dc/irq/dcn20/irq_service_dcn20.h | 2 +
+ .../amd/display/dc/irq/dcn201/irq_service_dcn201.c | 374 +
+ .../amd/display/dc/irq/dcn201/irq_service_dcn201.h | 34 +
+ .../amd/display/dc/irq/dcn21/irq_service_dcn21.c | 25 +
+ .../amd/display/dc/irq/dcn21/irq_service_dcn21.h | 2 +
+ drivers/gpu/drm/amd/display/dc/irq/irq_service.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/irq/irq_service.h | 4 +
+ drivers/gpu/drm/amd/display/dc/os_types.h | 2 +
+ .../display/dc/virtual/virtual_stream_encoder.c | 5 +-
+ drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 8 +
+ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 201 +-
+ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 9 +
+ drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h | 2 +
+ drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 20 +-
+ .../gpu/drm/amd/display/dmub/src/dmub_srv_stat.c | 16 +
+ .../drm/amd/display/include/bios_parser_types.h | 8 +
+ drivers/gpu/drm/amd/display/include/dal_asic_id.h | 1 +
+ drivers/gpu/drm/amd/display/include/dal_types.h | 1 +
+ drivers/gpu/drm/amd/display/include/dpcd_defs.h | 17 +
+ .../gpu/drm/amd/display/include/grph_object_defs.h | 12 +
+ .../gpu/drm/amd/display/include/grph_object_id.h | 8 +
+ .../drm/amd/display/include/link_service_types.h | 57 +-
+ drivers/gpu/drm/amd/display/include/logger_types.h | 6 +
+ .../drm/amd/display/modules/color/color_gamma.c | 32 +-
+ .../amd/include/asic_reg/clk/clk_11_0_1_offset.h | 32 +
+ .../amd/include/asic_reg/clk/clk_11_0_1_sh_mask.h | 37 +
+ .../amd/include/asic_reg/dcn/dcn_2_0_3_offset.h | 6193 ++++++
+ .../amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h | 22091 +++++++++++++++++++
+ .../drm/amd/include/asic_reg/df/df_3_6_offset.h | 5 +
+ .../drm/amd/include/asic_reg/df/df_3_6_sh_mask.h | 132 +
+ .../amd/include/asic_reg/dpcs/dpcs_2_0_3_offset.h | 151 +
+ .../amd/include/asic_reg/dpcs/dpcs_2_0_3_sh_mask.h | 952 +
+ .../amd/include/asic_reg/mp/mp_11_0_8_sh_mask.h | 355 +
+ drivers/gpu/drm/amd/include/atombios.h | 2 +-
+ drivers/gpu/drm/amd/include/atomfirmware.h | 4 +
+ drivers/gpu/drm/amd/include/soc15_hw_ip.h | 2 +
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +-
+ .../gpu/drm/amd/pm/powerplay/hwmgr/ppatomfwctrl.h | 4 +-
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 126 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 64 +-
+ .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 24 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 96 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 22 +-
+ drivers/gpu/drm/arm/malidp_planes.c | 2 +-
+ drivers/gpu/drm/ast/ast_drv.h | 2 -
+ drivers/gpu/drm/ast/ast_mm.c | 27 +-
+ drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 15 +-
+ drivers/gpu/drm/bridge/analogix/anx7625.c | 27 +-
+ drivers/gpu/drm/bridge/cdns-dsi.c | 4 +-
+ drivers/gpu/drm/bridge/ite-it66121.c | 21 +-
+ drivers/gpu/drm/bridge/panel.c | 37 +
+ drivers/gpu/drm/bridge/parade-ps8640.c | 292 +-
+ drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c | 6 +-
+ drivers/gpu/drm/bridge/ti-sn65dsi86.c | 25 +-
+ drivers/gpu/drm/drm_bridge.c | 78 +-
+ drivers/gpu/drm/drm_connector.c | 83 +-
+ drivers/gpu/drm/drm_crtc_internal.h | 2 +
+ drivers/gpu/drm/drm_dp_helper.c | 42 +-
+ drivers/gpu/drm/drm_edid.c | 367 +-
+ drivers/gpu/drm/drm_format_helper.c | 88 +
+ drivers/gpu/drm/drm_fourcc.c | 1 +
+ drivers/gpu/drm/drm_gem_shmem_helper.c | 23 +-
+ drivers/gpu/drm/drm_gem_vram_helper.c | 1 -
+ drivers/gpu/drm/drm_ioctl.c | 21 +-
+ drivers/gpu/drm/drm_kms_helper_common.c | 11 -
+ drivers/gpu/drm/drm_lease.c | 39 +-
+ drivers/gpu/drm/drm_mipi_dsi.c | 81 +
+ drivers/gpu/drm/drm_modeset_lock.c | 2 +-
+ drivers/gpu/drm/drm_of.c | 3 +
+ drivers/gpu/drm/drm_panel_orientation_quirks.c | 49 +-
+ drivers/gpu/drm/drm_probe_helper.c | 119 +-
+ drivers/gpu/drm/drm_property.c | 9 +-
+ drivers/gpu/drm/drm_sysfs.c | 87 +-
+ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 4 +-
+ drivers/gpu/drm/gma500/backlight.c | 12 +-
+ drivers/gpu/drm/gma500/cdv_device.c | 24 +-
+ drivers/gpu/drm/gma500/cdv_intel_display.c | 10 +-
+ drivers/gpu/drm/gma500/cdv_intel_dp.c | 12 +-
+ drivers/gpu/drm/gma500/cdv_intel_lvds.c | 22 +-
+ drivers/gpu/drm/gma500/framebuffer.c | 16 +-
+ drivers/gpu/drm/gma500/gem.c | 2 +-
+ drivers/gpu/drm/gma500/gma_device.c | 2 +-
+ drivers/gpu/drm/gma500/gma_display.c | 14 +-
+ drivers/gpu/drm/gma500/gtt.c | 18 +-
+ drivers/gpu/drm/gma500/intel_bios.c | 10 +-
+ drivers/gpu/drm/gma500/intel_gmbus.c | 12 +-
+ drivers/gpu/drm/gma500/mid_bios.c | 11 +-
+ drivers/gpu/drm/gma500/mmu.c | 12 +-
+ drivers/gpu/drm/gma500/oaktrail_crtc.c | 8 +-
+ drivers/gpu/drm/gma500/oaktrail_device.c | 20 +-
+ drivers/gpu/drm/gma500/oaktrail_hdmi.c | 18 +-
+ drivers/gpu/drm/gma500/oaktrail_lvds.c | 14 +-
+ drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c | 2 +-
+ drivers/gpu/drm/gma500/opregion.c | 14 +-
+ drivers/gpu/drm/gma500/power.c | 20 +-
+ drivers/gpu/drm/gma500/psb_device.c | 16 +-
+ drivers/gpu/drm/gma500/psb_drv.c | 147 +-
+ drivers/gpu/drm/gma500/psb_drv.h | 24 +-
+ drivers/gpu/drm/gma500/psb_intel_display.c | 10 +-
+ drivers/gpu/drm/gma500/psb_intel_lvds.c | 31 +-
+ drivers/gpu/drm/gma500/psb_intel_sdvo.c | 10 +-
+ drivers/gpu/drm/gma500/psb_irq.c | 26 +-
+ drivers/gpu/drm/gma500/psb_lid.c | 2 +-
+ drivers/gpu/drm/gud/Kconfig | 2 +-
+ drivers/gpu/drm/gud/gud_drv.c | 6 +
+ drivers/gpu/drm/gud/gud_internal.h | 12 +
+ drivers/gpu/drm/gud/gud_pipe.c | 6 +
+ drivers/gpu/drm/i915/Kconfig | 11 +
+ drivers/gpu/drm/i915/Makefile | 36 +-
+ drivers/gpu/drm/i915/display/g4x_dp.c | 90 +-
+ drivers/gpu/drm/i915/display/g4x_hdmi.c | 2 +-
+ drivers/gpu/drm/i915/display/icl_dsi.c | 165 +-
+ drivers/gpu/drm/i915/display/intel_acpi.c | 46 +
+ drivers/gpu/drm/i915/display/intel_acpi.h | 3 +
+ drivers/gpu/drm/i915/display/intel_atomic_plane.c | 209 +
+ drivers/gpu/drm/i915/display/intel_audio.c | 43 +-
+ drivers/gpu/drm/i915/display/intel_backlight.c | 1776 ++
+ drivers/gpu/drm/i915/display/intel_backlight.h | 52 +
+ drivers/gpu/drm/i915/display/intel_bios.c | 420 +-
+ drivers/gpu/drm/i915/display/intel_bw.c | 2 +-
+ drivers/gpu/drm/i915/display/intel_cdclk.c | 348 +-
+ drivers/gpu/drm/i915/display/intel_cdclk.h | 4 +-
+ drivers/gpu/drm/i915/display/intel_color.c | 140 +-
+ drivers/gpu/drm/i915/display/intel_combo_phy.c | 8 +-
+ drivers/gpu/drm/i915/display/intel_connector.c | 6 +-
+ drivers/gpu/drm/i915/display/intel_crt.c | 4 +-
+ drivers/gpu/drm/i915/display/intel_cursor.c | 11 +-
+ drivers/gpu/drm/i915/display/intel_ddi.c | 535 +-
+ drivers/gpu/drm/i915/display/intel_ddi.h | 7 +-
+ drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c | 672 +-
+ drivers/gpu/drm/i915/display/intel_ddi_buf_trans.h | 23 +-
+ drivers/gpu/drm/i915/display/intel_display.c | 2523 +--
+ drivers/gpu/drm/i915/display/intel_display.h | 47 +-
+ .../gpu/drm/i915/display/intel_display_debugfs.c | 127 +-
+ .../gpu/drm/i915/display/intel_display_debugfs.h | 10 +-
+ drivers/gpu/drm/i915/display/intel_display_power.c | 15 +-
+ drivers/gpu/drm/i915/display/intel_display_power.h | 4 +
+ drivers/gpu/drm/i915/display/intel_display_types.h | 48 +-
+ drivers/gpu/drm/i915/display/intel_dmc.c | 20 +-
+ drivers/gpu/drm/i915/display/intel_dp.c | 777 +-
+ drivers/gpu/drm/i915/display/intel_dp.h | 22 +-
+ drivers/gpu/drm/i915/display/intel_dp_aux.c | 6 +-
+ .../gpu/drm/i915/display/intel_dp_aux_backlight.c | 12 +-
+ drivers/gpu/drm/i915/display/intel_dp_hdcp.c | 78 +-
+ .../gpu/drm/i915/display/intel_dp_link_training.c | 467 +-
+ .../gpu/drm/i915/display/intel_dp_link_training.h | 1 +
+ drivers/gpu/drm/i915/display/intel_dp_mst.c | 45 +-
+ drivers/gpu/drm/i915/display/intel_dp_mst.h | 4 +-
+ drivers/gpu/drm/i915/display/intel_dpio_phy.c | 33 +-
+ drivers/gpu/drm/i915/display/intel_dpio_phy.h | 5 +-
+ drivers/gpu/drm/i915/display/intel_dpll.c | 674 +-
+ drivers/gpu/drm/i915/display/intel_dpll.h | 26 +-
+ drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 46 +-
+ drivers/gpu/drm/i915/display/intel_dpll_mgr.h | 11 -
+ drivers/gpu/drm/i915/display/intel_dpt.c | 239 +
+ drivers/gpu/drm/i915/display/intel_dpt.h | 19 +
+ drivers/gpu/drm/i915/display/intel_drrs.c | 437 +
+ drivers/gpu/drm/i915/display/intel_drrs.h | 36 +
+ drivers/gpu/drm/i915/display/intel_dsi.c | 16 +-
+ drivers/gpu/drm/i915/display/intel_dsi.h | 3 +
+ .../gpu/drm/i915/display/intel_dsi_dcs_backlight.c | 33 +-
+ drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 3 +-
+ drivers/gpu/drm/i915/display/intel_dvo.c | 24 +-
+ drivers/gpu/drm/i915/display/intel_fb.c | 606 +-
+ drivers/gpu/drm/i915/display/intel_fb.h | 20 +-
+ drivers/gpu/drm/i915/display/intel_fb_pin.c | 274 +
+ drivers/gpu/drm/i915/display/intel_fb_pin.h | 28 +
+ drivers/gpu/drm/i915/display/intel_fbc.c | 292 +-
+ drivers/gpu/drm/i915/display/intel_fbc.h | 2 +-
+ drivers/gpu/drm/i915/display/intel_fbdev.c | 4 +-
+ drivers/gpu/drm/i915/display/intel_fdi.c | 321 +-
+ drivers/gpu/drm/i915/display/intel_fdi.h | 17 +-
+ drivers/gpu/drm/i915/display/intel_frontbuffer.c | 5 +-
+ drivers/gpu/drm/i915/display/intel_frontbuffer.h | 4 +-
+ drivers/gpu/drm/i915/display/intel_hdcp.c | 70 +-
+ drivers/gpu/drm/i915/display/intel_hdmi.c | 20 +-
+ drivers/gpu/drm/i915/display/intel_hotplug.c | 4 +-
+ drivers/gpu/drm/i915/display/intel_lvds.c | 33 +-
+ drivers/gpu/drm/i915/display/intel_opregion.c | 5 +-
+ drivers/gpu/drm/i915/display/intel_panel.c | 1835 +-
+ drivers/gpu/drm/i915/display/intel_panel.h | 48 +-
+ drivers/gpu/drm/i915/display/intel_plane_initial.c | 283 +
+ drivers/gpu/drm/i915/display/intel_plane_initial.h | 13 +
+ drivers/gpu/drm/i915/display/intel_pps.c | 59 +
+ drivers/gpu/drm/i915/display/intel_pps.h | 3 +
+ drivers/gpu/drm/i915/display/intel_psr.c | 476 +-
+ drivers/gpu/drm/i915/display/intel_psr.h | 13 +-
+ drivers/gpu/drm/i915/display/intel_sdvo.c | 19 +-
+ drivers/gpu/drm/i915/display/intel_snps_phy.c | 225 +-
+ drivers/gpu/drm/i915/display/intel_snps_phy.h | 4 +-
+ drivers/gpu/drm/i915/display/intel_tc.c | 290 +-
+ drivers/gpu/drm/i915/display/intel_tc.h | 6 +-
+ drivers/gpu/drm/i915/display/intel_tv.c | 2 +-
+ drivers/gpu/drm/i915/display/intel_vdsc.c | 77 +-
+ drivers/gpu/drm/i915/display/intel_vdsc.h | 6 +-
+ drivers/gpu/drm/i915/display/skl_universal_plane.c | 58 +-
+ drivers/gpu/drm/i915/display/vlv_dsi.c | 53 +-
+ drivers/gpu/drm/i915/display/vlv_dsi_pll.c | 25 +-
+ drivers/gpu/drm/i915/gem/i915_gem_busy.c | 57 +-
+ drivers/gpu/drm/i915/gem/i915_gem_context.c | 514 +-
+ drivers/gpu/drm/i915/gem/i915_gem_context.h | 19 +-
+ drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 58 +-
+ drivers/gpu/drm/i915/gem/i915_gem_create.c | 75 +-
+ drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 9 +-
+ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 823 +-
+ drivers/gpu/drm/i915/gem/i915_gem_internal.c | 2 +
+ drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 33 +-
+ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 +
+ drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
+ drivers/gpu/drm/i915/gem/i915_gem_object.c | 70 +-
+ drivers/gpu/drm/i915/gem/i915_gem_object.h | 29 +-
+ drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 57 +-
+ drivers/gpu/drm/i915/gem/i915_gem_pm.c | 91 +
+ drivers/gpu/drm/i915/gem/i915_gem_pm.h | 1 +
+ drivers/gpu/drm/i915/gem/i915_gem_region.c | 70 +
+ drivers/gpu/drm/i915/gem/i915_gem_region.h | 37 +
+ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 29 +-
+ drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 201 +-
+ drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 14 +
+ drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 206 +
+ drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h | 26 +
+ drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 8 +-
+ drivers/gpu/drm/i915/gem/i915_gemfs.c | 22 +-
+ drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 48 +-
+ .../drm/i915/gem/selftests/i915_gem_client_blt.c | 29 +-
+ .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 36 +-
+ .../drm/i915/gem/selftests/i915_gem_execbuffer.c | 190 -
+ drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 2 +
+ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 5 +-
+ drivers/gpu/drm/i915/gt/debugfs_engines.h | 14 -
+ drivers/gpu/drm/i915/gt/debugfs_gt.c | 47 -
+ drivers/gpu/drm/i915/gt/debugfs_gt_pm.h | 14 -
+ drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 2 +-
+ drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 7 +-
+ drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 4 +-
+ drivers/gpu/drm/i915/gt/intel_context.c | 64 +-
+ drivers/gpu/drm/i915/gt/intel_context.h | 56 +-
+ drivers/gpu/drm/i915/gt/intel_context_types.h | 153 +-
+ drivers/gpu/drm/i915/gt/intel_engine.h | 19 +-
+ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 150 +-
+ drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +-
+ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 36 +
+ drivers/gpu/drm/i915/gt/intel_engine_pm.h | 39 +
+ drivers/gpu/drm/i915/gt/intel_engine_types.h | 31 +-
+ .../gpu/drm/i915/gt/intel_execlists_submission.c | 17 +-
+ drivers/gpu/drm/i915/gt/intel_ggtt.c | 52 +-
+ drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 22 +-
+ drivers/gpu/drm/i915/gt/intel_gt.c | 22 +-
+ drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c | 2 -
+ drivers/gpu/drm/i915/gt/intel_gt_debugfs.c | 104 +
+ .../i915/gt/{debugfs_gt.h => intel_gt_debugfs.h} | 18 +-
+ ...ebugfs_engines.c => intel_gt_engines_debugfs.c} | 10 +-
+ drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h | 14 +
+ drivers/gpu/drm/i915/gt/intel_gt_irq.c | 7 +
+ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 22 +-
+ drivers/gpu/drm/i915/gt/intel_gt_pm.h | 14 +
+ .../gt/{debugfs_gt_pm.c => intel_gt_pm_debugfs.c} | 197 +-
+ drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h | 20 +
+ drivers/gpu/drm/i915/gt/intel_gt_types.h | 12 +
+ drivers/gpu/drm/i915/gt/intel_gtt.c | 9 +-
+ drivers/gpu/drm/i915/gt/intel_gtt.h | 11 +-
+ drivers/gpu/drm/i915/gt/intel_llc.c | 3 +-
+ drivers/gpu/drm/i915/gt/intel_lrc.c | 93 +-
+ drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +-
+ drivers/gpu/drm/i915/gt/intel_mocs.c | 176 +-
+ drivers/gpu/drm/i915/gt/intel_mocs.h | 1 +
+ drivers/gpu/drm/i915/gt/intel_ppgtt.c | 13 +-
+ drivers/gpu/drm/i915/gt/intel_rc6.c | 2 +-
+ drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +-
+ drivers/gpu/drm/i915/gt/intel_ring.c | 3 +-
+ drivers/gpu/drm/i915/gt/intel_ring_submission.c | 7 +-
+ drivers/gpu/drm/i915/gt/intel_rps.c | 22 +-
+ drivers/gpu/drm/i915/gt/intel_rps.h | 1 +
+ drivers/gpu/drm/i915/gt/intel_sseu.c | 65 +-
+ drivers/gpu/drm/i915/gt/intel_sseu.h | 11 +-
+ drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 10 +-
+ drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +-
+ drivers/gpu/drm/i915/gt/intel_workarounds.c | 262 +-
+ drivers/gpu/drm/i915/gt/intel_workarounds.h | 2 +-
+ drivers/gpu/drm/i915/gt/mock_engine.c | 2 +
+ .../gpu/drm/i915/gt/selftest_engine_heartbeat.c | 4 +-
+ drivers/gpu/drm/i915/gt/selftest_execlists.c | 28 +-
+ drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 10 +-
+ drivers/gpu/drm/i915/gt/selftest_workarounds.c | 2 +-
+ drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h | 1 +
+ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 39 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc.h | 119 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 28 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 60 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 18 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 13 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 34 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c | 8 +-
+ drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2298 +-
+ drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 +-
+ drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c | 6 +-
+ drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +-
+ drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c | 6 +-
+ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 93 +-
+ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 9 +
+ drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 127 +
+ .../gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c | 179 +
+ drivers/gpu/drm/i915/gvt/gtt.c | 17 +-
+ drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +-
+ drivers/gpu/drm/i915/gvt/scheduler.c | 2 +-
+ drivers/gpu/drm/i915/i915_buddy.c | 45 +
+ drivers/gpu/drm/i915/i915_buddy.h | 8 +
+ drivers/gpu/drm/i915/i915_config.c | 2 +-
+ drivers/gpu/drm/i915/i915_debugfs.c | 286 +-
+ drivers/gpu/drm/i915/i915_drv.c | 17 +-
+ drivers/gpu/drm/i915/i915_drv.h | 168 +-
+ drivers/gpu/drm/i915/i915_gem.c | 2 -
+ drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +-
+ drivers/gpu/drm/i915/i915_gem_ww.h | 25 +-
+ drivers/gpu/drm/i915/i915_gpu_error.c | 42 +-
+ drivers/gpu/drm/i915/i915_irq.c | 94 +-
+ drivers/gpu/drm/i915/i915_irq.h | 51 +-
+ drivers/gpu/drm/i915/i915_module.c | 4 +-
+ drivers/gpu/drm/i915/i915_params.h | 2 +-
+ drivers/gpu/drm/i915/i915_pci.c | 14 +-
+ drivers/gpu/drm/i915/i915_pci.h | 12 +-
+ drivers/gpu/drm/i915/i915_query.c | 5 +-
+ drivers/gpu/drm/i915/i915_reg.h | 172 +-
+ drivers/gpu/drm/i915/i915_request.c | 157 +-
+ drivers/gpu/drm/i915/i915_request.h | 49 +-
+ drivers/gpu/drm/i915/i915_sysfs.c | 1 -
+ drivers/gpu/drm/i915/i915_trace.h | 14 +-
+ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 20 +-
+ drivers/gpu/drm/i915/i915_utils.h | 13 -
+ drivers/gpu/drm/i915/i915_vma.c | 21 +-
+ drivers/gpu/drm/i915/i915_vma.h | 13 +-
+ drivers/gpu/drm/i915/i915_vma_types.h | 7 +-
+ drivers/gpu/drm/i915/intel_device_info.h | 1 +
+ drivers/gpu/drm/i915/intel_dram.c | 6 +-
+ drivers/gpu/drm/i915/intel_memory_region.c | 12 +
+ drivers/gpu/drm/i915/intel_memory_region.h | 4 +
+ drivers/gpu/drm/i915/intel_pcode.c | 235 +
+ drivers/gpu/drm/i915/intel_pcode.h | 26 +
+ drivers/gpu/drm/i915/intel_pm.c | 307 +-
+ drivers/gpu/drm/i915/intel_pm.h | 3 +-
+ drivers/gpu/drm/i915/intel_runtime_pm.h | 2 -
+ drivers/gpu/drm/i915/intel_sbi.c | 73 +
+ drivers/gpu/drm/i915/intel_sbi.h | 23 +
+ drivers/gpu/drm/i915/intel_sideband.c | 577 -
+ drivers/gpu/drm/i915/intel_uncore.c | 447 +-
+ drivers/gpu/drm/i915/intel_uncore.h | 20 +-
+ drivers/gpu/drm/i915/intel_wakeref.h | 12 +
+ drivers/gpu/drm/i915/pxp/intel_pxp.c | 299 +
+ drivers/gpu/drm/i915/pxp/intel_pxp.h | 64 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c | 141 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h | 15 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c | 78 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h | 21 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_irq.c | 101 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_irq.h | 32 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_pm.c | 46 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_pm.h | 24 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_session.c | 175 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_session.h | 15 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_tee.c | 172 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_tee.h | 17 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h | 36 +
+ drivers/gpu/drm/i915/pxp/intel_pxp_types.h | 83 +
+ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 8 +-
+ .../gpu/drm/i915/selftests/i915_live_selftests.h | 2 +
+ drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +-
+ .../drm/i915/selftests/intel_scheduler_helpers.c | 12 +
+ .../drm/i915/selftests/intel_scheduler_helpers.h | 2 +
+ drivers/gpu/drm/i915/selftests/intel_uncore.c | 34 +-
+ drivers/gpu/drm/i915/selftests/mock_region.c | 2 -
+ drivers/gpu/drm/i915/vlv_sideband.c | 266 +
+ .../drm/i915/{intel_sideband.h => vlv_sideband.h} | 34 +-
+ drivers/gpu/drm/lima/lima_gem.c | 9 +-
+ drivers/gpu/drm/lima/lima_sched.c | 28 +-
+ drivers/gpu/drm/lima/lima_sched.h | 6 +-
+ drivers/gpu/drm/mcde/mcde_drv.c | 4 +-
+ drivers/gpu/drm/mcde/mcde_dsi.c | 4 +-
+ drivers/gpu/drm/meson/meson_drv.c | 3 +-
+ drivers/gpu/drm/meson/meson_dw_hdmi.c | 4 +-
+ drivers/gpu/drm/mgag200/mgag200_drv.h | 2 -
+ drivers/gpu/drm/mgag200/mgag200_mm.c | 35 +-
+ drivers/gpu/drm/msm/Kconfig | 4 +-
+ drivers/gpu/drm/msm/msm_gem.c | 2 +-
+ drivers/gpu/drm/msm/msm_gem.h | 5 -
+ drivers/gpu/drm/msm/msm_gem_submit.c | 35 +-
+ drivers/gpu/drm/msm/msm_ringbuffer.c | 12 -
+ drivers/gpu/drm/nouveau/nouveau_bo.c | 8 +-
+ drivers/gpu/drm/nouveau/nouveau_sgdma.c | 2 -
+ drivers/gpu/drm/omapdrm/Kconfig | 3 +-
+ drivers/gpu/drm/omapdrm/dss/dsi.c | 4 +-
+ drivers/gpu/drm/omapdrm/omap_drv.c | 2 +-
+ drivers/gpu/drm/panel/Kconfig | 27 +-
+ drivers/gpu/drm/panel/Makefile | 2 +
+ drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 743 +-
+ drivers/gpu/drm/panel/panel-edp.c | 1896 ++
+ drivers/gpu/drm/panel/panel-orisetech-otm8009a.c | 85 +-
+ drivers/gpu/drm/panel/panel-samsung-s6d27a1.c | 320 +
+ drivers/gpu/drm/panel/panel-simple.c | 1098 +-
+ drivers/gpu/drm/panfrost/panfrost_device.c | 10 +-
+ drivers/gpu/drm/panfrost/panfrost_drv.c | 33 +-
+ drivers/gpu/drm/panfrost/panfrost_job.c | 48 +-
+ drivers/gpu/drm/panfrost/panfrost_job.h | 5 +-
+ drivers/gpu/drm/panfrost/panfrost_mmu.c | 42 +-
+ drivers/gpu/drm/panfrost/panfrost_perfcnt.c | 4 +-
+ drivers/gpu/drm/qxl/qxl_release.c | 4 +-
+ drivers/gpu/drm/qxl/qxl_ttm.c | 1 -
+ drivers/gpu/drm/r128/ati_pcigart.c | 11 +-
+ drivers/gpu/drm/radeon/atombios.h | 2 +-
+ drivers/gpu/drm/radeon/ci_dpm.c | 3 +-
+ drivers/gpu/drm/radeon/r600_dpm.c | 10 +-
+ drivers/gpu/drm/radeon/radeon_fence.c | 24 +-
+ drivers/gpu/drm/radeon/radeon_ttm.c | 15 +-
+ drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 9 +-
+ drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 11 -
+ drivers/gpu/drm/rcar-du/rcar_du_drv.c | 108 +-
+ drivers/gpu/drm/rcar-du/rcar_du_drv.h | 26 +-
+ drivers/gpu/drm/rcar-du/rcar_du_encoder.c | 12 +-
+ drivers/gpu/drm/rcar-du/rcar_du_group.c | 6 +-
+ drivers/gpu/drm/rcar-du/rcar_du_kms.c | 50 +-
+ drivers/gpu/drm/rcar-du/rcar_du_kms.h | 7 +
+ drivers/gpu/drm/rcar-du/rcar_du_regs.h | 9 +-
+ drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 36 +-
+ drivers/gpu/drm/rcar-du/rcar_lvds.c | 4 +-
+ drivers/gpu/drm/rockchip/Kconfig | 1 -
+ drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 2 +-
+ drivers/gpu/drm/rockchip/cdn-dp-core.c | 4 +-
+ drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 3 +-
+ drivers/gpu/drm/rockchip/inno_hdmi.c | 4 +-
+ drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 14 +-
+ drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 2 -
+ drivers/gpu/drm/rockchip/rockchip_lvds.c | 33 +-
+ drivers/gpu/drm/rockchip/rockchip_rgb.c | 26 +-
+ drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 2 +-
+ drivers/gpu/drm/scheduler/sched_entity.c | 140 +-
+ drivers/gpu/drm/scheduler/sched_fence.c | 62 +-
+ drivers/gpu/drm/scheduler/sched_main.c | 199 +-
+ drivers/gpu/drm/shmobile/shmob_drm_drv.c | 4 +-
+ drivers/gpu/drm/sti/sti_hqvdp.c | 4 +-
+ drivers/gpu/drm/stm/ltdc.c | 7 +-
+ drivers/gpu/drm/sun4i/sun4i_backend.c | 4 +-
+ drivers/gpu/drm/sun4i/sun4i_frontend.c | 4 +-
+ drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 4 +-
+ drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 +-
+ drivers/gpu/drm/sun4i/sun4i_tv.c | 4 +-
+ drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c | 18 +-
+ drivers/gpu/drm/sun4i/sun8i_csc.h | 4 +-
+ drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 21 +-
+ drivers/gpu/drm/sun4i/sun8i_mixer.c | 4 +-
+ drivers/gpu/drm/sun4i/sun8i_tcon_top.c | 4 +-
+ drivers/gpu/drm/tegra/fb.c | 2 +-
+ drivers/gpu/drm/tegra/plane.c | 2 +-
+ drivers/gpu/drm/tiny/Kconfig | 4 +-
+ drivers/gpu/drm/tiny/bochs.c | 8 +
+ drivers/gpu/drm/ttm/ttm_bo.c | 71 +-
+ drivers/gpu/drm/ttm/ttm_bo_util.c | 21 +-
+ drivers/gpu/drm/ttm/ttm_bo_vm.c | 15 +-
+ drivers/gpu/drm/ttm/ttm_device.c | 48 +
+ drivers/gpu/drm/ttm/ttm_module.c | 12 +
+ drivers/gpu/drm/ttm/ttm_pool.c | 42 +-
+ drivers/gpu/drm/ttm/ttm_range_manager.c | 8 +-
+ drivers/gpu/drm/ttm/ttm_resource.c | 49 +
+ drivers/gpu/drm/ttm/ttm_tt.c | 69 +-
+ drivers/gpu/drm/udl/Kconfig | 1 +
+ drivers/gpu/drm/v3d/Kconfig | 2 +-
+ drivers/gpu/drm/v3d/v3d_drv.c | 15 +-
+ drivers/gpu/drm/v3d/v3d_drv.h | 30 +-
+ drivers/gpu/drm/v3d/v3d_gem.c | 473 +-
+ drivers/gpu/drm/v3d/v3d_sched.c | 44 +-
+ drivers/gpu/drm/vboxvideo/vbox_drv.c | 5 +-
+ drivers/gpu/drm/vboxvideo/vbox_drv.h | 1 -
+ drivers/gpu/drm/vboxvideo/vbox_ttm.c | 17 +-
+ drivers/gpu/drm/vc4/vc4_dpi.c | 15 +-
+ drivers/gpu/drm/vc4/vc4_drv.c | 6 +-
+ drivers/gpu/drm/vc4/vc4_dsi.c | 28 +-
+ drivers/gpu/drm/vc4/vc4_hdmi.c | 5 +-
+ drivers/gpu/drm/vgem/vgem_drv.c | 342 +-
+ drivers/gpu/drm/virtio/virtgpu_debugfs.c | 1 +
+ drivers/gpu/drm/virtio/virtgpu_drv.c | 44 +-
+ drivers/gpu/drm/virtio/virtgpu_drv.h | 36 +-
+ drivers/gpu/drm/virtio/virtgpu_fence.c | 30 +-
+ drivers/gpu/drm/virtio/virtgpu_ioctl.c | 195 +-
+ drivers/gpu/drm/virtio/virtgpu_kms.c | 26 +-
+ drivers/gpu/drm/virtio/virtgpu_plane.c | 3 +-
+ drivers/gpu/drm/virtio/virtgpu_prime.c | 32 +-
+ drivers/gpu/drm/virtio/virtgpu_vq.c | 27 +-
+ drivers/gpu/drm/virtio/virtgpu_vram.c | 61 +
+ drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 15 +-
+ drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 10 +-
+ drivers/gpu/drm/zte/Kconfig | 10 -
+ drivers/gpu/drm/zte/Makefile | 10 -
+ drivers/gpu/drm/zte/zx_common_regs.h | 28 -
+ drivers/gpu/drm/zte/zx_drm_drv.c | 184 -
+ drivers/gpu/drm/zte/zx_drm_drv.h | 34 -
+ drivers/gpu/drm/zte/zx_hdmi.c | 760 -
+ drivers/gpu/drm/zte/zx_hdmi_regs.h | 66 -
+ drivers/gpu/drm/zte/zx_plane.c | 537 -
+ drivers/gpu/drm/zte/zx_plane.h | 26 -
+ drivers/gpu/drm/zte/zx_plane_regs.h | 120 -
+ drivers/gpu/drm/zte/zx_tvenc.c | 400 -
+ drivers/gpu/drm/zte/zx_tvenc_regs.h | 27 -
+ drivers/gpu/drm/zte/zx_vga.c | 527 -
+ drivers/gpu/drm/zte/zx_vga_regs.h | 33 -
+ drivers/gpu/drm/zte/zx_vou.c | 921 -
+ drivers/gpu/drm/zte/zx_vou.h | 64 -
+ drivers/gpu/drm/zte/zx_vou_regs.h | 212 -
+ drivers/misc/mei/Kconfig | 2 +
+ drivers/misc/mei/Makefile | 1 +
+ drivers/misc/mei/pxp/Kconfig | 13 +
+ drivers/misc/mei/pxp/Makefile | 7 +
+ drivers/misc/mei/pxp/mei_pxp.c | 229 +
+ drivers/misc/mei/pxp/mei_pxp.h | 18 +
+ drivers/regulator/core.c | 2 +-
+ drivers/usb/typec/altmodes/Kconfig | 1 +
+ drivers/usb/typec/altmodes/displayport.c | 58 +-
+ drivers/video/fbdev/efifb.c | 21 +-
+ include/drm/amd_asic_type.h | 1 +
+ include/drm/drm_bridge.h | 23 +-
+ include/drm/drm_connector.h | 37 +
+ include/drm/drm_displayid.h | 101 +-
+ include/drm/drm_dp_helper.h | 26 +
+ include/drm/drm_edid.h | 47 +
+ include/drm/drm_format_helper.h | 4 +
+ include/drm/drm_ioctl.h | 1 -
+ include/drm/drm_mipi_dsi.h | 4 +
+ include/drm/drm_mode_config.h | 13 +-
+ include/drm/drm_plane.h | 2 +-
+ include/drm/drm_print.h | 30 +
+ include/drm/drm_probe_helper.h | 1 +
+ include/drm/gpu_scheduler.h | 188 +-
+ include/drm/gud.h | 6 +-
+ include/drm/i915_component.h | 1 +
+ include/drm/i915_pciids.h | 8 +-
+ include/drm/i915_pxp_tee_interface.h | 42 +
+ include/drm/ttm/ttm_bo_api.h | 12 -
+ include/drm/ttm/ttm_bo_driver.h | 2 +-
+ include/drm/ttm/ttm_caching.h | 17 +
+ include/drm/ttm/ttm_device.h | 79 +-
+ include/drm/ttm/ttm_placement.h | 1 +
+ include/drm/ttm/ttm_pool.h | 5 +-
+ include/drm/ttm/ttm_range_manager.h | 18 +-
+ include/drm/ttm/ttm_resource.h | 9 +-
+ include/drm/ttm/ttm_tt.h | 98 +-
+ include/linux/dma-buf.h | 9 +-
+ include/linux/dma-fence.h | 32 +-
+ include/linux/dma-resv.h | 201 +-
+ include/linux/io.h | 5 +
+ include/linux/seqno-fence.h | 109 -
+ include/linux/shrinker.h | 1 +
+ include/linux/ww_mutex.h | 15 +-
+ include/uapi/drm/drm_fourcc.h | 6 +
+ include/uapi/drm/drm_mode.h | 4 +
+ include/uapi/drm/i915_drm.h | 242 +-
+ include/uapi/drm/v3d_drm.h | 78 +
+ include/uapi/drm/virtgpu_drm.h | 27 +
+ include/uapi/linux/virtio_gpu.h | 18 +-
+ kernel/locking/mutex.c | 41 +
+ kernel/locking/test-ww_mutex.c | 86 +-
+ kernel/locking/ww_rt_mutex.c | 25 +
+ lib/devres.c | 82 +
+ lib/locking-selftest.c | 2 +-
+ mm/vmscan.c | 15 +
+ 853 files changed, 76917 insertions(+), 22744 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/display/panel/panel-edp.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,s6d27a1.yaml
+ delete mode 100644 Documentation/gpu/rfc/i915_parallel_execbuf.h
+ delete mode 100644 drivers/dma-buf/seqno-fence.c
+ create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/Makefile
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dccg.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_dpp.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubbub.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hubp.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_hwseq.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_init.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_link_encoder.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_mpc.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_opp.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_optc.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_afmt.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_link_encoder.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/dcn31/dcn31_vpg.h
+ rename drivers/gpu/drm/amd/display/dc/dml/{dcn2x/dcn2x.c => dcn20/dcn20_fpu.c} (99%)
+ rename drivers/gpu/drm/amd/display/dc/dml/{dcn2x/dcn2x.h => dcn20/dcn20_fpu.h} (94%)
+ create mode 100644 drivers/gpu/drm/amd/display/dc/inc/dc_link_dpia.h
+ create mode 100644 drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
+ create mode 100644 drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_offset.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/clk/clk_11_0_1_sh_mask.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_offset.h
+ create mode 100755 drivers/gpu/drm/amd/include/asic_reg/dpcs/dpcs_2_0_3_sh_mask.h
+ create mode 100644 drivers/gpu/drm/amd/include/asic_reg/mp/mp_11_0_8_sh_mask.h
+ create mode 100644 drivers/gpu/drm/i915/display/intel_backlight.c
+ create mode 100644 drivers/gpu/drm/i915/display/intel_backlight.h
+ create mode 100644 drivers/gpu/drm/i915/display/intel_dpt.c
+ create mode 100644 drivers/gpu/drm/i915/display/intel_dpt.h
+ create mode 100644 drivers/gpu/drm/i915/display/intel_drrs.c
+ create mode 100644 drivers/gpu/drm/i915/display/intel_drrs.h
+ create mode 100644 drivers/gpu/drm/i915/display/intel_fb_pin.c
+ create mode 100644 drivers/gpu/drm/i915/display/intel_fb_pin.h
+ create mode 100644 drivers/gpu/drm/i915/display/intel_plane_initial.c
+ create mode 100644 drivers/gpu/drm/i915/display/intel_plane_initial.h
+ create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
+ create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h
+ delete mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
+ delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_engines.h
+ delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt.c
+ delete mode 100644 drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
+ create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_debugfs.c
+ rename drivers/gpu/drm/i915/gt/{debugfs_gt.h => intel_gt_debugfs.h} (58%)
+ rename drivers/gpu/drm/i915/gt/{debugfs_engines.c => intel_gt_engines_debugfs.c} (70%)
+ create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.h
+ rename drivers/gpu/drm/i915/gt/{debugfs_gt_pm.c => intel_gt_pm_debugfs.c} (79%)
+ create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.h
+ create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+ create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+ create mode 100644 drivers/gpu/drm/i915/intel_pcode.c
+ create mode 100644 drivers/gpu/drm/i915/intel_pcode.h
+ create mode 100644 drivers/gpu/drm/i915/intel_sbi.c
+ create mode 100644 drivers/gpu/drm/i915/intel_sbi.h
+ delete mode 100644 drivers/gpu/drm/i915/intel_sideband.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_cmd.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_cmd.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_irq.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_session.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h
+ create mode 100644 drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+ create mode 100644 drivers/gpu/drm/i915/vlv_sideband.c
+ rename drivers/gpu/drm/i915/{intel_sideband.h => vlv_sideband.h} (77%)
+ create mode 100644 drivers/gpu/drm/panel/panel-edp.c
+ create mode 100644 drivers/gpu/drm/panel/panel-samsung-s6d27a1.c
+ delete mode 100644 drivers/gpu/drm/zte/Kconfig
+ delete mode 100644 drivers/gpu/drm/zte/Makefile
+ delete mode 100644 drivers/gpu/drm/zte/zx_common_regs.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_drm_drv.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_drm_drv.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_hdmi.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_hdmi_regs.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_plane.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_plane.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_plane_regs.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_tvenc.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_tvenc_regs.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_vga.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_vga_regs.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_vou.c
+ delete mode 100644 drivers/gpu/drm/zte/zx_vou.h
+ delete mode 100644 drivers/gpu/drm/zte/zx_vou_regs.h
+ create mode 100644 drivers/misc/mei/pxp/Kconfig
+ create mode 100644 drivers/misc/mei/pxp/Makefile
+ create mode 100644 drivers/misc/mei/pxp/mei_pxp.c
+ create mode 100644 drivers/misc/mei/pxp/mei_pxp.h
+ create mode 100644 include/drm/i915_pxp_tee_interface.h
+ delete mode 100644 include/linux/seqno-fence.h
+Merging drm-misc/for-linux-next (4b2b5e142ff4 drm: Move GEM memory managers into modules)
+$ git merge -m Merge branch 'for-linux-next' of git://anongit.freedesktop.org/drm/drm-misc drm-misc/for-linux-next
+Auto-merging include/linux/dma-resv.h
+Auto-merging include/drm/drm_dp_helper.h
+Auto-merging include/drm/drm_connector.h
+Auto-merging drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+Auto-merging drivers/gpu/drm/panel/panel-ilitek-ili9881c.c
+Auto-merging drivers/gpu/drm/panel/panel-abt-y030xx067a.c
+Auto-merging drivers/gpu/drm/panel/Kconfig
+Auto-merging drivers/gpu/drm/nouveau/nouveau_gem.c
+Auto-merging drivers/gpu/drm/nouveau/nouveau_bo.c
+Auto-merging drivers/gpu/drm/i915/i915_request.c
+Auto-merging drivers/gpu/drm/drm_modeset_lock.c
+Auto-merging drivers/gpu/drm/drm_dp_helper.c
+Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+Auto-merging MAINTAINERS
+Auto-merging Documentation/devicetree/bindings/vendor-prefixes.yaml
+Removing Documentation/devicetree/bindings/display/bridge/sii9234.txt
+Removing Documentation/devicetree/bindings/display/bridge/ptn3460.txt
+Merge made by the 'recursive' strategy.
+ .../bindings/display/bridge/lvds-codec.yaml | 33 +-
+ .../bindings/display/bridge/nxp,ptn3460.yaml | 106 ++++
+ .../devicetree/bindings/display/bridge/ps8640.yaml | 19 +-
+ .../devicetree/bindings/display/bridge/ptn3460.txt | 39 --
+ .../devicetree/bindings/display/bridge/sii9234.txt | 49 --
+ .../bindings/display/bridge/sil,sii9234.yaml | 110 ++++
+ .../bindings/display/panel/boe,bf060y8m-aj0.yaml | 81 +++
+ .../bindings/display/panel/ilitek,ili9163.yaml | 69 ++
+ .../bindings/display/panel/ilitek,ili9881c.yaml | 2 +-
+ .../bindings/display/panel/novatek,nt35950.yaml | 106 ++++
+ .../bindings/display/panel/panel-simple-dsi.yaml | 2 +
+ .../bindings/display/panel/panel-simple.yaml | 5 +
+ .../bindings/display/panel/sharp,ls060t1sx01.yaml | 56 ++
+ .../display/panel/sony,tulip-truly-nt35521.yaml | 72 +++
+ .../devicetree/bindings/vendor-prefixes.yaml | 4 +
+ Documentation/gpu/drm-kms-helpers.rst | 15 +
+ Documentation/gpu/drm-kms.rst | 2 +
+ Documentation/gpu/todo.rst | 13 +-
+ MAINTAINERS | 8 +
+ drivers/dma-buf/Makefile | 3 +-
+ drivers/dma-buf/dma-buf.c | 64 +-
+ drivers/dma-buf/dma-resv.c | 69 +-
+ drivers/dma-buf/selftests.h | 1 +
+ drivers/dma-buf/st-dma-resv.c | 370 +++++++++++
+ drivers/gpu/drm/Kconfig | 23 +-
+ drivers/gpu/drm/Makefile | 27 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +-
+ drivers/gpu/drm/bridge/display-connector.c | 2 +-
+ drivers/gpu/drm/bridge/lontium-lt9611uxc.c | 9 +-
+ drivers/gpu/drm/bridge/lvds-codec.c | 69 +-
+ drivers/gpu/drm/bridge/nwl-dsi.c | 35 +
+ drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 1 +
+ drivers/gpu/drm/bridge/tc358768.c | 94 ++-
+ drivers/gpu/drm/bridge/ti-sn65dsi83.c | 47 +-
+ drivers/gpu/drm/drm_atomic_uapi.c | 4 +
+ drivers/gpu/drm/drm_connector.c | 235 ++++++-
+ drivers/gpu/drm/drm_dp_helper.c | 153 ++++-
+ drivers/gpu/drm/drm_drv.c | 13 +-
+ drivers/gpu/drm/drm_gem.c | 26 +-
+ drivers/gpu/drm/drm_gem_cma_helper.c | 4 +
+ drivers/gpu/drm/drm_gem_shmem_helper.c | 4 +
+ drivers/gpu/drm/drm_irq.c | 2 -
+ drivers/gpu/drm/drm_modeset_lock.c | 49 +-
+ drivers/gpu/drm/drm_of.c | 33 +
+ drivers/gpu/drm/drm_plane_helper.c | 1 -
+ drivers/gpu/drm/drm_privacy_screen.c | 467 ++++++++++++++
+ drivers/gpu/drm/drm_privacy_screen_x86.c | 86 +++
+ drivers/gpu/drm/gma500/framebuffer.c | 52 +-
+ drivers/gpu/drm/gma500/gem.c | 234 +++++--
+ drivers/gpu/drm/gma500/gem.h | 28 +-
+ drivers/gpu/drm/gma500/gma_display.c | 51 +-
+ drivers/gpu/drm/gma500/gtt.c | 326 ++--------
+ drivers/gpu/drm/gma500/gtt.h | 29 +-
+ drivers/gpu/drm/gma500/oaktrail_crtc.c | 3 +-
+ drivers/gpu/drm/gma500/psb_drv.c | 12 +
+ drivers/gpu/drm/gma500/psb_intel_display.c | 17 +-
+ drivers/gpu/drm/gma500/psb_intel_drv.h | 2 +-
+ drivers/gpu/drm/i915/i915_request.c | 34 +-
+ drivers/gpu/drm/msm/msm_gem.c | 19 +-
+ drivers/gpu/drm/mxsfb/mxsfb_kms.c | 8 +-
+ drivers/gpu/drm/nouveau/nouveau_bo.c | 4 -
+ drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +-
+ drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +-
+ drivers/gpu/drm/nouveau/nvkm/engine/nvenc/base.c | 1 -
+ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c | 2 +-
+ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 4 +-
+ drivers/gpu/drm/panel/Kconfig | 51 ++
+ drivers/gpu/drm/panel/Makefile | 5 +
+ drivers/gpu/drm/panel/panel-abt-y030xx067a.c | 14 +-
+ drivers/gpu/drm/panel/panel-boe-bf060y8m-aj0.c | 445 +++++++++++++
+ drivers/gpu/drm/panel/panel-dsi-cm.c | 4 +-
+ drivers/gpu/drm/panel/panel-feixin-k101-im2ba02.c | 13 +-
+ .../gpu/drm/panel/panel-feiyang-fy07024di26a30d.c | 29 +-
+ drivers/gpu/drm/panel/panel-ilitek-ili9881c.c | 254 +++++++-
+ drivers/gpu/drm/panel/panel-innolux-ej030na.c | 14 +-
+ drivers/gpu/drm/panel/panel-innolux-p079zca.c | 10 +-
+ drivers/gpu/drm/panel/panel-jdi-fhd-r63452.c | 323 ++++++++++
+ drivers/gpu/drm/panel/panel-jdi-lt070me05000.c | 8 +-
+ drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c | 8 +-
+ drivers/gpu/drm/panel/panel-lvds.c | 18 +-
+ drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c | 9 +
+ drivers/gpu/drm/panel/panel-novatek-nt35950.c | 702 +++++++++++++++++++++
+ drivers/gpu/drm/panel/panel-novatek-nt36672a.c | 10 +-
+ drivers/gpu/drm/panel/panel-novatek-nt39016.c | 20 +-
+ .../gpu/drm/panel/panel-panasonic-vvx10f034n00.c | 8 +-
+ drivers/gpu/drm/panel/panel-ronbo-rb070d30.c | 8 +-
+ drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c | 21 +-
+ drivers/gpu/drm/panel/panel-samsung-s6e63m0-dsi.c | 3 +-
+ drivers/gpu/drm/panel/panel-samsung-s6e63m0-spi.c | 3 +-
+ drivers/gpu/drm/panel/panel-samsung-s6e63m0.c | 4 +-
+ drivers/gpu/drm/panel/panel-samsung-s6e63m0.h | 2 +-
+ .../drm/panel/panel-samsung-s6e88a0-ams452ef01.c | 1 +
+ drivers/gpu/drm/panel/panel-samsung-sofef00.c | 17 +-
+ drivers/gpu/drm/panel/panel-sharp-ls037v7dw01.c | 21 +-
+ drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c | 8 +-
+ drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c | 333 ++++++++++
+ drivers/gpu/drm/panel/panel-simple.c | 91 +++
+ drivers/gpu/drm/panel/panel-sitronix-st7703.c | 8 +
+ .../gpu/drm/panel/panel-sony-tulip-truly-nt35521.c | 552 ++++++++++++++++
+ drivers/gpu/drm/panel/panel-tpo-td043mtea1.c | 14 +-
+ drivers/gpu/drm/panel/panel-xinpeng-xpp055c272.c | 25 +-
+ drivers/gpu/drm/radeon/radeon_sync.c | 22 +-
+ drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 82 ++-
+ drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 13 +-
+ drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c | 3 +-
+ drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 44 +-
+ drivers/gpu/drm/rockchip/rockchip_drm_gem.h | 7 -
+ drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 +-
+ drivers/gpu/drm/scheduler/sched_main.c | 29 +-
+ drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +-
+ drivers/gpu/drm/tiny/Kconfig | 13 +
+ drivers/gpu/drm/tiny/Makefile | 1 +
+ drivers/gpu/drm/tiny/ili9163.c | 225 +++++++
+ drivers/gpu/drm/ttm/ttm_bo.c | 16 +-
+ drivers/gpu/drm/v3d/v3d_gem.c | 17 +-
+ drivers/gpu/drm/virtio/virtgpu_display.c | 4 +-
+ drivers/platform/x86/Kconfig | 2 +
+ drivers/platform/x86/thinkpad_acpi.c | 137 ++--
+ drivers/video/fbdev/core/bitblit.c | 16 -
+ drivers/video/fbdev/core/fbcon.c | 509 +--------------
+ drivers/video/fbdev/core/fbcon.h | 59 --
+ drivers/video/fbdev/core/fbcon_ccw.c | 28 +-
+ drivers/video/fbdev/core/fbcon_cw.c | 28 +-
+ drivers/video/fbdev/core/fbcon_rotate.h | 9 -
+ drivers/video/fbdev/core/fbcon_ud.c | 37 +-
+ drivers/video/fbdev/core/fbmem.c | 5 +-
+ drivers/video/fbdev/core/fbsysfs.c | 14 +-
+ drivers/video/fbdev/core/tileblit.c | 16 -
+ drivers/video/fbdev/omap/omapfb_main.c | 1 +
+ .../video/fbdev/omap2/omapfb/dss/display-sysfs.c | 14 +-
+ .../video/fbdev/omap2/omapfb/dss/manager-sysfs.c | 18 +-
+ .../video/fbdev/omap2/omapfb/dss/overlay-sysfs.c | 20 +-
+ drivers/video/fbdev/omap2/omapfb/omapfb-sysfs.c | 12 +-
+ drivers/video/fbdev/skeletonfb.c | 12 +-
+ include/drm/drm_connector.h | 55 ++
+ include/drm/drm_dp_helper.h | 21 +-
+ include/drm/drm_modeset_lock.h | 8 +
+ include/drm/drm_of.h | 7 +
+ include/drm/drm_privacy_screen_consumer.h | 65 ++
+ include/drm/drm_privacy_screen_driver.h | 84 +++
+ include/drm/drm_privacy_screen_machine.h | 46 ++
+ include/drm/ttm/ttm_bo_api.h | 6 +-
+ include/linux/dma-buf.h | 4 +-
+ include/linux/dma-resv.h | 25 +-
+ include/linux/fb.h | 2 +-
+ 147 files changed, 6550 insertions(+), 1890 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/display/bridge/nxp,ptn3460.yaml
+ delete mode 100644 Documentation/devicetree/bindings/display/bridge/ptn3460.txt
+ delete mode 100644 Documentation/devicetree/bindings/display/bridge/sii9234.txt
+ create mode 100644 Documentation/devicetree/bindings/display/bridge/sil,sii9234.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/boe,bf060y8m-aj0.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/novatek,nt35950.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/sharp,ls060t1sx01.yaml
+ create mode 100644 Documentation/devicetree/bindings/display/panel/sony,tulip-truly-nt35521.yaml
+ create mode 100644 drivers/dma-buf/st-dma-resv.c
+ create mode 100644 drivers/gpu/drm/drm_privacy_screen.c
+ create mode 100644 drivers/gpu/drm/drm_privacy_screen_x86.c
+ create mode 100644 drivers/gpu/drm/panel/panel-boe-bf060y8m-aj0.c
+ create mode 100644 drivers/gpu/drm/panel/panel-jdi-fhd-r63452.c
+ create mode 100644 drivers/gpu/drm/panel/panel-novatek-nt35950.c
+ create mode 100644 drivers/gpu/drm/panel/panel-sharp-ls060t1sx01.c
+ create mode 100644 drivers/gpu/drm/panel/panel-sony-tulip-truly-nt35521.c
+ create mode 100644 drivers/gpu/drm/tiny/ili9163.c
+ create mode 100644 include/drm/drm_privacy_screen_consumer.h
+ create mode 100644 include/drm/drm_privacy_screen_driver.h
+ create mode 100644 include/drm/drm_privacy_screen_machine.h
+Merging amdgpu/drm-next (519fc0a14c81 drm/amdgpu/display: add quirk handling for stutter mode)
+$ git merge -m Merge branch 'drm-next' of https://gitlab.freedesktop.org/agd5f/linux amdgpu/drm-next
+Auto-merging drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+Removing drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
+Removing drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c
+Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+Removing drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+Auto-merging drivers/gpu/drm/Kconfig
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ MAINTAINERS | 1 +
+ drivers/gpu/drm/Kconfig | 1 +
+ drivers/gpu/drm/amd/amdgpu/Makefile | 8 +-
+ drivers/gpu/drm/amd/amdgpu/aldebaran.c | 2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 19 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 11 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 388 ---------------------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 30 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 20 --
+ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 37 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 33 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 27 ++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 1 +
+ drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 +-
+ drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c | 54 ---
+ .../gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c | 51 ---
+ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 +-
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +
+ drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c | 55 ---
+ drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c | 52 ---
+ drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c | 53 ---
+ drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 31 ++
+ drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1 +
+ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 32 +-
+ drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1 +
+ drivers/gpu/drm/amd/amdgpu/nv.c | 295 +---------------
+ drivers/gpu/drm/amd/amdgpu/nv.h | 10 -
+ .../gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c | 54 ---
+ drivers/gpu/drm/amd/amdgpu/soc15.c | 182 +---------
+ drivers/gpu/drm/amd/amdgpu/soc15.h | 1 -
+ drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 7 +-
+ drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c | 50 ---
+ drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 10 +-
+ drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 15 +-
+ drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 17 +-
+ drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 28 +-
+ drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c | 51 ---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 8 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 150 +++++---
+ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 185 ++++++++--
+ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 +
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 19 +-
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 153 ++++----
+ .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 90 ++++-
+ .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 21 +-
+ drivers/gpu/drm/amd/display/dc/core/dc.c | 25 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link.c | 63 +---
+ drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 40 ++-
+ .../gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c | 9 +-
+ drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 5 +
+ drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 4 +
+ drivers/gpu/drm/amd/display/dc/dc.h | 3 +-
+ drivers/gpu/drm/amd/display/dc/dc_link.h | 1 -
+ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 +
+ drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 2 +
+ .../amd/display/dc/dce110/dce110_hw_sequencer.c | 24 +-
+ .../drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 49 ++-
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dccg.h | 34 +-
+ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 4 +-
+ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 42 ++-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c | 2 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c | 237 ++++++++++++-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h | 34 +-
+ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 115 +++---
+ .../gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 17 +-
+ .../amd/display/dc/dml/dcn31/display_mode_vba_31.c | 6 +-
+ drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 9 +
+ drivers/gpu/drm/amd/display/dc/inc/link_enc_cfg.h | 2 +-
+ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 +-
+ drivers/gpu/drm/amd/display/include/dal_asic_id.h | 2 +-
+ drivers/gpu/drm/amd/include/amd_shared.h | 5 +-
+ .../amd/include/asic_reg/dcn/dcn_3_1_2_offset.h | 2 +
+ .../amd/include/asic_reg/dcn/dcn_3_1_2_sh_mask.h | 8 +
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 14 +-
+ drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 4 +-
+ drivers/gpu/drm/amd/pm/inc/smu_types.h | 2 +-
+ drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 4 +-
+ drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h | 2 +-
+ drivers/gpu/drm/amd/pm/inc/smu_v13_0.h | 4 +-
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 14 +
+ .../drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c | 17 +-
+ .../drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 95 ++---
+ drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 21 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 100 ++----
+ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h | 1 -
+ drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 6 +-
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 11 +-
+ drivers/gpu/drm/radeon/radeon.h | 3 +
+ drivers/gpu/drm/radeon/radeon_audio.c | 95 +++++
+ 102 files changed, 1536 insertions(+), 1974 deletions(-)
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/beige_goby_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/sienna_cichlid_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
+ delete mode 100644 drivers/gpu/drm/amd/amdgpu/yellow_carp_reg_init.c
+Merging drm-intel/for-linux-next (877d074939a5 drm/i915/cdclk: put the cdclk vtables in const data)
+$ git merge -m Merge branch 'for-linux-next' of git://anongit.freedesktop.org/drm-intel drm-intel/for-linux-next
+Auto-merging drivers/gpu/drm/i915/intel_pm.c
+Auto-merging drivers/gpu/drm/i915/gem/i915_gem_context.c
+Merge made by the 'recursive' strategy.
+ drivers/gpu/drm/i915/Makefile | 2 + + drivers/gpu/drm/i915/display/i9xx_plane.c | 30 +- + drivers/gpu/drm/i915/display/intel_atomic_plane.c | 1 + + drivers/gpu/drm/i915/display/intel_bios.c | 47 - + drivers/gpu/drm/i915/display/intel_cdclk.c | 44 +- + drivers/gpu/drm/i915/display/intel_crt.c | 13 +- + drivers/gpu/drm/i915/display/intel_crtc.h | 5 + + drivers/gpu/drm/i915/display/intel_cursor.c | 19 +- + drivers/gpu/drm/i915/display/intel_ddi.c | 44 +- + drivers/gpu/drm/i915/display/intel_ddi.h | 1 + + drivers/gpu/drm/i915/display/intel_display.c | 1213 +------------------- + drivers/gpu/drm/i915/display/intel_display.h | 17 +- + drivers/gpu/drm/i915/display/intel_display_power.c | 7 +- + drivers/gpu/drm/i915/display/intel_display_power.h | 2 +- + drivers/gpu/drm/i915/display/intel_display_types.h | 19 +- + drivers/gpu/drm/i915/display/intel_dp.c | 130 ++- + .../gpu/drm/i915/display/intel_dp_link_training.c | 38 +- + drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 7 +- + drivers/gpu/drm/i915/display/intel_fb.c | 520 ++++++++- + drivers/gpu/drm/i915/display/intel_fb.h | 30 +- + drivers/gpu/drm/i915/display/intel_fdi.c | 136 +-- + drivers/gpu/drm/i915/display/intel_fdi.h | 2 +- + drivers/gpu/drm/i915/display/intel_hdmi.c | 117 +- + drivers/gpu/drm/i915/display/intel_pch_display.c | 501 ++++++++ + drivers/gpu/drm/i915/display/intel_pch_display.h | 27 + + drivers/gpu/drm/i915/display/intel_pch_refclk.c | 648 +++++++++++ + drivers/gpu/drm/i915/display/intel_pch_refclk.h | 21 + + drivers/gpu/drm/i915/display/intel_psr.c | 6 +- + drivers/gpu/drm/i915/display/intel_sprite.c | 35 +- + drivers/gpu/drm/i915/display/intel_sprite.h | 4 - + drivers/gpu/drm/i915/display/skl_scaler.c | 1 + + drivers/gpu/drm/i915/display/skl_universal_plane.c | 288 ++--- + drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- + drivers/gpu/drm/i915/i915_drv.c | 1 + + drivers/gpu/drm/i915/i915_drv.h | 1 - + drivers/gpu/drm/i915/i915_reg.h | 4 +- + drivers/gpu/drm/i915/intel_pm.c | 7 +- + 37 files changed, 2156 insertions(+), 1834 deletions(-) + create mode 100644 drivers/gpu/drm/i915/display/intel_pch_display.c + create mode 100644 drivers/gpu/drm/i915/display/intel_pch_display.h + create mode 100644 drivers/gpu/drm/i915/display/intel_pch_refclk.c + create mode 100644 drivers/gpu/drm/i915/display/intel_pch_refclk.h +Merging drm-tegra/drm/tegra/for-next (5dccbc9de8f0 drm/tegra: dc: rgb: Allow changing PLLD rate on Tegra30+) +$ git merge -m Merge branch 'drm/tegra/for-next' of git://anongit.freedesktop.org/tegra/linux.git drm-tegra/drm/tegra/for-next +Auto-merging drivers/gpu/drm/tegra/plane.c +Merge made by the 'recursive' strategy. 
+ drivers/gpu/drm/tegra/Makefile | 3 +- + drivers/gpu/drm/tegra/dc.c | 117 ++++++++-- + drivers/gpu/drm/tegra/dc.h | 1 + + drivers/gpu/drm/tegra/drm.c | 4 + + drivers/gpu/drm/tegra/drm.h | 1 + + drivers/gpu/drm/tegra/gem.c | 171 +++++++++------ + drivers/gpu/drm/tegra/gr2d.c | 33 ++- + drivers/gpu/drm/tegra/hub.h | 1 - + drivers/gpu/drm/tegra/nvdec.c | 470 +++++++++++++++++++++++++++++++++++++++++ + drivers/gpu/drm/tegra/plane.c | 65 ++---- + drivers/gpu/drm/tegra/plane.h | 2 +- + drivers/gpu/drm/tegra/rgb.c | 53 ++++- + drivers/gpu/drm/tegra/submit.c | 63 ++++-- + drivers/gpu/drm/tegra/uapi.c | 68 +++--- + drivers/gpu/drm/tegra/uapi.h | 5 +- + drivers/gpu/drm/tegra/vic.c | 8 + + drivers/gpu/host1x/Kconfig | 1 + + drivers/gpu/host1x/bus.c | 79 ++++++- + drivers/gpu/host1x/dev.c | 20 ++ + drivers/gpu/host1x/dev.h | 2 + + drivers/gpu/host1x/job.c | 160 +++++--------- + drivers/gpu/host1x/job.h | 6 +- + include/linux/host1x.h | 75 +++++-- + 23 files changed, 1087 insertions(+), 321 deletions(-) + create mode 100644 drivers/gpu/drm/tegra/nvdec.c +Merging drm-msm/msm-next (e60af4f8550f dt-bindings: msm/dp: Add SC8180x compatibles) +$ git merge -m Merge branch 'msm-next' of https://gitlab.freedesktop.org/drm/msm.git drm-msm/msm-next +Auto-merging drivers/gpu/drm/msm/msm_gpu_devfreq.c +Auto-merging drivers/gpu/drm/msm/msm_gpu.h +Auto-merging drivers/gpu/drm/msm/msm_gem.c +Removing drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +Auto-merging drivers/gpu/drm/msm/Kconfig +Auto-merging Documentation/devicetree/bindings/display/msm/dp-controller.yaml +Merge made by the 'recursive' strategy. + .../bindings/display/msm/dp-controller.yaml | 15 +- + .../bindings/display/msm/dsi-phy-14nm.yaml | 1 + + drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 1 - + drivers/gpu/drm/msm/Kconfig | 2 + + drivers/gpu/drm/msm/Makefile | 1 - + drivers/gpu/drm/msm/adreno/a5xx_debugfs.c | 6 +- + drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 10 +- + drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 2 +- + drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c | 256 ------------------ + drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 8 +- + drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 39 ++- + drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h | 4 +- + .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c | 2 +- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 8 +- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h | 8 - + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c | 267 ++++++++++++++----- + drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h | 92 +------ + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 70 +++-- + drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 13 - + drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 4 +- + drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 18 +- + drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c | 89 +++++++ + drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 18 +- + drivers/gpu/drm/msm/disp/mdp5/mdp5_mdss.c | 12 +- + drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c | 8 +- + drivers/gpu/drm/msm/dp/dp_catalog.c | 64 ++--- + drivers/gpu/drm/msm/dp/dp_debug.c | 294 +++++---------------- + drivers/gpu/drm/msm/dp/dp_debug.h | 4 +- + drivers/gpu/drm/msm/dp/dp_display.c | 143 +++++----- + drivers/gpu/drm/msm/dp/dp_display.h | 2 + + drivers/gpu/drm/msm/dp/dp_drm.c | 13 +- + drivers/gpu/drm/msm/dp/dp_panel.c | 2 +- + drivers/gpu/drm/msm/dp/dp_parser.c | 137 ++++++---- + drivers/gpu/drm/msm/dp/dp_parser.h | 14 +- + drivers/gpu/drm/msm/dsi/dsi.h | 2 + + drivers/gpu/drm/msm/dsi/dsi_host.c | 123 ++++----- + drivers/gpu/drm/msm/dsi/dsi_manager.c | 66 ++++- + 
drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 + + drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 1 + + drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c | 25 +- + drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 4 +- + drivers/gpu/drm/msm/edp/edp_ctrl.c | 5 +- + drivers/gpu/drm/msm/hdmi/hdmi.c | 4 +- + drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c | 4 +- + drivers/gpu/drm/msm/msm_atomic.c | 21 +- + drivers/gpu/drm/msm/msm_drv.c | 33 ++- + drivers/gpu/drm/msm/msm_drv.h | 31 ++- + drivers/gpu/drm/msm/msm_gem.c | 5 +- + drivers/gpu/drm/msm/msm_gpu.c | 2 +- + drivers/gpu/drm/msm/msm_gpu.h | 7 + + drivers/gpu/drm/msm/msm_gpu_devfreq.c | 38 ++- + drivers/gpu/drm/msm/msm_kms.h | 3 +- + drivers/gpu/drm/msm/msm_submitqueue.c | 1 + + drivers/gpu/drm/tilcdc/tilcdc_drv.c | 4 +- + drivers/gpu/drm/vmwgfx/ttm_memory.c | 1 - + 55 files changed, 942 insertions(+), 1067 deletions(-) + delete mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_core_irq.c +Merging imx-drm/imx-drm/next (20fbfc81e390 drm/imx: imx-tve: Make use of the helper function devm_platform_ioremap_resource()) +$ git merge -m Merge branch 'imx-drm/next' of https://git.pengutronix.de/git/pza/linux imx-drm/imx-drm/next +Merge made by the 'recursive' strategy. + drivers/gpu/drm/imx/imx-tve.c | 4 +--- + drivers/gpu/ipu-v3/ipu-image-convert.c | 9 +++------ + 2 files changed, 4 insertions(+), 9 deletions(-) +Merging etnaviv/etnaviv/next (81fd23e2b3cc drm/etnaviv: Implement mmap as GEM object function) +$ git merge -m Merge branch 'etnaviv/next' of https://git.pengutronix.de/git/lst/linux etnaviv/etnaviv/next +Already up to date. +Merging regmap/for-next (a8d880671c13 Merge remote-tracking branch 'regmap/for-5.16' into regmap-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regmap.git regmap/for-next +Merge made by the 'recursive' strategy. + drivers/base/regmap/regmap-spi.c | 36 ++++++++++++++++++++++++++++++++---- + 1 file changed, 32 insertions(+), 4 deletions(-) +Merging sound/for-next (f917c04fac45 ALSA: memalloc: Fix a typo in snd_dma_buffer_sync() description) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git sound/for-next +Auto-merging sound/usb/quirks-table.h +Auto-merging sound/usb/mixer.c +Removing sound/core/sgbuf.c +Merge made by the 'recursive' strategy. 
+ include/sound/hdaudio_ext.h | 2 + + include/sound/memalloc.h | 58 ++- + include/uapi/sound/asound.h | 2 +- + include/uapi/sound/firewire.h | 145 +++++++ + sound/core/Makefile | 3 +- + sound/core/memalloc.c | 207 +++++++++- + sound/core/memalloc_local.h | 1 + + sound/core/pcm_compat.c | 4 + + sound/core/pcm_lib.c | 5 + + sound/core/pcm_local.h | 7 + + sound/core/pcm_memory.c | 13 +- + sound/core/pcm_native.c | 66 +++- + sound/core/sgbuf.c | 201 ---------- + sound/firewire/motu/Makefile | 3 +- + sound/firewire/motu/amdtp-motu.c | 9 + + .../motu/motu-command-dsp-message-parser.c | 178 +++++++++ + sound/firewire/motu/motu-hwdep.c | 113 +++++- + sound/firewire/motu/motu-protocol-v2.c | 14 +- + sound/firewire/motu/motu-protocol-v3.c | 14 +- + .../motu/motu-register-dsp-message-parser.c | 416 +++++++++++++++++++++ + sound/firewire/motu/motu-stream.c | 10 + + sound/firewire/motu/motu.c | 10 + + sound/firewire/motu/motu.h | 23 ++ + sound/hda/ext/hdac_ext_stream.c | 46 ++- + sound/hda/hdac_stream.c | 4 +- + sound/isa/Kconfig | 2 +- + sound/pci/Kconfig | 1 + + sound/pci/hda/hda_intel.c | 49 ++- + sound/soc/sof/intel/hda-dai.c | 7 +- + sound/usb/card.h | 11 +- + sound/usb/clock.c | 4 +- + sound/usb/endpoint.c | 230 ++++++++---- + sound/usb/endpoint.h | 13 +- + sound/usb/implicit.c | 2 - + sound/usb/mixer.c | 42 ++- + sound/usb/mixer_quirks.c | 34 ++ + sound/usb/pcm.c | 164 ++++++-- + sound/usb/quirks-table.h | 58 +++ + sound/usb/usx2y/usbusx2yaudio.c | 11 +- + sound/virtio/virtio_pcm_msg.c | 5 +- + 40 files changed, 1744 insertions(+), 443 deletions(-) + delete mode 100644 sound/core/sgbuf.c + create mode 100644 sound/firewire/motu/motu-command-dsp-message-parser.c + create mode 100644 sound/firewire/motu/motu-register-dsp-message-parser.c +Merging sound-asoc/for-next (d55e4e7df171 Merge remote-tracking branch 'asoc/for-5.16' into asoc-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git sound-asoc/for-next +Removing sound/soc/sof/probe.h +Auto-merging sound/soc/sof/intel/hda-dai.c +Removing sound/soc/sof/compress.h +Auto-merging sound/soc/cirrus/ep93xx-i2s.c +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/sound/spdif-transmitter.txt +Removing Documentation/devicetree/bindings/sound/simple-amplifier.txt +Removing Documentation/devicetree/bindings/sound/rockchip,pdm.txt +Removing Documentation/devicetree/bindings/sound/name-prefix.txt +Removing Documentation/devicetree/bindings/sound/bt-sco.txt +Merge made by the 'recursive' strategy. 
+ .../bindings/sound/audio-graph-card2.yaml | 57 + + Documentation/devicetree/bindings/sound/bt-sco.txt | 13 - + .../devicetree/bindings/sound/cirrus,cs35l41.yaml | 157 + + .../devicetree/bindings/sound/linux,bt-sco.yaml | 38 + + .../devicetree/bindings/sound/linux,spdif-dit.yaml | 32 + + .../devicetree/bindings/sound/max9892x.txt | 3 + + .../devicetree/bindings/sound/mt8192-afe-pcm.yaml | 100 + + .../devicetree/bindings/sound/name-prefix.txt | 24 - + .../devicetree/bindings/sound/name-prefix.yaml | 21 + + .../devicetree/bindings/sound/nau8821.txt | 55 + + .../bindings/sound/nvidia,tegra186-dspk.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-adx.yaml | 76 + + .../bindings/sound/nvidia,tegra210-ahub.yaml | 20 + + .../bindings/sound/nvidia,tegra210-amx.yaml | 76 + + .../bindings/sound/nvidia,tegra210-dmic.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-i2s.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-mixer.yaml | 74 + + .../bindings/sound/nvidia,tegra210-mvc.yaml | 76 + + .../bindings/sound/nvidia,tegra210-sfc.yaml | 73 + + .../devicetree/bindings/sound/nxp,tfa989x.yaml | 9 +- + .../devicetree/bindings/sound/qcom,q6afe.txt | 2 +- + .../devicetree/bindings/sound/qcom,q6asm.txt | 2 +- + .../devicetree/bindings/sound/realtek,rt5682s.yaml | 117 + + .../devicetree/bindings/sound/richtek,rt9120.yaml | 59 + + .../bindings/sound/rockchip,i2s-tdm.yaml | 182 + + .../devicetree/bindings/sound/rockchip,pdm.txt | 46 - + .../devicetree/bindings/sound/rockchip,pdm.yaml | 120 + + Documentation/devicetree/bindings/sound/rt5659.txt | 2 +- + .../devicetree/bindings/sound/simple-amplifier.txt | 17 - + .../bindings/sound/simple-audio-amplifier.yaml | 45 + + .../bindings/sound/simple-audio-mux.yaml | 9 +- + .../bindings/sound/socionext,uniphier-aio.yaml | 22 +- + .../bindings/sound/socionext,uniphier-evea.yaml | 6 +- + .../bindings/sound/spdif-transmitter.txt | 10 - + .../devicetree/bindings/sound/test-component.yaml | 33 + + .../devicetree/bindings/sound/wlf,wm8978.yaml | 58 + + Documentation/sound/alsa-configuration.rst | 9 + + MAINTAINERS | 18 + + arch/sh/boards/mach-ecovec24/setup.c | 2 +- + arch/sh/boards/mach-se/7724/setup.c | 2 +- + drivers/firmware/Kconfig | 1 + + drivers/firmware/Makefile | 1 + + drivers/firmware/cirrus/Kconfig | 5 + + drivers/firmware/cirrus/Makefile | 3 + + drivers/firmware/cirrus/cs_dsp.c | 3109 +++++++++++++++++ + include/dt-bindings/sound/qcom,lpass.h | 5 + + include/linux/firmware/cirrus/cs_dsp.h | 242 ++ + .../linux/firmware/cirrus}/wmfw.h | 8 +- + include/sound/cs35l41.h | 34 + + include/sound/graph_card.h | 21 + + include/sound/rt5682s.h | 48 + + include/sound/simple_card_utils.h | 6 +- + include/sound/soc-component.h | 21 +- + include/sound/soc-dpcm.h | 1 + + include/sound/soc-topology.h | 2 +- + include/sound/sof.h | 5 - + include/sound/sof/dai-intel.h | 4 + + include/sound/sof/dai.h | 10 +- + include/uapi/sound/sof/tokens.h | 1 + + sound/hda/intel-dsp-config.c | 22 +- + sound/soc/amd/Kconfig | 36 + + sound/soc/amd/Makefile | 2 + + sound/soc/amd/acp-da7219-max98357a.c | 6 +- + sound/soc/amd/acp-pcm-dma.c | 6 +- + sound/soc/amd/acp-rt5645.c | 2 +- + sound/soc/amd/acp.h | 2 + + sound/soc/amd/acp/Kconfig | 51 + + sound/soc/amd/acp/Makefile | 26 + + sound/soc/amd/acp/acp-i2s.c | 340 ++ + sound/soc/amd/acp/acp-legacy-mach.c | 104 + + sound/soc/amd/acp/acp-mach-common.c | 600 ++++ + sound/soc/amd/acp/acp-mach.h | 57 + + sound/soc/amd/acp/acp-platform.c | 315 ++ + sound/soc/amd/acp/acp-renoir.c | 144 + + sound/soc/amd/acp/acp-sof-mach.c | 131 + + sound/soc/amd/acp/amd.h | 146 
+ + sound/soc/amd/acp/chip_offset_byte.h | 76 + + sound/soc/amd/vangogh/Makefile | 2 + + sound/soc/amd/vangogh/acp5x-i2s.c | 2 +- + sound/soc/amd/vangogh/acp5x-mach.c | 386 +++ + sound/soc/amd/vangogh/acp5x.h | 2 +- + sound/soc/amd/vangogh/pci-acp5x.c | 3 + + sound/soc/amd/yc/Makefile | 9 + + sound/soc/amd/yc/acp6x-mach.c | 194 ++ + sound/soc/amd/yc/acp6x-pdm-dma.c | 448 +++ + sound/soc/amd/yc/acp6x.h | 107 + + sound/soc/amd/yc/acp6x_chip_offset_byte.h | 444 +++ + sound/soc/amd/yc/pci-acp6x.c | 338 ++ + sound/soc/atmel/atmel-i2s.c | 6 +- + sound/soc/atmel/atmel_ssc_dai.c | 26 +- + sound/soc/atmel/atmel_wm8904.c | 2 +- + sound/soc/atmel/mchp-i2s-mcc.c | 10 +- + sound/soc/atmel/mikroe-proto.c | 4 +- + sound/soc/atmel/sam9g20_wm8731.c | 2 +- + sound/soc/atmel/sam9x5_wm8731.c | 2 +- + sound/soc/atmel/tse850-pcm5142.c | 2 +- + sound/soc/au1x/db1200.c | 6 +- + sound/soc/au1x/i2sc.c | 6 +- + sound/soc/au1x/psc-i2s.c | 10 +- + sound/soc/bcm/bcm2835-i2s.c | 56 +- + sound/soc/bcm/cygnus-ssp.c | 6 +- + sound/soc/cirrus/edb93xx.c | 2 +- + sound/soc/cirrus/ep93xx-i2s.c | 10 +- + sound/soc/cirrus/snappercl15.c | 2 +- + sound/soc/codecs/88pm860x-codec.c | 18 +- + sound/soc/codecs/Kconfig | 33 + + sound/soc/codecs/Makefile | 10 + + sound/soc/codecs/ab8500-codec.c | 20 +- + sound/soc/codecs/ad1836.c | 6 +- + sound/soc/codecs/ad193x.c | 10 +- + sound/soc/codecs/adau1372.c | 14 +- + sound/soc/codecs/adau1373.c | 14 +- + sound/soc/codecs/adau1701.c | 6 +- + sound/soc/codecs/adau17x1.c | 6 +- + sound/soc/codecs/adau1977.c | 31 +- + sound/soc/codecs/adav80x.c | 6 +- + sound/soc/codecs/ak4104.c | 4 +- + sound/soc/codecs/ak4118.c | 20 +- + sound/soc/codecs/ak4458.c | 12 +- + sound/soc/codecs/ak4642.c | 8 +- + sound/soc/codecs/ak4671.c | 6 +- + sound/soc/codecs/ak5558.c | 10 +- + sound/soc/codecs/alc5623.c | 8 +- + sound/soc/codecs/alc5632.c | 8 +- + sound/soc/codecs/cpcap.c | 18 +- + sound/soc/codecs/cros_ec_codec.c | 4 +- + sound/soc/codecs/cs35l41-i2c.c | 113 + + sound/soc/codecs/cs35l41-spi.c | 138 + + sound/soc/codecs/cs35l41-tables.c | 594 ++++ + sound/soc/codecs/cs35l41.c | 1447 ++++++++ + sound/soc/codecs/cs35l41.h | 775 +++++ + sound/soc/codecs/cs42l42.c | 189 +- + sound/soc/codecs/cs42l42.h | 59 + + sound/soc/codecs/cs47l15.c | 26 +- + sound/soc/codecs/cs47l24.c | 20 +- + sound/soc/codecs/cs47l35.c | 26 +- + sound/soc/codecs/cs47l85.c | 34 +- + sound/soc/codecs/cs47l90.c | 36 +- + sound/soc/codecs/cs47l92.c | 20 +- + sound/soc/codecs/es8316.c | 7 +- + sound/soc/codecs/madera.c | 18 +- + sound/soc/codecs/max98390.c | 2 +- + sound/soc/codecs/max98927.c | 25 + + sound/soc/codecs/max98927.h | 1 + + sound/soc/codecs/mt6359.c | 2 +- + sound/soc/codecs/nau8821.c | 1712 ++++++++++ + sound/soc/codecs/nau8821.h | 533 +++ + sound/soc/codecs/nau8824.c | 70 + + sound/soc/codecs/nau8824.h | 3 +- + sound/soc/codecs/pcm5102a.c | 2 +- + sound/soc/codecs/rt1011.c | 10 + + sound/soc/codecs/rt1015.c | 2 +- + sound/soc/codecs/rt1016.c | 2 +- + sound/soc/codecs/rt1019.c | 2 +- + sound/soc/codecs/rt1305.c | 2 +- + sound/soc/codecs/rt1308.c | 2 +- + sound/soc/codecs/rt5514.c | 2 +- + sound/soc/codecs/rt5616.c | 2 +- + sound/soc/codecs/rt5640.c | 2 +- + sound/soc/codecs/rt5645.c | 2 +- + sound/soc/codecs/rt5651.c | 9 +- + sound/soc/codecs/rt5659.c | 2 +- + sound/soc/codecs/rt5660.c | 2 +- + sound/soc/codecs/rt5663.c | 2 +- + sound/soc/codecs/rt5665.c | 2 +- + sound/soc/codecs/rt5668.c | 2 +- + sound/soc/codecs/rt5670.c | 2 +- + sound/soc/codecs/rt5677.c | 2 +- + sound/soc/codecs/rt5682-i2c.c | 22 + + sound/soc/codecs/rt5682.c | 
132 +- + sound/soc/codecs/rt5682.h | 23 + + sound/soc/codecs/rt5682s.c | 3197 ++++++++++++++++++ + sound/soc/codecs/rt5682s.h | 1474 ++++++++ + sound/soc/codecs/rt9120.c | 495 +++ + sound/soc/codecs/tlv320aic32x4-i2c.c | 4 +- + sound/soc/codecs/tlv320aic32x4-spi.c | 4 +- + sound/soc/codecs/tlv320aic32x4.c | 4 +- + sound/soc/codecs/tlv320aic32x4.h | 2 +- + sound/soc/codecs/wcd9335.c | 2 +- + sound/soc/codecs/wm2200.c | 30 +- + sound/soc/codecs/wm5102.c | 16 +- + sound/soc/codecs/wm5110.c | 24 +- + sound/soc/codecs/wm_adsp.c | 3303 ++---------------- + sound/soc/codecs/wm_adsp.h | 105 +- + sound/soc/codecs/zl38060.c | 4 +- + sound/soc/dwc/dwc-i2s.c | 12 +- + sound/soc/fsl/eukrea-tlv320.c | 2 +- + sound/soc/fsl/fsl-asoc-card.c | 54 +- + sound/soc/fsl/fsl_audmix.c | 8 +- + sound/soc/fsl/fsl_esai.c | 28 +- + sound/soc/fsl/fsl_mqs.c | 4 +- + sound/soc/fsl/fsl_rpmsg.c | 47 +- + sound/soc/fsl/fsl_rpmsg.h | 12 + + sound/soc/fsl/fsl_sai.c | 34 +- + sound/soc/fsl/fsl_sai.h | 2 +- + sound/soc/fsl/fsl_spdif.c | 85 + + sound/soc/fsl/fsl_ssi.c | 38 +- + sound/soc/fsl/imx-audmix.c | 12 +- + sound/soc/fsl/imx-card.c | 6 +- + sound/soc/fsl/imx-es8328.c | 2 +- + sound/soc/fsl/imx-hdmi.c | 6 +- + sound/soc/fsl/imx-rpmsg.c | 2 +- + sound/soc/fsl/imx-sgtl5000.c | 2 +- + sound/soc/fsl/mpc8610_hpcd.c | 16 +- + sound/soc/fsl/p1022_ds.c | 16 +- + sound/soc/fsl/p1022_rdk.c | 2 +- + sound/soc/generic/Kconfig | 20 + + sound/soc/generic/Makefile | 6 + + sound/soc/generic/audio-graph-card.c | 4 +- + .../soc/generic/audio-graph-card2-custom-sample.c | 183 + + .../generic/audio-graph-card2-custom-sample.dtsi | 227 ++ + sound/soc/generic/audio-graph-card2.c | 1281 +++++++ + sound/soc/generic/simple-card-utils.c | 50 +- + sound/soc/generic/test-component.c | 659 ++++ + sound/soc/intel/boards/Kconfig | 15 + + sound/soc/intel/boards/Makefile | 2 + + sound/soc/intel/boards/bdw-rt5650.c | 2 +- + sound/soc/intel/boards/bdw-rt5677.c | 2 +- + sound/soc/intel/boards/broadwell.c | 2 +- + sound/soc/intel/boards/bxt_da7219_max98357a.c | 4 +- + sound/soc/intel/boards/bxt_rt298.c | 2 +- + sound/soc/intel/boards/bytcht_cx2072x.c | 4 +- + sound/soc/intel/boards/bytcht_da7213.c | 4 +- + sound/soc/intel/boards/bytcht_es8316.c | 4 +- + sound/soc/intel/boards/bytcht_nocodec.c | 4 +- + sound/soc/intel/boards/bytcr_rt5640.c | 120 +- + sound/soc/intel/boards/bytcr_rt5651.c | 122 +- + sound/soc/intel/boards/bytcr_wm5102.c | 4 +- + sound/soc/intel/boards/cht_bsw_max98090_ti.c | 4 +- + sound/soc/intel/boards/cht_bsw_nau8824.c | 4 +- + sound/soc/intel/boards/cht_bsw_rt5645.c | 6 +- + sound/soc/intel/boards/cht_bsw_rt5672.c | 2 +- + sound/soc/intel/boards/glk_rt5682_max98357a.c | 4 +- + sound/soc/intel/boards/haswell.c | 2 +- + sound/soc/intel/boards/kbl_da7219_max98357a.c | 4 +- + sound/soc/intel/boards/kbl_da7219_max98927.c | 6 +- + sound/soc/intel/boards/kbl_rt5660.c | 2 +- + sound/soc/intel/boards/kbl_rt5663_max98927.c | 6 +- + .../soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 4 +- + sound/soc/intel/boards/skl_nau88l25_max98357a.c | 4 +- + sound/soc/intel/boards/skl_nau88l25_ssm4567.c | 4 +- + sound/soc/intel/boards/skl_rt286.c | 2 +- + sound/soc/intel/boards/sof_es8336.c | 569 ++++ + sound/soc/intel/boards/sof_rt5682.c | 122 +- + sound/soc/intel/boards/sof_sdw.c | 10 + + sound/soc/intel/common/soc-acpi-intel-adl-match.c | 15 +- + sound/soc/intel/common/soc-acpi-intel-bxt-match.c | 6 + + sound/soc/intel/common/soc-acpi-intel-glk-match.c | 7 +- + sound/soc/intel/common/soc-acpi-intel-jsl-match.c | 26 +- + 
sound/soc/intel/common/soc-acpi-intel-tgl-match.c | 49 +- + sound/soc/intel/skylake/skl-topology.c | 6 +- + sound/soc/mediatek/common/mtk-afe-fe-dai.c | 1 - + sound/soc/mediatek/mt2701/mt2701-cs42448.c | 2 +- + sound/soc/mediatek/mt2701/mt2701-wm8960.c | 2 +- + .../mt8183/mt8183-mt6358-ts3a227-max98357.c | 2 +- + sound/soc/mediatek/mt8195/mt8195-afe-pcm.c | 6 +- + sound/soc/mediatek/mt8195/mt8195-audsys-clk.c | 152 +- + sound/soc/mediatek/mt8195/mt8195-dai-adda.c | 8 +- + sound/soc/mediatek/mt8195/mt8195-dai-etdm.c | 2 +- + .../mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c | 54 +- + sound/soc/meson/aiu-encoder-spdif.c | 2 +- + sound/soc/meson/axg-card.c | 1 + + sound/soc/meson/axg-tdm-interface.c | 26 +- + sound/soc/meson/meson-card-utils.c | 1 + + sound/soc/meson/meson-codec-glue.c | 3 - + sound/soc/qcom/Kconfig | 3 +- + sound/soc/qcom/apq8096.c | 2 +- + sound/soc/qcom/qdsp6/q6afe-dai.c | 2 +- + sound/soc/qcom/sm8250.c | 79 + + sound/soc/rockchip/Kconfig | 11 + + sound/soc/rockchip/Makefile | 2 + + sound/soc/rockchip/rockchip_i2s_tdm.c | 1762 ++++++++++ + sound/soc/rockchip/rockchip_i2s_tdm.h | 398 +++ + sound/soc/rockchip/rockchip_pdm.c | 112 +- + sound/soc/rockchip/rockchip_pdm.h | 6 + + sound/soc/samsung/s3c-i2s-v2.c | 2 + + sound/soc/soc-component.c | 87 +- + sound/soc/soc-compress.c | 43 +- + sound/soc/soc-core.c | 51 +- + sound/soc/soc-dapm.c | 2 + + sound/soc/soc-generic-dmaengine-pcm.c | 6 +- + sound/soc/soc-pcm.c | 27 +- + sound/soc/soc-topology.c | 16 +- + sound/soc/sof/Kconfig | 6 +- + sound/soc/sof/Makefile | 6 +- + sound/soc/sof/compress.c | 158 +- + sound/soc/sof/compress.h | 32 - + sound/soc/sof/control.c | 192 +- + sound/soc/sof/core.c | 35 +- + sound/soc/sof/debug.c | 87 +- + sound/soc/sof/imx/Kconfig | 2 + + sound/soc/sof/imx/imx-ops.h | 10 + + sound/soc/sof/imx/imx8.c | 47 +- + sound/soc/sof/imx/imx8m.c | 41 +- + sound/soc/sof/intel/Makefile | 5 +- + sound/soc/sof/intel/apl.c | 7 +- + sound/soc/sof/intel/atom.c | 5 +- + sound/soc/sof/intel/bdw.c | 19 +- + sound/soc/sof/intel/byt.c | 30 +- + sound/soc/sof/intel/cnl.c | 7 +- + sound/soc/sof/intel/hda-dai.c | 262 +- + sound/soc/sof/intel/hda-dsp.c | 61 +- + sound/soc/sof/intel/hda-ipc.c | 15 +- + sound/soc/sof/intel/hda-loader.c | 11 +- + .../soc/sof/intel/{hda-compress.c => hda-probes.c} | 0 + sound/soc/sof/intel/hda-stream.c | 92 +- + sound/soc/sof/intel/hda.c | 232 +- + sound/soc/sof/intel/hda.h | 52 +- + sound/soc/sof/intel/icl.c | 7 +- + sound/soc/sof/intel/pci-apl.c | 2 - + sound/soc/sof/intel/pci-cnl.c | 3 - + sound/soc/sof/intel/pci-icl.c | 2 - + sound/soc/sof/intel/pci-tgl.c | 5 - + sound/soc/sof/intel/pci-tng.c | 16 +- + sound/soc/sof/intel/tgl.c | 7 +- + sound/soc/sof/ipc.c | 217 +- + sound/soc/sof/loader.c | 161 +- + sound/soc/sof/ops.c | 3 + + sound/soc/sof/ops.h | 77 +- + sound/soc/sof/pcm.c | 71 +- + sound/soc/sof/pm.c | 12 +- + sound/soc/sof/probe.h | 85 - + sound/soc/sof/sof-audio.c | 715 +++- + sound/soc/sof/sof-audio.h | 52 +- + sound/soc/sof/sof-of-dev.c | 24 +- + sound/soc/sof/sof-priv.h | 154 +- + sound/soc/sof/{probe.c => sof-probes.c} | 280 +- + sound/soc/sof/sof-probes.h | 38 + + sound/soc/sof/{intel/intel-ipc.c => stream-ipc.c} | 56 +- + sound/soc/sof/topology.c | 417 +-- + sound/soc/sof/trace.c | 5 +- + sound/soc/sof/utils.c | 28 +- + sound/soc/sof/xtensa/core.c | 2 +- + sound/soc/tegra/Kconfig | 48 + + sound/soc/tegra/Makefile | 10 + + sound/soc/tegra/tegra210_adx.c | 531 +++ + sound/soc/tegra/tegra210_adx.h | 72 + + sound/soc/tegra/tegra210_ahub.c | 511 ++- + sound/soc/tegra/tegra210_amx.c | 
600 ++++ + sound/soc/tegra/tegra210_amx.h | 93 + + sound/soc/tegra/tegra210_mixer.c | 674 ++++ + sound/soc/tegra/tegra210_mixer.h | 100 + + sound/soc/tegra/tegra210_mvc.c | 645 ++++ + sound/soc/tegra/tegra210_mvc.h | 117 + + sound/soc/tegra/tegra210_sfc.c | 3549 ++++++++++++++++++++ + sound/soc/tegra/tegra210_sfc.h | 78 + + sound/soc/tegra/tegra_asoc_machine.c | 2 +- + sound/soc/ti/Kconfig | 2 +- + sound/soc/ti/davinci-evm.c | 2 +- + sound/soc/ti/omap-abe-twl6040.c | 2 +- + sound/soc/ux500/mop500_ab8500.c | 2 +- + sound/soc/ux500/mop500_ab8500.h | 2 +- + 352 files changed, 36387 insertions(+), 5742 deletions(-) + create mode 100644 Documentation/devicetree/bindings/sound/audio-graph-card2.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/bt-sco.txt + create mode 100644 Documentation/devicetree/bindings/sound/cirrus,cs35l41.yaml + create mode 100644 Documentation/devicetree/bindings/sound/linux,bt-sco.yaml + create mode 100644 Documentation/devicetree/bindings/sound/linux,spdif-dit.yaml + create mode 100644 Documentation/devicetree/bindings/sound/mt8192-afe-pcm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/name-prefix.txt + create mode 100644 Documentation/devicetree/bindings/sound/name-prefix.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nau8821.txt + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml + create mode 100644 Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml + create mode 100644 Documentation/devicetree/bindings/sound/richtek,rt9120.yaml + create mode 100644 Documentation/devicetree/bindings/sound/rockchip,i2s-tdm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/rockchip,pdm.txt + create mode 100644 Documentation/devicetree/bindings/sound/rockchip,pdm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/simple-amplifier.txt + create mode 100644 Documentation/devicetree/bindings/sound/simple-audio-amplifier.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/spdif-transmitter.txt + create mode 100644 Documentation/devicetree/bindings/sound/test-component.yaml + create mode 100644 Documentation/devicetree/bindings/sound/wlf,wm8978.yaml + create mode 100644 drivers/firmware/cirrus/Kconfig + create mode 100644 drivers/firmware/cirrus/Makefile + create mode 100644 drivers/firmware/cirrus/cs_dsp.c + create mode 100644 include/linux/firmware/cirrus/cs_dsp.h + rename {sound/soc/codecs => include/linux/firmware/cirrus}/wmfw.h (91%) + create mode 100644 include/sound/cs35l41.h + create mode 100644 include/sound/rt5682s.h + create mode 100644 sound/soc/amd/acp/Kconfig + create mode 100644 sound/soc/amd/acp/Makefile + create mode 100644 sound/soc/amd/acp/acp-i2s.c + create mode 100644 sound/soc/amd/acp/acp-legacy-mach.c + create mode 100644 sound/soc/amd/acp/acp-mach-common.c + create mode 100644 sound/soc/amd/acp/acp-mach.h + create mode 100644 sound/soc/amd/acp/acp-platform.c + create mode 100644 sound/soc/amd/acp/acp-renoir.c + create mode 100644 sound/soc/amd/acp/acp-sof-mach.c + create mode 100644 sound/soc/amd/acp/amd.h + create mode 100644 sound/soc/amd/acp/chip_offset_byte.h + create mode 100644 
sound/soc/amd/vangogh/acp5x-mach.c + create mode 100644 sound/soc/amd/yc/Makefile + create mode 100644 sound/soc/amd/yc/acp6x-mach.c + create mode 100644 sound/soc/amd/yc/acp6x-pdm-dma.c + create mode 100644 sound/soc/amd/yc/acp6x.h + create mode 100644 sound/soc/amd/yc/acp6x_chip_offset_byte.h + create mode 100644 sound/soc/amd/yc/pci-acp6x.c + create mode 100644 sound/soc/codecs/cs35l41-i2c.c + create mode 100644 sound/soc/codecs/cs35l41-spi.c + create mode 100644 sound/soc/codecs/cs35l41-tables.c + create mode 100644 sound/soc/codecs/cs35l41.c + create mode 100644 sound/soc/codecs/cs35l41.h + create mode 100644 sound/soc/codecs/nau8821.c + create mode 100644 sound/soc/codecs/nau8821.h + create mode 100644 sound/soc/codecs/rt5682s.c + create mode 100644 sound/soc/codecs/rt5682s.h + create mode 100644 sound/soc/codecs/rt9120.c + create mode 100644 sound/soc/generic/audio-graph-card2-custom-sample.c + create mode 100644 sound/soc/generic/audio-graph-card2-custom-sample.dtsi + create mode 100644 sound/soc/generic/audio-graph-card2.c + create mode 100644 sound/soc/generic/test-component.c + create mode 100644 sound/soc/intel/boards/sof_es8336.c + create mode 100644 sound/soc/rockchip/rockchip_i2s_tdm.c + create mode 100644 sound/soc/rockchip/rockchip_i2s_tdm.h + delete mode 100644 sound/soc/sof/compress.h + create mode 100644 sound/soc/sof/imx/imx-ops.h + rename sound/soc/sof/intel/{hda-compress.c => hda-probes.c} (100%) + delete mode 100644 sound/soc/sof/probe.h + rename sound/soc/sof/{probe.c => sof-probes.c} (52%) + create mode 100644 sound/soc/sof/sof-probes.h + rename sound/soc/sof/{intel/intel-ipc.c => stream-ipc.c} (56%) + create mode 100644 sound/soc/tegra/tegra210_adx.c + create mode 100644 sound/soc/tegra/tegra210_adx.h + create mode 100644 sound/soc/tegra/tegra210_amx.c + create mode 100644 sound/soc/tegra/tegra210_amx.h + create mode 100644 sound/soc/tegra/tegra210_mixer.c + create mode 100644 sound/soc/tegra/tegra210_mixer.h + create mode 100644 sound/soc/tegra/tegra210_mvc.c + create mode 100644 sound/soc/tegra/tegra210_mvc.h + create mode 100644 sound/soc/tegra/tegra210_sfc.c + create mode 100644 sound/soc/tegra/tegra210_sfc.h +$ git reset --hard HEAD^ +HEAD is now at ba496d2867fc Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git +Merging next-20211022 version of sound-asoc +$ git merge -m next-20211022/sound-asoc fbdf6ab3d36eacb52e25e37cc87071019fea5865 +Removing sound/soc/sof/probe.h +Auto-merging sound/soc/sof/intel/hda-dai.c +Removing sound/soc/sof/compress.h +Auto-merging sound/soc/cirrus/ep93xx-i2s.c +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/sound/spdif-transmitter.txt +Removing Documentation/devicetree/bindings/sound/simple-amplifier.txt +Removing Documentation/devicetree/bindings/sound/rockchip,pdm.txt +Removing Documentation/devicetree/bindings/sound/name-prefix.txt +Removing Documentation/devicetree/bindings/sound/bt-sco.txt +Merge made by the 'recursive' strategy. 
+ .../bindings/sound/audio-graph-card2.yaml | 57 + + Documentation/devicetree/bindings/sound/bt-sco.txt | 13 - + .../devicetree/bindings/sound/cirrus,cs35l41.yaml | 157 + + .../devicetree/bindings/sound/linux,bt-sco.yaml | 38 + + .../devicetree/bindings/sound/linux,spdif-dit.yaml | 32 + + .../devicetree/bindings/sound/max9892x.txt | 3 + + .../devicetree/bindings/sound/mt8192-afe-pcm.yaml | 100 + + .../devicetree/bindings/sound/name-prefix.txt | 24 - + .../devicetree/bindings/sound/name-prefix.yaml | 21 + + .../devicetree/bindings/sound/nau8821.txt | 55 + + .../bindings/sound/nvidia,tegra186-dspk.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-adx.yaml | 76 + + .../bindings/sound/nvidia,tegra210-ahub.yaml | 20 + + .../bindings/sound/nvidia,tegra210-amx.yaml | 76 + + .../bindings/sound/nvidia,tegra210-dmic.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-i2s.yaml | 9 +- + .../bindings/sound/nvidia,tegra210-mixer.yaml | 74 + + .../bindings/sound/nvidia,tegra210-mvc.yaml | 76 + + .../bindings/sound/nvidia,tegra210-sfc.yaml | 73 + + .../devicetree/bindings/sound/nxp,tfa989x.yaml | 9 +- + .../devicetree/bindings/sound/qcom,q6afe.txt | 2 +- + .../devicetree/bindings/sound/qcom,q6asm.txt | 2 +- + .../devicetree/bindings/sound/realtek,rt5682s.yaml | 117 + + .../devicetree/bindings/sound/richtek,rt9120.yaml | 59 + + .../bindings/sound/rockchip,i2s-tdm.yaml | 182 + + .../devicetree/bindings/sound/rockchip,pdm.txt | 46 - + .../devicetree/bindings/sound/rockchip,pdm.yaml | 120 + + Documentation/devicetree/bindings/sound/rt5659.txt | 2 +- + .../devicetree/bindings/sound/simple-amplifier.txt | 17 - + .../bindings/sound/simple-audio-amplifier.yaml | 45 + + .../bindings/sound/simple-audio-mux.yaml | 9 +- + .../bindings/sound/socionext,uniphier-aio.yaml | 22 +- + .../bindings/sound/socionext,uniphier-evea.yaml | 6 +- + .../bindings/sound/spdif-transmitter.txt | 10 - + .../devicetree/bindings/sound/test-component.yaml | 33 + + .../devicetree/bindings/sound/wlf,wm8978.yaml | 58 + + Documentation/sound/alsa-configuration.rst | 9 + + MAINTAINERS | 18 + + arch/sh/boards/mach-ecovec24/setup.c | 2 +- + arch/sh/boards/mach-se/7724/setup.c | 2 +- + drivers/firmware/Kconfig | 1 + + drivers/firmware/Makefile | 1 + + drivers/firmware/cirrus/Kconfig | 5 + + drivers/firmware/cirrus/Makefile | 3 + + drivers/firmware/cirrus/cs_dsp.c | 3109 +++++++++++++++++ + include/dt-bindings/sound/qcom,lpass.h | 5 + + include/linux/firmware/cirrus/cs_dsp.h | 242 ++ + .../linux/firmware/cirrus}/wmfw.h | 8 +- + include/sound/cs35l41.h | 34 + + include/sound/graph_card.h | 21 + + include/sound/rt5682s.h | 48 + + include/sound/simple_card_utils.h | 6 +- + include/sound/soc-component.h | 21 +- + include/sound/soc-dpcm.h | 1 + + include/sound/soc-topology.h | 2 +- + include/sound/sof.h | 5 - + include/sound/sof/dai-intel.h | 4 + + include/sound/sof/dai.h | 10 +- + include/uapi/sound/sof/tokens.h | 1 + + sound/hda/intel-dsp-config.c | 22 +- + sound/soc/amd/Kconfig | 13 + + sound/soc/amd/acp-da7219-max98357a.c | 6 +- + sound/soc/amd/acp-pcm-dma.c | 6 +- + sound/soc/amd/acp-rt5645.c | 2 +- + sound/soc/amd/acp.h | 2 + + sound/soc/amd/vangogh/Makefile | 2 + + sound/soc/amd/vangogh/acp5x-i2s.c | 2 +- + sound/soc/amd/vangogh/acp5x-mach.c | 386 +++ + sound/soc/amd/vangogh/acp5x.h | 2 +- + sound/soc/amd/vangogh/pci-acp5x.c | 3 + + sound/soc/atmel/atmel-i2s.c | 6 +- + sound/soc/atmel/atmel_ssc_dai.c | 26 +- + sound/soc/atmel/atmel_wm8904.c | 2 +- + sound/soc/atmel/mchp-i2s-mcc.c | 10 +- + sound/soc/atmel/mikroe-proto.c | 4 +- + 
sound/soc/atmel/sam9g20_wm8731.c | 2 +- + sound/soc/atmel/sam9x5_wm8731.c | 2 +- + sound/soc/atmel/tse850-pcm5142.c | 2 +- + sound/soc/au1x/db1200.c | 6 +- + sound/soc/au1x/i2sc.c | 6 +- + sound/soc/au1x/psc-i2s.c | 10 +- + sound/soc/bcm/bcm2835-i2s.c | 56 +- + sound/soc/bcm/cygnus-ssp.c | 6 +- + sound/soc/cirrus/edb93xx.c | 2 +- + sound/soc/cirrus/ep93xx-i2s.c | 10 +- + sound/soc/cirrus/snappercl15.c | 2 +- + sound/soc/codecs/88pm860x-codec.c | 18 +- + sound/soc/codecs/Kconfig | 33 + + sound/soc/codecs/Makefile | 10 + + sound/soc/codecs/ab8500-codec.c | 20 +- + sound/soc/codecs/ad1836.c | 6 +- + sound/soc/codecs/ad193x.c | 10 +- + sound/soc/codecs/adau1372.c | 14 +- + sound/soc/codecs/adau1373.c | 14 +- + sound/soc/codecs/adau1701.c | 6 +- + sound/soc/codecs/adau17x1.c | 6 +- + sound/soc/codecs/adau1977.c | 31 +- + sound/soc/codecs/adav80x.c | 6 +- + sound/soc/codecs/ak4104.c | 4 +- + sound/soc/codecs/ak4118.c | 20 +- + sound/soc/codecs/ak4458.c | 12 +- + sound/soc/codecs/ak4642.c | 8 +- + sound/soc/codecs/ak4671.c | 6 +- + sound/soc/codecs/ak5558.c | 10 +- + sound/soc/codecs/alc5623.c | 8 +- + sound/soc/codecs/alc5632.c | 8 +- + sound/soc/codecs/cpcap.c | 18 +- + sound/soc/codecs/cros_ec_codec.c | 4 +- + sound/soc/codecs/cs35l41-i2c.c | 113 + + sound/soc/codecs/cs35l41-spi.c | 138 + + sound/soc/codecs/cs35l41-tables.c | 594 ++++ + sound/soc/codecs/cs35l41.c | 1447 ++++++++ + sound/soc/codecs/cs35l41.h | 775 +++++ + sound/soc/codecs/cs42l42.c | 138 +- + sound/soc/codecs/cs42l42.h | 59 + + sound/soc/codecs/cs47l15.c | 26 +- + sound/soc/codecs/cs47l24.c | 20 +- + sound/soc/codecs/cs47l35.c | 26 +- + sound/soc/codecs/cs47l85.c | 34 +- + sound/soc/codecs/cs47l90.c | 36 +- + sound/soc/codecs/cs47l92.c | 20 +- + sound/soc/codecs/es8316.c | 7 +- + sound/soc/codecs/madera.c | 18 +- + sound/soc/codecs/max98390.c | 2 +- + sound/soc/codecs/max98927.c | 25 + + sound/soc/codecs/max98927.h | 1 + + sound/soc/codecs/mt6359.c | 2 +- + sound/soc/codecs/nau8821.c | 1712 ++++++++++ + sound/soc/codecs/nau8821.h | 533 +++ + sound/soc/codecs/nau8824.c | 70 + + sound/soc/codecs/nau8824.h | 3 +- + sound/soc/codecs/pcm5102a.c | 2 +- + sound/soc/codecs/rt1011.c | 10 + + sound/soc/codecs/rt1015.c | 2 +- + sound/soc/codecs/rt1016.c | 2 +- + sound/soc/codecs/rt1019.c | 2 +- + sound/soc/codecs/rt1305.c | 2 +- + sound/soc/codecs/rt1308.c | 2 +- + sound/soc/codecs/rt5514.c | 2 +- + sound/soc/codecs/rt5616.c | 2 +- + sound/soc/codecs/rt5640.c | 2 +- + sound/soc/codecs/rt5645.c | 2 +- + sound/soc/codecs/rt5651.c | 9 +- + sound/soc/codecs/rt5659.c | 2 +- + sound/soc/codecs/rt5660.c | 2 +- + sound/soc/codecs/rt5663.c | 2 +- + sound/soc/codecs/rt5665.c | 2 +- + sound/soc/codecs/rt5668.c | 2 +- + sound/soc/codecs/rt5670.c | 2 +- + sound/soc/codecs/rt5677.c | 2 +- + sound/soc/codecs/rt5682-i2c.c | 22 + + sound/soc/codecs/rt5682.c | 132 +- + sound/soc/codecs/rt5682.h | 23 + + sound/soc/codecs/rt5682s.c | 3197 ++++++++++++++++++ + sound/soc/codecs/rt5682s.h | 1474 ++++++++ + sound/soc/codecs/rt9120.c | 495 +++ + sound/soc/codecs/tlv320aic32x4-i2c.c | 4 +- + sound/soc/codecs/tlv320aic32x4-spi.c | 4 +- + sound/soc/codecs/tlv320aic32x4.c | 4 +- + sound/soc/codecs/tlv320aic32x4.h | 2 +- + sound/soc/codecs/wcd9335.c | 2 +- + sound/soc/codecs/wm2200.c | 30 +- + sound/soc/codecs/wm5102.c | 16 +- + sound/soc/codecs/wm5110.c | 24 +- + sound/soc/codecs/wm_adsp.c | 3303 ++---------------- + sound/soc/codecs/wm_adsp.h | 105 +- + sound/soc/codecs/zl38060.c | 4 +- + sound/soc/dwc/dwc-i2s.c | 12 +- + sound/soc/fsl/eukrea-tlv320.c | 2 +- + 
sound/soc/fsl/fsl-asoc-card.c | 54 +- + sound/soc/fsl/fsl_audmix.c | 8 +- + sound/soc/fsl/fsl_esai.c | 28 +- + sound/soc/fsl/fsl_mqs.c | 4 +- + sound/soc/fsl/fsl_rpmsg.c | 47 +- + sound/soc/fsl/fsl_rpmsg.h | 12 + + sound/soc/fsl/fsl_sai.c | 34 +- + sound/soc/fsl/fsl_sai.h | 2 +- + sound/soc/fsl/fsl_spdif.c | 85 + + sound/soc/fsl/fsl_ssi.c | 38 +- + sound/soc/fsl/imx-audmix.c | 12 +- + sound/soc/fsl/imx-card.c | 6 +- + sound/soc/fsl/imx-es8328.c | 2 +- + sound/soc/fsl/imx-hdmi.c | 6 +- + sound/soc/fsl/imx-rpmsg.c | 2 +- + sound/soc/fsl/imx-sgtl5000.c | 2 +- + sound/soc/fsl/mpc8610_hpcd.c | 16 +- + sound/soc/fsl/p1022_ds.c | 16 +- + sound/soc/fsl/p1022_rdk.c | 2 +- + sound/soc/generic/Kconfig | 20 + + sound/soc/generic/Makefile | 6 + + sound/soc/generic/audio-graph-card.c | 4 +- + .../soc/generic/audio-graph-card2-custom-sample.c | 183 + + .../generic/audio-graph-card2-custom-sample.dtsi | 227 ++ + sound/soc/generic/audio-graph-card2.c | 1281 +++++++ + sound/soc/generic/simple-card-utils.c | 50 +- + sound/soc/generic/test-component.c | 659 ++++ + sound/soc/intel/boards/Kconfig | 15 + + sound/soc/intel/boards/Makefile | 2 + + sound/soc/intel/boards/bdw-rt5650.c | 2 +- + sound/soc/intel/boards/bdw-rt5677.c | 2 +- + sound/soc/intel/boards/broadwell.c | 2 +- + sound/soc/intel/boards/bxt_da7219_max98357a.c | 4 +- + sound/soc/intel/boards/bxt_rt298.c | 2 +- + sound/soc/intel/boards/bytcht_cx2072x.c | 4 +- + sound/soc/intel/boards/bytcht_da7213.c | 4 +- + sound/soc/intel/boards/bytcht_es8316.c | 4 +- + sound/soc/intel/boards/bytcht_nocodec.c | 4 +- + sound/soc/intel/boards/bytcr_rt5640.c | 120 +- + sound/soc/intel/boards/bytcr_rt5651.c | 122 +- + sound/soc/intel/boards/bytcr_wm5102.c | 4 +- + sound/soc/intel/boards/cht_bsw_max98090_ti.c | 4 +- + sound/soc/intel/boards/cht_bsw_nau8824.c | 4 +- + sound/soc/intel/boards/cht_bsw_rt5645.c | 6 +- + sound/soc/intel/boards/cht_bsw_rt5672.c | 2 +- + sound/soc/intel/boards/glk_rt5682_max98357a.c | 4 +- + sound/soc/intel/boards/haswell.c | 2 +- + sound/soc/intel/boards/kbl_da7219_max98357a.c | 4 +- + sound/soc/intel/boards/kbl_da7219_max98927.c | 6 +- + sound/soc/intel/boards/kbl_rt5660.c | 2 +- + sound/soc/intel/boards/kbl_rt5663_max98927.c | 6 +- + .../soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 4 +- + sound/soc/intel/boards/skl_nau88l25_max98357a.c | 4 +- + sound/soc/intel/boards/skl_nau88l25_ssm4567.c | 4 +- + sound/soc/intel/boards/skl_rt286.c | 2 +- + sound/soc/intel/boards/sof_es8336.c | 569 ++++ + sound/soc/intel/boards/sof_rt5682.c | 122 +- + sound/soc/intel/boards/sof_sdw.c | 10 + + sound/soc/intel/common/soc-acpi-intel-adl-match.c | 15 +- + sound/soc/intel/common/soc-acpi-intel-bxt-match.c | 6 + + sound/soc/intel/common/soc-acpi-intel-glk-match.c | 7 +- + sound/soc/intel/common/soc-acpi-intel-jsl-match.c | 26 +- + sound/soc/intel/common/soc-acpi-intel-tgl-match.c | 49 +- + sound/soc/intel/skylake/skl-topology.c | 6 +- + sound/soc/mediatek/common/mtk-afe-fe-dai.c | 1 - + sound/soc/mediatek/mt2701/mt2701-cs42448.c | 2 +- + sound/soc/mediatek/mt2701/mt2701-wm8960.c | 2 +- + .../mt8183/mt8183-mt6358-ts3a227-max98357.c | 2 +- + sound/soc/mediatek/mt8195/mt8195-afe-pcm.c | 6 +- + sound/soc/mediatek/mt8195/mt8195-audsys-clk.c | 152 +- + sound/soc/mediatek/mt8195/mt8195-dai-adda.c | 8 +- + sound/soc/mediatek/mt8195/mt8195-dai-etdm.c | 2 +- + .../mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c | 54 +- + sound/soc/meson/aiu-encoder-spdif.c | 2 +- + sound/soc/meson/meson-card-utils.c | 1 + + sound/soc/meson/meson-codec-glue.c | 3 - + 
sound/soc/qcom/apq8096.c | 2 +- + sound/soc/qcom/qdsp6/q6afe-dai.c | 2 +- + sound/soc/qcom/sm8250.c | 79 + + sound/soc/rockchip/Kconfig | 11 + + sound/soc/rockchip/Makefile | 2 + + sound/soc/rockchip/rockchip_i2s_tdm.c | 1762 ++++++++++ + sound/soc/rockchip/rockchip_i2s_tdm.h | 398 +++ + sound/soc/rockchip/rockchip_pdm.c | 112 +- + sound/soc/rockchip/rockchip_pdm.h | 6 + + sound/soc/samsung/s3c-i2s-v2.c | 2 + + sound/soc/soc-component.c | 87 +- + sound/soc/soc-compress.c | 43 +- + sound/soc/soc-core.c | 51 +- + sound/soc/soc-dapm.c | 2 + + sound/soc/soc-generic-dmaengine-pcm.c | 6 +- + sound/soc/soc-pcm.c | 27 +- + sound/soc/soc-topology.c | 16 +- + sound/soc/sof/Kconfig | 6 +- + sound/soc/sof/Makefile | 6 +- + sound/soc/sof/compress.c | 158 +- + sound/soc/sof/compress.h | 32 - + sound/soc/sof/control.c | 192 +- + sound/soc/sof/core.c | 35 +- + sound/soc/sof/debug.c | 87 +- + sound/soc/sof/imx/Kconfig | 2 + + sound/soc/sof/imx/imx-ops.h | 10 + + sound/soc/sof/imx/imx8.c | 47 +- + sound/soc/sof/imx/imx8m.c | 41 +- + sound/soc/sof/intel/Makefile | 5 +- + sound/soc/sof/intel/apl.c | 7 +- + sound/soc/sof/intel/atom.c | 5 +- + sound/soc/sof/intel/bdw.c | 19 +- + sound/soc/sof/intel/byt.c | 30 +- + sound/soc/sof/intel/cnl.c | 7 +- + sound/soc/sof/intel/hda-dai.c | 262 +- + sound/soc/sof/intel/hda-dsp.c | 61 +- + sound/soc/sof/intel/hda-ipc.c | 15 +- + sound/soc/sof/intel/hda-loader.c | 11 +- + .../soc/sof/intel/{hda-compress.c => hda-probes.c} | 0 + sound/soc/sof/intel/hda-stream.c | 92 +- + sound/soc/sof/intel/hda.c | 232 +- + sound/soc/sof/intel/hda.h | 52 +- + sound/soc/sof/intel/icl.c | 7 +- + sound/soc/sof/intel/pci-apl.c | 2 - + sound/soc/sof/intel/pci-cnl.c | 3 - + sound/soc/sof/intel/pci-icl.c | 2 - + sound/soc/sof/intel/pci-tgl.c | 5 - + sound/soc/sof/intel/pci-tng.c | 16 +- + sound/soc/sof/intel/tgl.c | 7 +- + sound/soc/sof/ipc.c | 217 +- + sound/soc/sof/loader.c | 161 +- + sound/soc/sof/ops.c | 3 + + sound/soc/sof/ops.h | 77 +- + sound/soc/sof/pcm.c | 71 +- + sound/soc/sof/pm.c | 12 +- + sound/soc/sof/probe.h | 85 - + sound/soc/sof/sof-audio.c | 715 +++- + sound/soc/sof/sof-audio.h | 52 +- + sound/soc/sof/sof-of-dev.c | 24 +- + sound/soc/sof/sof-priv.h | 154 +- + sound/soc/sof/{probe.c => sof-probes.c} | 280 +- + sound/soc/sof/sof-probes.h | 38 + + sound/soc/sof/{intel/intel-ipc.c => stream-ipc.c} | 56 +- + sound/soc/sof/topology.c | 417 +-- + sound/soc/sof/trace.c | 5 +- + sound/soc/sof/utils.c | 28 +- + sound/soc/sof/xtensa/core.c | 2 +- + sound/soc/tegra/Kconfig | 48 + + sound/soc/tegra/Makefile | 10 + + sound/soc/tegra/tegra210_adx.c | 531 +++ + sound/soc/tegra/tegra210_adx.h | 72 + + sound/soc/tegra/tegra210_ahub.c | 511 ++- + sound/soc/tegra/tegra210_amx.c | 600 ++++ + sound/soc/tegra/tegra210_amx.h | 93 + + sound/soc/tegra/tegra210_mixer.c | 674 ++++ + sound/soc/tegra/tegra210_mixer.h | 100 + + sound/soc/tegra/tegra210_mvc.c | 645 ++++ + sound/soc/tegra/tegra210_mvc.h | 117 + + sound/soc/tegra/tegra210_sfc.c | 3549 ++++++++++++++++++++ + sound/soc/tegra/tegra210_sfc.h | 78 + + sound/soc/tegra/tegra_asoc_machine.c | 2 +- + sound/soc/ti/Kconfig | 2 +- + sound/soc/ti/davinci-evm.c | 2 +- + sound/soc/ti/omap-abe-twl6040.c | 2 +- + sound/soc/ux500/mop500_ab8500.c | 2 +- + sound/soc/ux500/mop500_ab8500.h | 2 +- + 331 files changed, 32807 insertions(+), 5686 deletions(-) + create mode 100644 Documentation/devicetree/bindings/sound/audio-graph-card2.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/bt-sco.txt + create mode 100644 
Documentation/devicetree/bindings/sound/cirrus,cs35l41.yaml + create mode 100644 Documentation/devicetree/bindings/sound/linux,bt-sco.yaml + create mode 100644 Documentation/devicetree/bindings/sound/linux,spdif-dit.yaml + create mode 100644 Documentation/devicetree/bindings/sound/mt8192-afe-pcm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/name-prefix.txt + create mode 100644 Documentation/devicetree/bindings/sound/name-prefix.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nau8821.txt + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-adx.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-amx.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-mixer.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-mvc.yaml + create mode 100644 Documentation/devicetree/bindings/sound/nvidia,tegra210-sfc.yaml + create mode 100644 Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml + create mode 100644 Documentation/devicetree/bindings/sound/richtek,rt9120.yaml + create mode 100644 Documentation/devicetree/bindings/sound/rockchip,i2s-tdm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/rockchip,pdm.txt + create mode 100644 Documentation/devicetree/bindings/sound/rockchip,pdm.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/simple-amplifier.txt + create mode 100644 Documentation/devicetree/bindings/sound/simple-audio-amplifier.yaml + delete mode 100644 Documentation/devicetree/bindings/sound/spdif-transmitter.txt + create mode 100644 Documentation/devicetree/bindings/sound/test-component.yaml + create mode 100644 Documentation/devicetree/bindings/sound/wlf,wm8978.yaml + create mode 100644 drivers/firmware/cirrus/Kconfig + create mode 100644 drivers/firmware/cirrus/Makefile + create mode 100644 drivers/firmware/cirrus/cs_dsp.c + create mode 100644 include/linux/firmware/cirrus/cs_dsp.h + rename {sound/soc/codecs => include/linux/firmware/cirrus}/wmfw.h (91%) + create mode 100644 include/sound/cs35l41.h + create mode 100644 include/sound/rt5682s.h + create mode 100644 sound/soc/amd/vangogh/acp5x-mach.c + create mode 100644 sound/soc/codecs/cs35l41-i2c.c + create mode 100644 sound/soc/codecs/cs35l41-spi.c + create mode 100644 sound/soc/codecs/cs35l41-tables.c + create mode 100644 sound/soc/codecs/cs35l41.c + create mode 100644 sound/soc/codecs/cs35l41.h + create mode 100644 sound/soc/codecs/nau8821.c + create mode 100644 sound/soc/codecs/nau8821.h + create mode 100644 sound/soc/codecs/rt5682s.c + create mode 100644 sound/soc/codecs/rt5682s.h + create mode 100644 sound/soc/codecs/rt9120.c + create mode 100644 sound/soc/generic/audio-graph-card2-custom-sample.c + create mode 100644 sound/soc/generic/audio-graph-card2-custom-sample.dtsi + create mode 100644 sound/soc/generic/audio-graph-card2.c + create mode 100644 sound/soc/generic/test-component.c + create mode 100644 sound/soc/intel/boards/sof_es8336.c + create mode 100644 sound/soc/rockchip/rockchip_i2s_tdm.c + create mode 100644 sound/soc/rockchip/rockchip_i2s_tdm.h + delete mode 100644 sound/soc/sof/compress.h + create mode 100644 sound/soc/sof/imx/imx-ops.h + rename sound/soc/sof/intel/{hda-compress.c => hda-probes.c} (100%) + delete mode 100644 sound/soc/sof/probe.h + rename sound/soc/sof/{probe.c => sof-probes.c} (52%) + create mode 100644 sound/soc/sof/sof-probes.h + rename sound/soc/sof/{intel/intel-ipc.c => stream-ipc.c} (56%) + create mode 100644 
sound/soc/tegra/tegra210_adx.c + create mode 100644 sound/soc/tegra/tegra210_adx.h + create mode 100644 sound/soc/tegra/tegra210_amx.c + create mode 100644 sound/soc/tegra/tegra210_amx.h + create mode 100644 sound/soc/tegra/tegra210_mixer.c + create mode 100644 sound/soc/tegra/tegra210_mixer.h + create mode 100644 sound/soc/tegra/tegra210_mvc.c + create mode 100644 sound/soc/tegra/tegra210_mvc.h + create mode 100644 sound/soc/tegra/tegra210_sfc.c + create mode 100644 sound/soc/tegra/tegra210_sfc.h +Merging modules/modules-next (ced75a2f5da7 MAINTAINERS: Add Luis Chamberlain as modules maintainer) +$ git merge -m Merge branch 'modules-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules/modules-next +Already up to date. +Merging input/next (c6ac8f0b4ca9 Input: ili210x - add ili251x firmware update support) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git input/next +Auto-merging kernel/reboot.c +Auto-merging drivers/input/touchscreen/goodix.c +Auto-merging drivers/input/touchscreen/elants_i2c.c +Auto-merging drivers/input/keyboard/Kconfig +Auto-merging drivers/input/joystick/analog.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + .../devicetree/bindings/input/cypress-sf.yaml | 61 +++ + MAINTAINERS | 12 +- + drivers/input/joystick/analog.c | 18 +- + drivers/input/joystick/tmdc.c | 2 +- + drivers/input/keyboard/Kconfig | 10 + + drivers/input/keyboard/Makefile | 1 + + drivers/input/keyboard/cypress-sf.c | 224 +++++++++ + drivers/input/keyboard/ep93xx_keypad.c | 172 +++---- + drivers/input/keyboard/mpr121_touchkey.c | 4 +- + drivers/input/keyboard/omap-keypad.c | 3 +- + drivers/input/keyboard/tm2-touchkey.c | 7 + + drivers/input/misc/adxl34x-i2c.c | 4 +- + drivers/input/misc/adxl34x-spi.c | 4 +- + drivers/input/misc/adxl34x.c | 6 +- + drivers/input/misc/adxl34x.h | 2 +- + drivers/input/misc/ariel-pwrbutton.c | 7 + + drivers/input/misc/cpcap-pwrbutton.c | 7 +- + drivers/input/misc/max77693-haptic.c | 1 - + drivers/input/misc/max8925_onkey.c | 2 +- + drivers/input/misc/palmas-pwrbutton.c | 5 + + drivers/input/misc/pm8941-pwrkey.c | 6 +- + drivers/input/touchscreen/Kconfig | 1 + + drivers/input/touchscreen/Makefile | 3 +- + drivers/input/touchscreen/ads7846.c | 200 +++----- + drivers/input/touchscreen/elants_i2c.c | 4 +- + drivers/input/touchscreen/goodix.c | 231 +++++---- + drivers/input/touchscreen/goodix.h | 117 +++++ + drivers/input/touchscreen/goodix_fwupload.c | 427 +++++++++++++++++ + drivers/input/touchscreen/ili210x.c | 529 ++++++++++++++++++++- + drivers/input/touchscreen/raydium_i2c_ts.c | 54 ++- + drivers/input/touchscreen/st1232.c | 3 +- + drivers/input/touchscreen/tsc2004.c | 4 +- + drivers/input/touchscreen/tsc2005.c | 4 +- + drivers/input/touchscreen/tsc200x-core.c | 4 +- + drivers/input/touchscreen/tsc200x-core.h | 2 +- + include/linux/spi/ads7846.h | 15 - + kernel/reboot.c | 1 + + 37 files changed, 1736 insertions(+), 421 deletions(-) + create mode 100644 Documentation/devicetree/bindings/input/cypress-sf.yaml + create mode 100644 drivers/input/keyboard/cypress-sf.c + create mode 100644 drivers/input/touchscreen/goodix.h + create mode 100644 drivers/input/touchscreen/goodix_fwupload.c +Merging block/for-next (a43753be92b4 Merge branch 'for-5.16/io_uring' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.dk/linux-block.git block/for-next +Auto-merging mm/filemap.c +Auto-merging kernel/sched/core.c +Removing include/linux/keyslot-manager.h +Auto-merging 
include/linux/iomap.h +Auto-merging include/linux/fs.h +Auto-merging fs/zonefs/super.c +Auto-merging fs/xfs/xfs_file.c +Auto-merging fs/pstore/blk.c +Auto-merging fs/orangefs/super.c +Auto-merging fs/ntfs3/file.c +Auto-merging fs/ntfs/file.c +Auto-merging fs/nfs/direct.c +Auto-merging fs/iomap/direct-io.c +Auto-merging fs/io_uring.c +CONFLICT (content): Merge conflict in fs/io_uring.c +Auto-merging fs/io-wq.c +Auto-merging fs/gfs2/file.c +Auto-merging fs/fuse/file.c +Auto-merging fs/f2fs/compress.c +Auto-merging fs/ext4/super.c +Auto-merging fs/ext4/file.c +Auto-merging fs/cifs/file.c +Auto-merging fs/ceph/file.c +Auto-merging fs/cachefiles/io.c +Auto-merging fs/btrfs/volumes.c +Auto-merging fs/btrfs/ioctl.c +Auto-merging fs/btrfs/inode.c +Auto-merging fs/btrfs/disk-io.c +Auto-merging fs/btrfs/dev-replace.c +Auto-merging fs/btrfs/ctree.c +Auto-merging fs/btrfs/compression.c +Auto-merging drivers/scsi/sd.c +Auto-merging drivers/s390/block/dcssblk.c +Auto-merging drivers/s390/block/dasd_genhd.c +Auto-merging drivers/gpu/drm/i915/i915_utils.h +Removing drivers/block/cryptoloop.c +Auto-merging block/partitions/core.c +Removing block/keyslot-manager.c +Auto-merging block/blk-cgroup.c +Auto-merging Makefile +Auto-merging Documentation/block/inline-encryption.rst +Recorded preimage for 'fs/io_uring.c' +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'fs/io_uring.c'. +[master 32e42b8171d0] Merge branch 'for-next' of git://git.kernel.dk/linux-block.git +$ git diff -M --stat --summary HEAD^.. + Documentation/block/inline-encryption.rst | 451 +++---- + Documentation/cdrom/cdrom-standard.rst | 11 + + Documentation/userspace-api/ioctl/cdrom.rst | 113 +- + Makefile | 3 +- + arch/m68k/emu/nfblock.c | 12 +- + arch/mips/rb532/prom.c | 1 - + arch/mips/sibyte/common/cfe.c | 1 - + arch/mips/sibyte/swarm/setup.c | 1 - + arch/openrisc/mm/init.c | 1 - + arch/powerpc/platforms/cell/spufs/inode.c | 1 + + arch/um/drivers/ubd_kern.c | 14 +- + arch/xtensa/platforms/iss/simdisk.c | 16 +- + block/Kconfig | 28 +- + block/Kconfig.iosched | 4 - + block/Makefile | 4 +- + block/bdev.c | 46 +- + block/bfq-cgroup.c | 14 +- + block/bfq-iosched.c | 6 +- + block/bio-integrity.c | 4 +- + block/bio.c | 136 ++- + block/blk-cgroup.c | 17 +- + block/blk-core.c | 413 +++---- + block/blk-crypto-fallback.c | 119 +- + block/blk-crypto-internal.h | 2 +- + block/blk-crypto-profile.c | 565 +++++++++ + block/blk-crypto.c | 29 +- + block/blk-exec.c | 10 +- + block/blk-flush.c | 12 +- + block/blk-integrity.c | 6 +- + block/blk-iocost.c | 12 +- + block/blk-iolatency.c | 1 + + block/blk-merge.c | 127 +- + block/blk-mq-debugfs.c | 132 +- + block/blk-mq-sched.c | 129 +- + block/blk-mq-sched.h | 49 +- + block/blk-mq-tag.c | 163 ++- + block/blk-mq-tag.h | 38 +- + block/blk-mq.c | 1010 ++++++++++------ + block/blk-mq.h | 71 +- + block/blk-rq-qos.h | 5 +- + block/blk-sysfs.c | 24 +- + block/blk-throttle.c | 163 +-- + block/blk-throttle.h | 182 +++ + block/blk-wbt.c | 3 + + block/blk.h | 127 +- + block/bounce.c | 1 + + block/bsg-lib.c | 32 +- + block/elevator.c | 4 +- + {include/linux => block}/elevator.h | 21 +- + block/fops.c | 167 ++- + block/genhd.c | 36 +- + block/holder.c | 1 + + block/ioctl.c | 37 +- + block/keyslot-manager.c | 578 --------- + block/kyber-iosched.c | 6 +- + block/mq-deadline.c | 224 ++-- + block/partitions/Kconfig | 4 + + block/partitions/core.c | 6 +- + block/partitions/efi.c | 2 +- + block/partitions/ibm.c | 19 +- + block/t10-pi.c | 2 +- + crypto/af_alg.c 
| 2 +- + drivers/block/Kconfig | 25 +- + drivers/block/Makefile | 1 - + drivers/block/amiflop.c | 9 +- + drivers/block/aoe/aoeblk.c | 19 +- + drivers/block/ataflop.c | 66 +- + drivers/block/brd.c | 12 +- + drivers/block/cryptoloop.c | 206 ---- + drivers/block/drbd/drbd_int.h | 5 +- + drivers/block/drbd/drbd_main.c | 6 +- + drivers/block/drbd/drbd_req.c | 3 +- + drivers/block/floppy.c | 35 +- + drivers/block/loop.c | 403 +------ + drivers/block/loop.h | 30 - + drivers/block/mtip32xx/mtip32xx.c | 6 +- + drivers/block/n64cart.c | 24 +- + drivers/block/nbd.c | 173 ++- + drivers/block/null_blk/main.c | 111 +- + drivers/block/null_blk/null_blk.h | 4 + + drivers/block/paride/pcd.c | 312 +++-- + drivers/block/paride/pd.c | 144 +-- + drivers/block/paride/pf.c | 236 ++-- + drivers/block/pktcdvd.c | 18 +- + drivers/block/ps3vram.c | 6 +- + drivers/block/rbd.c | 8 +- + drivers/block/rnbd/rnbd-clt.c | 15 +- + drivers/block/rnbd/rnbd-proto.h | 2 +- + drivers/block/rsxx/core.c | 4 +- + drivers/block/rsxx/dev.c | 19 +- + drivers/block/swim.c | 36 +- + drivers/block/swim3.c | 5 +- + drivers/block/sx8.c | 15 +- + drivers/block/xen-blkback/xenbus.c | 2 +- + drivers/block/xen-blkfront.c | 9 +- + drivers/block/zram/zram_drv.c | 10 +- + drivers/cdrom/cdrom.c | 63 +- + drivers/cdrom/gdrom.c | 7 +- + drivers/gpu/drm/i915/i915_utils.h | 1 + + drivers/md/bcache/bcache.h | 4 - + drivers/md/bcache/btree.c | 2 +- + drivers/md/bcache/debug.c | 15 +- + drivers/md/bcache/io.c | 16 +- + drivers/md/bcache/request.c | 19 +- + drivers/md/bcache/request.h | 4 +- + drivers/md/bcache/super.c | 91 +- + drivers/md/bcache/sysfs.c | 2 +- + drivers/md/bcache/util.h | 12 - + drivers/md/bcache/writeback.c | 2 +- + drivers/md/dm-bio-record.h | 1 + + drivers/md/dm-bufio.c | 2 +- + drivers/md/dm-cache-metadata.c | 2 +- + drivers/md/dm-cache-target.c | 2 +- + drivers/md/dm-clone-target.c | 2 +- + drivers/md/dm-core.h | 4 +- + drivers/md/dm-crypt.c | 1 + + drivers/md/dm-dust.c | 5 +- + drivers/md/dm-ebs-target.c | 2 +- + drivers/md/dm-era-target.c | 2 +- + drivers/md/dm-exception-store.h | 2 +- + drivers/md/dm-flakey.c | 3 +- + drivers/md/dm-ima.c | 1 + + drivers/md/dm-integrity.c | 6 +- + drivers/md/dm-linear.c | 3 +- + drivers/md/dm-log-writes.c | 4 +- + drivers/md/dm-log.c | 2 +- + drivers/md/dm-mpath.c | 2 +- + drivers/md/dm-ps-historical-service-time.c | 1 + + drivers/md/dm-raid.c | 6 +- + drivers/md/dm-rq.c | 1 - + drivers/md/dm-switch.c | 2 +- + drivers/md/dm-table.c | 172 ++- + drivers/md/dm-thin-metadata.c | 2 +- + drivers/md/dm-thin.c | 2 +- + drivers/md/dm-verity-target.c | 4 +- + drivers/md/dm-writecache.c | 2 +- + drivers/md/dm-zoned-target.c | 2 +- + drivers/md/dm.c | 42 +- + drivers/md/md.c | 130 +- + drivers/md/md.h | 2 +- + drivers/md/raid1.c | 13 +- + drivers/md/raid10.c | 2 +- + drivers/md/raid5.c | 7 +- + drivers/mmc/core/crypto.c | 11 +- + drivers/mmc/core/sd.c | 1 + + drivers/mmc/host/cqhci-crypto.c | 33 +- + drivers/mtd/mtd_blkdevs.c | 6 +- + drivers/mtd/mtdsuper.c | 1 + + drivers/nvdimm/blk.c | 5 +- + drivers/nvdimm/btt.c | 5 +- + drivers/nvdimm/core.c | 1 + + drivers/nvdimm/pmem.c | 3 +- + drivers/nvme/host/core.c | 140 ++- + drivers/nvme/host/fabrics.c | 6 +- + drivers/nvme/host/fabrics.h | 8 + + drivers/nvme/host/fc.c | 34 +- + drivers/nvme/host/multipath.c | 52 +- + drivers/nvme/host/nvme.h | 19 + + drivers/nvme/host/pci.c | 58 +- + drivers/nvme/host/rdma.c | 28 +- + drivers/nvme/host/tcp.c | 20 +- + drivers/nvme/host/zns.c | 2 + + drivers/nvme/target/admin-cmd.c | 16 +- + drivers/nvme/target/configfs.c | 41 + + 
drivers/nvme/target/core.c | 17 +- + drivers/nvme/target/discovery.c | 2 + + drivers/nvme/target/fabrics-cmd.c | 3 +- + drivers/nvme/target/io-cmd-bdev.c | 5 +- + drivers/nvme/target/io-cmd-file.c | 4 +- + drivers/nvme/target/loop.c | 6 +- + drivers/nvme/target/nvmet.h | 6 + + drivers/nvme/target/rdma.c | 31 + + drivers/nvme/target/tcp.c | 16 + + drivers/s390/block/dasd.c | 9 +- + drivers/s390/block/dasd_3990_erp.c | 6 +- + drivers/s390/block/dasd_eckd.c | 294 +++-- + drivers/s390/block/dasd_eckd.h | 13 +- + drivers/s390/block/dasd_erp.c | 8 +- + drivers/s390/block/dasd_genhd.c | 1 + + drivers/s390/block/dasd_int.h | 11 +- + drivers/s390/block/dasd_ioctl.c | 4 +- + drivers/s390/block/dcssblk.c | 7 +- + drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 1 + + drivers/scsi/lpfc/lpfc.h | 1 + + drivers/scsi/qla2xxx/qla_nvme.c | 15 + + drivers/scsi/scsi_bsg.c | 4 +- + drivers/scsi/scsi_debug.c | 10 +- + drivers/scsi/scsi_error.c | 2 +- + drivers/scsi/scsi_ioctl.c | 4 +- + drivers/scsi/scsi_lib.c | 30 +- + drivers/scsi/scsi_scan.c | 1 - + drivers/scsi/sd.c | 40 + + drivers/scsi/sd_dif.c | 2 +- + drivers/scsi/sg.c | 5 +- + drivers/scsi/sr.c | 3 +- + drivers/scsi/st.c | 3 +- + drivers/scsi/ufs/ufshcd-crypto.c | 32 +- + drivers/scsi/ufs/ufshcd-crypto.h | 9 +- + drivers/scsi/ufs/ufshcd.c | 2 +- + drivers/scsi/ufs/ufshcd.h | 6 +- + drivers/scsi/virtio_scsi.c | 1 + + drivers/target/target_core_file.c | 5 +- + drivers/target/target_core_iblock.c | 6 +- + drivers/target/target_core_pscsi.c | 3 +- + drivers/usb/gadget/function/f_fs.c | 2 +- + drivers/usb/gadget/legacy/inode.c | 7 +- + fs/affs/super.c | 2 +- + fs/aio.c | 6 +- + fs/btrfs/compression.c | 1 + + fs/btrfs/ctree.c | 1 + + fs/btrfs/dev-replace.c | 3 +- + fs/btrfs/disk-io.c | 2 +- + fs/btrfs/inode.c | 9 +- + fs/btrfs/ioctl.c | 4 +- + fs/btrfs/volumes.c | 10 +- + fs/buffer.c | 4 +- + fs/cachefiles/io.c | 12 +- + fs/ceph/file.c | 2 +- + fs/cifs/file.c | 4 +- + fs/cramfs/inode.c | 2 +- + fs/direct-io.c | 16 +- + fs/ext4/file.c | 2 +- + fs/ext4/super.c | 2 +- + fs/f2fs/compress.c | 1 + + fs/fat/inode.c | 11 +- + fs/fs-writeback.c | 5 +- + fs/fuse/file.c | 2 +- + fs/gfs2/file.c | 4 +- + fs/hfs/mdb.c | 2 +- + fs/hfsplus/wrapper.c | 2 +- + fs/internal.h | 11 - + fs/io-wq.c | 12 +- + fs/io-wq.h | 59 +- + fs/io_uring.c | 1735 ++++++++++++++------------- + fs/iomap/direct-io.c | 59 +- + fs/jfs/resize.c | 5 +- + fs/jfs/super.c | 5 +- + fs/nfs/blocklayout/dev.c | 4 +- + fs/nfs/direct.c | 2 +- + fs/nfsd/Kconfig | 1 - + fs/nfsd/blocklayout.c | 158 +-- + fs/nfsd/nfs4layouts.c | 5 +- + fs/nilfs2/ioctl.c | 2 +- + fs/nilfs2/super.c | 2 +- + fs/nilfs2/the_nilfs.c | 2 +- + fs/ntfs/file.c | 1 + + fs/ntfs/super.c | 8 +- + fs/ntfs3/file.c | 1 + + fs/ntfs3/inode.c | 2 +- + fs/ntfs3/super.c | 2 +- + fs/orangefs/inode.c | 2 +- + fs/orangefs/super.c | 1 + + fs/overlayfs/file.c | 4 +- + fs/pstore/blk.c | 8 +- + fs/quota/quota.c | 1 + + fs/ramfs/inode.c | 1 + + fs/reiserfs/super.c | 8 +- + fs/squashfs/super.c | 5 +- + fs/sync.c | 62 +- + fs/udf/lowlevel.c | 5 +- + fs/udf/super.c | 9 +- + fs/xfs/xfs_file.c | 2 +- + fs/zonefs/super.c | 2 +- + include/linux/backing-dev.h | 19 +- + include/linux/bio.h | 146 +-- + include/linux/blk-crypto-profile.h | 166 +++ + include/linux/blk-integrity.h | 183 +++ + include/linux/blk-mq.h | 586 ++++++++- + include/linux/blk_types.h | 55 +- + include/linux/blkdev.h | 908 ++------------ + include/linux/blktrace_api.h | 2 +- + include/linux/bvec.h | 2 +- + include/linux/cdrom.h | 1 + + include/linux/device-mapper.h | 4 +- + include/linux/fs.h | 12 +- + 
include/linux/genhd.h | 38 +- + include/linux/iomap.h | 5 +- + include/linux/keyslot-manager.h | 120 -- + include/linux/mmc/host.h | 4 +- + include/linux/nvme-fc-driver.h | 7 + + include/linux/nvme-rdma.h | 2 + + include/linux/nvme.h | 11 +- + include/linux/part_stat.h | 1 + + include/linux/percpu-refcount.h | 33 +- + include/linux/sbitmap.h | 24 + + include/linux/sched.h | 2 - + include/linux/t10-pi.h | 2 +- + include/linux/writeback.h | 14 +- + include/scsi/scsi_cmnd.h | 3 + + include/scsi/scsi_device.h | 2 +- + include/trace/events/block.h | 6 +- + include/trace/events/io_uring.h | 61 + + include/uapi/linux/bcache.h | 4 +- + include/uapi/linux/cdrom.h | 19 + + include/uapi/linux/io_uring.h | 1 + + init/main.c | 1 - + kernel/acct.c | 1 - + kernel/exit.c | 1 - + kernel/fork.c | 1 - + kernel/sched/core.c | 7 +- + kernel/sched/sched.h | 1 - + kernel/trace/blktrace.c | 7 +- + lib/random32.c | 1 + + lib/sbitmap.c | 95 +- + mm/backing-dev.c | 19 +- + mm/filemap.c | 1 - + mm/highmem.c | 1 - + mm/mempool.c | 1 - + mm/nommu.c | 1 - + mm/page_io.c | 10 +- + mm/readahead.c | 1 - + mm/shmem.c | 1 - + mm/swapfile.c | 2 +- + 313 files changed, 7297 insertions(+), 6791 deletions(-) + create mode 100644 block/blk-crypto-profile.c + create mode 100644 block/blk-throttle.h + rename {include/linux => block}/elevator.h (92%) + delete mode 100644 block/keyslot-manager.c + delete mode 100644 drivers/block/cryptoloop.c + create mode 100644 include/linux/blk-crypto-profile.h + create mode 100644 include/linux/blk-integrity.h + delete mode 100644 include/linux/keyslot-manager.h +Merging device-mapper/for-next (dffca4d565b3 dm: Remove redundant flush_workqueue() calls) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git device-mapper/for-next +Auto-merging drivers/md/dm-zoned-target.c +Auto-merging drivers/md/dm-integrity.c +Auto-merging drivers/md/dm-crypt.c +Auto-merging drivers/md/dm-bufio.c +Auto-merging drivers/md/bcache/writeback.c +Merge made by the 'recursive' strategy. + drivers/md/Kconfig | 10 ++++++ + drivers/md/Makefile | 4 +++ + drivers/md/bcache/writeback.c | 4 +-- + drivers/md/dm-audit.c | 84 +++++++++++++++++++++++++++++++++++++++++++ + drivers/md/dm-audit.h | 66 ++++++++++++++++++++++++++++++++++ + drivers/md/dm-bufio.c | 1 - + drivers/md/dm-crypt.c | 22 +++++++++--- + drivers/md/dm-integrity.c | 25 ++++++++++--- + drivers/md/dm-zoned-target.c | 1 - + include/uapi/linux/audit.h | 2 ++ + 10 files changed, 206 insertions(+), 13 deletions(-) + create mode 100644 drivers/md/dm-audit.c + create mode 100644 drivers/md/dm-audit.h +Merging libata/for-next (1af5f7af2484 pata_radisys: fix checking of DMA state) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git libata/for-next +Merge made by the 'recursive' strategy. + drivers/ata/ahci.c | 13 +++++-------- + drivers/ata/libata-core.c | 2 +- + drivers/ata/libata-scsi.c | 4 ++-- + drivers/ata/pata_ali.c | 4 ++-- + drivers/ata/pata_amd.c | 2 +- + drivers/ata/pata_optidma.c | 4 ++-- + drivers/ata/pata_radisys.c | 4 ++-- + 7 files changed, 15 insertions(+), 18 deletions(-) +Merging pcmcia/pcmcia-next (e39cdacf2f66 pcmcia: i82092: fix a null pointer dereference bug) +$ git merge -m Merge branch 'pcmcia-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux.git pcmcia/pcmcia-next +Already up to date. 
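Every tree in this log is pulled in with the same pattern: a merge commit whose subject records which branch of which remote was merged, followed (for non-trivial merges) by "git diff -M --stat --summary HEAD^.." to show the merge's effect against its first parent, with -M enabling rename detection and --summary producing the create/delete/rename mode lines seen above. A minimal sketch of one such step, reusing the device-mapper URL from the log above; the explicit fetch and use of FETCH_HEAD are assumptions for illustration, since the log only shows the merge itself against an already-configured remote-tracking branch:

    # Illustrative sketch, not part of the captured log.
    url=git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
    git fetch "$url" for-next
    git merge -m "Merge branch 'for-next' of $url" FETCH_HEAD
    git diff -M --stat --summary HEAD^..   # per-file stat plus create/delete/rename summary

When a branch brings in nothing new, git prints "Already up to date." and no merge commit is created, which is why some trees below have no diffstat at all.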
+Merging mmc/next (61840edc8813 mmc: dw_mmc: Drop use of ->init_card() callback) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git mmc/next +Removing include/linux/mmc/sdhci-pci-data.h +Auto-merging include/linux/mmc/host.h +Removing drivers/mmc/host/sdhci-pci-data.c +Removing Documentation/devicetree/bindings/mmc/mmc-card.txt +Merge made by the 'recursive' strategy. + .../devicetree/bindings/mmc/arasan,sdhci.yaml | 26 +- + Documentation/devicetree/bindings/mmc/mmc-card.txt | 30 -- + .../devicetree/bindings/mmc/mmc-card.yaml | 48 +++ + .../devicetree/bindings/mmc/mmc-controller.yaml | 6 - + Documentation/devicetree/bindings/mmc/mtk-sd.yaml | 12 + + .../devicetree/bindings/mmc/sdhci-msm.txt | 1 + + .../devicetree/bindings/mmc/sdhci-omap.txt | 9 +- + drivers/memstick/core/ms_block.c | 8 +- + drivers/memstick/core/mspro_block.c | 6 +- + drivers/memstick/host/jmb38x_ms.c | 5 +- + drivers/memstick/host/r592.c | 8 +- + drivers/mmc/core/block.c | 7 +- + drivers/mmc/core/mmc.c | 8 + + drivers/mmc/core/mmc_ops.h | 1 - + drivers/mmc/core/slot-gpio.c | 42 +-- + drivers/mmc/host/Kconfig | 8 +- + drivers/mmc/host/Makefile | 1 - + drivers/mmc/host/cqhci-core.c | 4 +- + drivers/mmc/host/dw_mmc.c | 42 ++- + drivers/mmc/host/mmci.c | 4 + + drivers/mmc/host/moxart-mmc.c | 29 +- + drivers/mmc/host/mtk-sd.c | 99 ++++++- + drivers/mmc/host/mxs-mmc.c | 10 + + drivers/mmc/host/omap_hsmmc.c | 12 +- + drivers/mmc/host/sdhci-acpi.c | 14 +- + drivers/mmc/host/sdhci-of-arasan.c | 29 +- + drivers/mmc/host/sdhci-omap.c | 322 ++++++++++++++++----- + drivers/mmc/host/sdhci-pci-core.c | 152 +--------- + drivers/mmc/host/sdhci-pci-data.c | 6 - + drivers/mmc/host/sdhci-pci-o2micro.c | 2 +- + drivers/mmc/host/sdhci-pci.h | 5 - + drivers/mmc/host/sdhci-s3c.c | 1 - + drivers/mmc/host/sdhci-sprd.c | 13 + + drivers/mmc/host/sdhci.c | 42 ++- + drivers/mmc/host/sdhci.h | 2 +- + include/linux/mmc/host.h | 4 + + include/linux/mmc/sdhci-pci-data.h | 18 -- + 37 files changed, 641 insertions(+), 395 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/mmc/mmc-card.txt + create mode 100644 Documentation/devicetree/bindings/mmc/mmc-card.yaml + delete mode 100644 drivers/mmc/host/sdhci-pci-data.c + delete mode 100644 include/linux/mmc/sdhci-pci-data.h +Merging mfd/for-mfd-next (813c24f4caf3 mfd: altera-sysmgr: Fix a mistake caused by resource_size conversion) +$ git merge -m Merge branch 'for-mfd-next' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git mfd/for-mfd-next +Removing include/linux/mfd/hi6421-spmi-pmic.h +Removing Documentation/devicetree/bindings/mfd/axp20x.txt +Removing Documentation/devicetree/bindings/mfd/ac100.txt +Removing Documentation/devicetree/bindings/gpio/gpio-axp209.txt +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/clock/maxim,max77686.txt | 4 +- + .../devicetree/bindings/gpio/gpio-axp209.txt | 75 ---- + .../bindings/gpio/x-powers,axp209-gpio.yaml | 55 +++ + .../bindings/i2c/allwinner,sun6i-a31-p2wi.yaml | 2 +- + Documentation/devicetree/bindings/mfd/ac100.txt | 50 --- + Documentation/devicetree/bindings/mfd/axp20x.txt | 273 -------------- + .../devicetree/bindings/mfd/brcm,cru.yaml | 21 ++ + .../devicetree/bindings/mfd/brcm,misc.yaml | 60 ++++ + Documentation/devicetree/bindings/mfd/max14577.txt | 4 +- + Documentation/devicetree/bindings/mfd/max77686.txt | 2 +- + Documentation/devicetree/bindings/mfd/max77693.txt | 2 +- + .../devicetree/bindings/mfd/qcom,tcsr.txt | 1 + + Documentation/devicetree/bindings/mfd/syscon.yaml | 2 + + .../devicetree/bindings/mfd/x-powers,ac100.yaml | 116 ++++++ + .../devicetree/bindings/mfd/x-powers,axp152.yaml | 400 +++++++++++++++++++++ + .../devicetree/bindings/mfd/xylon,logicvc.yaml | 3 + + .../devicetree/bindings/regulator/max77686.txt | 2 +- + drivers/mfd/Kconfig | 8 +- + drivers/mfd/altera-a10sr.c | 8 + + drivers/mfd/altera-sysmgr.c | 2 +- + drivers/mfd/arizona-core.c | 13 - + drivers/mfd/arizona-i2c.c | 14 +- + drivers/mfd/arizona-spi.c | 13 +- + drivers/mfd/arizona.h | 2 - + drivers/mfd/cros_ec_dev.c | 1 - + drivers/mfd/da9063-i2c.c | 2 + + drivers/mfd/hi6421-spmi-pmic.c | 16 +- + drivers/mfd/intel-lpss-pci.c | 2 + + drivers/mfd/max14577.c | 6 +- + drivers/mfd/max77686.c | 3 +- + drivers/mfd/max77693.c | 12 +- + drivers/mfd/mfd-core.c | 2 + + drivers/mfd/motorola-cpcap.c | 8 + + drivers/mfd/qcom-pm8xxx.c | 39 +- + drivers/mfd/rk808.c | 4 + + drivers/mfd/sec-irq.c | 3 +- + drivers/mfd/sprd-sc27xx-spi.c | 17 + + drivers/mfd/ti_am335x_tscadc.c | 2 +- + drivers/misc/hi6421v600-irq.c | 9 +- + drivers/regulator/hi6421v600-regulator.c | 10 +- + include/linux/mfd/da9063/core.h | 1 + + include/linux/mfd/hi6421-spmi-pmic.h | 25 -- + 42 files changed, 781 insertions(+), 513 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/gpio/gpio-axp209.txt + create mode 100644 Documentation/devicetree/bindings/gpio/x-powers,axp209-gpio.yaml + delete mode 100644 Documentation/devicetree/bindings/mfd/ac100.txt + delete mode 100644 Documentation/devicetree/bindings/mfd/axp20x.txt + create mode 100644 Documentation/devicetree/bindings/mfd/brcm,misc.yaml + create mode 100644 Documentation/devicetree/bindings/mfd/x-powers,ac100.yaml + create mode 100644 Documentation/devicetree/bindings/mfd/x-powers,axp152.yaml + delete mode 100644 include/linux/mfd/hi6421-spmi-pmic.h +Merging backlight/for-backlight-next (3976e974df1f video: backlight: ili9320: Make ili9320_remove() return void) +$ git merge -m Merge branch 'for-backlight-next' of git://git.kernel.org/pub/scm/linux/kernel/git/lee/backlight.git backlight/for-backlight-next +Merge made by the 'recursive' strategy. + drivers/video/backlight/backlight.c | 28 +++++++++++++++++----------- + drivers/video/backlight/ili9320.c | 3 +-- + drivers/video/backlight/ili9320.h | 2 +- + drivers/video/backlight/vgg2432a4.c | 4 +++- + 4 files changed, 22 insertions(+), 15 deletions(-) +Merging battery/for-next (172d0ccea55c power: bq25890: add return values to error messages) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git battery/for-next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ Documentation/ABI/testing/sysfs-class-power | 13 ++ + .../bindings/power/supply/maxim,max17040.yaml | 2 +- + .../bindings/power/supply/samsung,battery.yaml | 56 +++++++ + .../power/supply/stericsson,ab8500-btemp.yaml | 10 +- + .../power/supply/stericsson,ab8500-chargalg.yaml | 10 +- + .../power/supply/stericsson,ab8500-charger.yaml | 10 +- + .../power/supply/stericsson,ab8500-fg.yaml | 10 +- + MAINTAINERS | 21 +++ + drivers/power/reset/at91-reset.c | 4 +- + drivers/power/reset/ltc2952-poweroff.c | 4 +- + drivers/power/supply/Kconfig | 23 +-- + drivers/power/supply/ab8500_bmdata.c | 3 +- + drivers/power/supply/axp288_charger.c | 178 ++++++++++++--------- + drivers/power/supply/bq25890_charger.c | 34 ++-- + drivers/power/supply/cpcap-battery.c | 15 +- + drivers/power/supply/max17040_battery.c | 2 + + drivers/power/supply/max17042_battery.c | 14 +- + drivers/power/supply/power_supply_core.c | 65 ++++---- + drivers/power/supply/rt5033_battery.c | 2 +- + drivers/power/supply/wm831x_power.c | 12 +- + include/linux/power/max17042_battery.h | 4 +- + 21 files changed, 318 insertions(+), 174 deletions(-) + create mode 100644 Documentation/devicetree/bindings/power/supply/samsung,battery.yaml +Merging regulator/for-next (7492b724df4d Merge series "Remove TPS80031 driver" from Dmitry Osipenko :) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git regulator/for-next +Removing drivers/regulator/tps80031-regulator.c +Auto-merging drivers/regulator/core.c +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/regulator/sy8106a-regulator.txt +Removing Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt +Removing Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt +Removing Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt +Removing Documentation/devicetree/bindings/regulator/max8997-regulator.txt +Removing Documentation/devicetree/bindings/regulator/max8973-regulator.txt +Removing Documentation/devicetree/bindings/regulator/max8952.txt +Removing Documentation/devicetree/bindings/clock/samsung,s2mps11.txt +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/clock/samsung,s2mps11.txt | 49 -- + .../devicetree/bindings/clock/samsung,s2mps11.yaml | 45 ++ + .../devicetree/bindings/regulator/max8952.txt | 52 -- + .../bindings/regulator/max8973-regulator.txt | 52 -- + .../bindings/regulator/max8997-regulator.txt | 145 ---- + .../bindings/regulator/maxim,max8952.yaml | 109 +++ + .../bindings/regulator/maxim,max8973.yaml | 139 ++++ + .../bindings/regulator/maxim,max8997.yaml | 445 ++++++++++++ + .../bindings/regulator/qcom,rpmh-regulator.yaml | 2 + + .../bindings/regulator/qcom,smd-rpm-regulator.yaml | 4 + + .../bindings/regulator/samsung,s2mpa01.txt | 79 --- + .../bindings/regulator/samsung,s2mpa01.yaml | 62 ++ + .../bindings/regulator/samsung,s2mps11.txt | 102 --- + .../bindings/regulator/samsung,s2mps11.yaml | 44 ++ + .../bindings/regulator/samsung,s2mps13.yaml | 44 ++ + .../bindings/regulator/samsung,s2mps14.yaml | 44 ++ + .../bindings/regulator/samsung,s2mps15.yaml | 44 ++ + .../bindings/regulator/samsung,s2mpu02.yaml | 44 ++ + .../bindings/regulator/samsung,s5m8767.txt | 145 ---- + .../bindings/regulator/samsung,s5m8767.yaml | 74 ++ + .../bindings/regulator/silergy,sy8106a.yaml | 52 ++ + .../regulator/socionext,uniphier-regulator.yaml | 1 + + .../bindings/regulator/sy8106a-regulator.txt | 23 - + MAINTAINERS | 6 +- + drivers/regulator/Kconfig | 15 +- + drivers/regulator/Makefile | 1 - + drivers/regulator/bd71815-regulator.c | 4 +- + drivers/regulator/core.c | 12 +- + drivers/regulator/dummy.c | 3 +- + drivers/regulator/lp872x.c | 52 +- + drivers/regulator/max8973-regulator.c | 4 +- + drivers/regulator/pwm-regulator.c | 12 +- + drivers/regulator/qcom-rpmh-regulator.c | 32 + + drivers/regulator/qcom_smd-regulator.c | 49 ++ + drivers/regulator/rtq6752-regulator.c | 18 +- + drivers/regulator/s5m8767.c | 21 +- + drivers/regulator/sy7636a-regulator.c | 2 +- + drivers/regulator/ti-abb-regulator.c | 31 +- + drivers/regulator/tps62360-regulator.c | 59 +- + drivers/regulator/tps80031-regulator.c | 753 --------------------- + drivers/regulator/uniphier-regulator.c | 4 + + drivers/regulator/vqmmc-ipq4019-regulator.c | 4 +- + drivers/spi/spi.c | 41 ++ + include/linux/regulator/lp872x.h | 17 +- + include/linux/regulator/tps62360.h | 6 - + 45 files changed, 1382 insertions(+), 1564 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/clock/samsung,s2mps11.txt + create mode 100644 Documentation/devicetree/bindings/clock/samsung,s2mps11.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/max8952.txt + delete mode 100644 Documentation/devicetree/bindings/regulator/max8973-regulator.txt + delete mode 100644 Documentation/devicetree/bindings/regulator/max8997-regulator.txt + create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max8952.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max8973.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/maxim,max8997.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mpa01.txt + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mpa01.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mps11.txt + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mps11.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mps13.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mps14.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mps15.yaml 
+ create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s2mpu02.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt + create mode 100644 Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml + create mode 100644 Documentation/devicetree/bindings/regulator/silergy,sy8106a.yaml + delete mode 100644 Documentation/devicetree/bindings/regulator/sy8106a-regulator.txt + delete mode 100644 drivers/regulator/tps80031-regulator.c +Merging security/next-testing (047843bdb316 Merge branch 'landlock_lsm_v34' into next-testing) +$ git merge -m Merge branch 'next-testing' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git security/next-testing +Already up to date. +Merge made by the 'recursive' strategy. +Merging apparmor/apparmor-next (d108370c644b apparmor: fix error check) +$ git merge -m Merge branch 'apparmor-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor apparmor/apparmor-next +Auto-merging security/apparmor/policy_unpack.c +Auto-merging security/apparmor/policy.c +Auto-merging security/apparmor/lsm.c +Auto-merging security/apparmor/label.c +Auto-merging security/apparmor/include/label.h +Auto-merging security/apparmor/include/file.h +Auto-merging security/apparmor/apparmorfs.c +Merge made by the 'recursive' strategy. + security/apparmor/apparmorfs.c | 4 +-- + security/apparmor/include/file.h | 2 +- + security/apparmor/include/label.h | 1 + + security/apparmor/include/policy.h | 6 ++-- + security/apparmor/label.c | 4 +-- + security/apparmor/lsm.c | 22 +++++++------- + security/apparmor/path.c | 2 +- + security/apparmor/policy.c | 59 ++++++++++++++++++++++++++++++++------ + security/apparmor/policy_unpack.c | 2 +- + 9 files changed, 73 insertions(+), 29 deletions(-) +Merging integrity/next-integrity (cc4299ea0399 ima: Use strscpy instead of strlcpy) +$ git merge -m Merge branch 'next-integrity' of git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity integrity/next-integrity +Merge made by the 'recursive' strategy. + Documentation/ABI/testing/ima_policy | 8 +- + security/integrity/ima/ima_api.c | 2 +- + security/integrity/ima/ima_policy.c | 243 ++++++++++++++++++++++++++++------- + 3 files changed, 207 insertions(+), 46 deletions(-) +Merging keys/keys-next (e377c31f788f integrity: Load mokx variables into the blacklist keyring) +$ git merge -m Merge branch 'keys-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git keys/keys-next +Auto-merging scripts/Makefile +Auto-merging include/keys/system_keyring.h +Auto-merging certs/system_keyring.c +CONFLICT (content): Merge conflict in certs/system_keyring.c +Auto-merging certs/Makefile +Auto-merging certs/Kconfig +Resolved 'certs/system_keyring.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 9941d9e8c70c] Merge branch 'keys-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git +$ git diff -M --stat --summary HEAD^.. +Merging safesetid/safesetid-next (1b8b71922919 LSM: SafeSetID: Mark safesetid_initialized as __initdata) +$ git merge -m Merge branch 'safesetid-next' of https://github.com/micah-morton/linux.git safesetid/safesetid-next +Already up to date. 
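The keys-next merge just above shows git rerere at work: "Resolved 'certs/system_keyring.c' using previous resolution." means this exact conflict was hand-resolved in an earlier run, rerere stored that resolution, and this run replayed it automatically, so the failed merge only needed the follow-up "git commit --no-edit -v -a" to complete. The tip and ftrace merges later in the log show the recording half of the same cycle: "Recorded preimage" when a conflict with no stored answer is first seen, and "Recorded resolution" once the hand-fixed result is committed. A minimal sketch of the configuration and inspection commands involved; the config keys and subcommands are standard git, while the assumption is that this log was produced with rerere enabled this way:

    # Illustrative sketch, not part of the captured log.
    git config rerere.enabled true      # record every hand-made conflict resolution
    git config rerere.autoUpdate true   # auto-stage resolutions that rerere replays
    git rerere status                   # conflicted paths rerere is tracking in this merge
    git rerere diff                     # current resolution relative to the recorded preimage
    ls .git/rr-cache                    # one directory per recorded conflict fingerprint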
+Merging selinux/next (15bf32398ad4 security: Return xattr name from security_dentry_init_security()) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git selinux/next +Auto-merging security/smack/smack_lsm.c +Auto-merging security/selinux/hooks.c +Auto-merging kernel/auditsc.c +Auto-merging include/uapi/linux/audit.h +CONFLICT (content): Merge conflict in include/uapi/linux/audit.h +Auto-merging fs/nfs/nfs4proc.c +Auto-merging fs/io_uring.c +CONFLICT (content): Merge conflict in fs/io_uring.c +Auto-merging fs/io-wq.c +CONFLICT (content): Merge conflict in fs/io-wq.c +Auto-merging drivers/android/binder_internal.h +Auto-merging drivers/android/binder.c +Resolved 'fs/io-wq.c' using previous resolution. +Resolved 'fs/io_uring.c' using previous resolution. +Resolved 'include/uapi/linux/audit.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 7dc9cf75ebb1] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git +$ git diff -M --stat --summary HEAD^.. + drivers/android/binder.c | 27 +-- + drivers/android/binder_internal.h | 4 + + fs/anon_inodes.c | 29 +++ + fs/ceph/xattr.c | 3 +- + fs/io-wq.c | 4 + + fs/io_uring.c | 69 +++++- + fs/nfs/nfs4proc.c | 3 +- + include/linux/anon_inodes.h | 4 + + include/linux/audit.h | 26 ++ + include/linux/lsm_hook_defs.h | 22 +- + include/linux/lsm_hooks.h | 30 ++- + include/linux/security.h | 55 +++-- + include/uapi/linux/audit.h | 4 +- + kernel/audit.h | 7 +- + kernel/audit_tree.c | 3 +- + kernel/audit_watch.c | 3 +- + kernel/auditfilter.c | 15 +- + kernel/auditsc.c | 468 ++++++++++++++++++++++++++++-------- + security/security.c | 35 ++- + security/selinux/avc.c | 13 +- + security/selinux/hooks.c | 239 +++++++----------- + security/selinux/include/classmap.h | 4 +- + security/selinux/netlabel.c | 7 +- + security/selinux/netport.c | 2 +- + security/selinux/ss/hashtab.c | 1 + + security/selinux/ss/mls.c | 4 + + security/selinux/ss/services.c | 176 +++++++------- + security/smack/smack_lsm.c | 46 ++++ + 28 files changed, 882 insertions(+), 421 deletions(-) +$ git am -3 ../patches/0001-fixup-for-io_uring-init-opcode-in-io_init_req.patch +Applying: fixup for "io_uring: init opcode in io_init_req()" +$ git reset HEAD^ +Unstaged changes after reset: +M fs/io_uring.c +$ git add -A . +$ git commit -v -a --amend +[master c749639d9023] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git + Date: Mon Oct 25 14:01:11 2021 +1100 +Merging smack/next (0934ad42bb2c smackfs: use netlbl_cfg_cipsov4_del() for deleting cipso_v4_doi) +$ git merge -m Merge branch 'next' of git://github.com/cschaufler/smack-next smack/next +Auto-merging security/smack/smack_lsm.c +Merge made by the 'recursive' strategy. + security/smack/smack_lsm.c | 41 ++++++++++++++++++++++++---------------- + security/smack/smack_netfilter.c | 26 +++---------------------- + security/smack/smackfs.c | 11 ++++++----- + 3 files changed, 34 insertions(+), 44 deletions(-) +Merging tomoyo/master (7d2a07b76933 Linux 5.14) +$ git merge -m Merge branch 'master' of https://scm.osdn.net/gitroot/tomoyo/tomoyo-test1.git tomoyo/master +Already up to date. +Merging tpmdd/next (f985911b7bc7 crypto: public_key: fix overflow during implicit conversion) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git tpmdd/next +Already up to date. 
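The selinux merge above (and the iommu and tip merges below) also shows how a build fixup is folded straight into the merge commit rather than carried as a separate patch: git am applies the fix as its own commit, git reset HEAD^ drops that commit while leaving its changes in the work tree, and an amended commit absorbs them into the merge. The sequence recapped here uses the same commands that appear verbatim in the log:

    # Illustrative recap of the sequence used in the log above.
    git am -3 ../patches/0001-fixup-for-io_uring-init-opcode-in-io_init_req.patch
    git reset HEAD^            # undo the patch commit, keep its changes unstaged
    git add -A .               # restage the fixup
    git commit -v -a --amend   # fold it into the merge commit just made

The -3 flag lets git am fall back to a three-way merge when the patch does not apply cleanly, and amending keeps each tree merge self-contained as a single commit in the resulting history.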
+Merging watchdog/master (519d81956ee2 Linux 5.15-rc6) +$ git merge -m Merge branch 'master' of git://www.linux-watchdog.org/linux-watchdog-next.git watchdog/master +Already up to date. +Merging iommu/next (145cac8e9cfe Merge branches 'apple/dart', 'arm/mediatek', 'arm/renesas', 'arm/tegra', 'x86/amd', 'x86/vt-d' and 'core' into next) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git iommu/next +Auto-merging drivers/iommu/apple-dart.c +Merge made by the 'recursive' strategy. + .../bindings/iommu/renesas,ipmmu-vmsa.yaml | 1 + + arch/x86/include/asm/fpu/api.h | 2 - + drivers/iommu/amd/amd_iommu_types.h | 2 + + drivers/iommu/amd/iommu.c | 21 ++ + drivers/iommu/apple-dart.c | 5 +- + drivers/iommu/dma-iommu.c | 195 ++++++++----------- + drivers/iommu/intel/Kconfig | 4 + + drivers/iommu/intel/cap_audit.c | 13 ++ + drivers/iommu/intel/cap_audit.h | 1 + + drivers/iommu/intel/dmar.c | 10 +- + drivers/iommu/intel/iommu.c | 213 ++++++++++++++------- + drivers/iommu/intel/svm.c | 24 +-- + drivers/iommu/iommu.c | 3 +- + drivers/iommu/ipmmu-vmsa.c | 32 +++- + drivers/iommu/mtk_iommu.c | 4 +- + drivers/iommu/tegra-smmu.c | 5 +- + drivers/xen/swiotlb-xen.c | 2 +- + include/linux/dmar.h | 8 + + include/linux/intel-iommu.h | 13 +- + include/linux/swiotlb.h | 3 +- + kernel/dma/swiotlb.c | 13 +- + 21 files changed, 340 insertions(+), 234 deletions(-) +$ git am -3 ../patches/0001-fix-for-iommu-dart-Exclude-MSI-doorbell-from-PCIe-de.patch +Applying: fix for "iommu/dart: Exclude MSI doorbell from PCIe device IOVA range" +$ git reset HEAD^ +Unstaged changes after reset: +M drivers/iommu/apple-dart.c +$ git add -A . +$ git commit -v -a --amend +[master 2c036e27cfac] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git + Date: Mon Oct 25 14:10:28 2021 +1100 +Merging audit/next (d9516f346e8b audit: return early if the filter rule has a lower priority) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git audit/next +Auto-merging kernel/auditsc.c +Auto-merging kernel/audit_tree.c +Auto-merging kernel/audit.h +Auto-merging include/uapi/linux/audit.h +CONFLICT (content): Merge conflict in include/uapi/linux/audit.h +Auto-merging include/linux/audit.h +Auto-merging MAINTAINERS +Resolved 'include/uapi/linux/audit.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master ec335a777c82] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git +$ git diff -M --stat --summary HEAD^.. 
+ MAINTAINERS | 1 + + arch/alpha/kernel/audit.c | 10 +++++--- + arch/ia64/kernel/audit.c | 10 +++++--- + arch/parisc/kernel/audit.c | 10 +++++--- + arch/parisc/kernel/compat_audit.c | 11 +++++--- + arch/powerpc/kernel/audit.c | 12 +++++---- + arch/powerpc/kernel/compat_audit.c | 13 ++++++---- + arch/s390/kernel/audit.c | 12 +++++---- + arch/s390/kernel/compat_audit.c | 13 ++++++---- + arch/sparc/kernel/audit.c | 12 +++++---- + arch/sparc/kernel/compat_audit.c | 13 ++++++---- + arch/x86/ia32/audit.c | 13 ++++++---- + arch/x86/kernel/audit_64.c | 10 +++++--- + fs/open.c | 2 ++ + include/linux/audit.h | 11 ++++++++ + include/linux/audit_arch.h | 24 ++++++++++++++++++ + include/uapi/linux/audit.h | 7 +++--- + kernel/audit.h | 2 ++ + kernel/audit_tree.c | 20 +++++++-------- + kernel/auditsc.c | 51 ++++++++++++++++++++++---------------- + lib/audit.c | 14 +++++++---- + lib/compat_audit.c | 15 +++++++---- + security/lsm_audit.c | 2 +- + 23 files changed, 187 insertions(+), 101 deletions(-) + create mode 100644 include/linux/audit_arch.h +Merging devicetree/for-next (f1d46c113d5c dt-bindings: display: Document the Xylon LogiCVC display controller) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git devicetree/for-next +Auto-merging scripts/Makefile.lib +Auto-merging drivers/of/base.c +Auto-merging arch/powerpc/kernel/smp.c +Auto-merging arch/arm/boot/dts/omap3-gta04a5.dts +Auto-merging Makefile +Auto-merging MAINTAINERS +Removing Documentation/devicetree/bindings/w1/w1-gpio.txt +Auto-merging Documentation/devicetree/bindings/vendor-prefixes.yaml +Removing Documentation/devicetree/bindings/usb/udc-xilinx.txt +Removing Documentation/devicetree/bindings/rng/omap_rng.txt +Removing Documentation/devicetree/bindings/reserved-memory/ramoops.txt +Removing Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt +Removing Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt +Removing Documentation/devicetree/bindings/net/wireless/esp,esp8089.txt +Removing Documentation/devicetree/bindings/net/ti-bluetooth.txt +Auto-merging Documentation/devicetree/bindings/net/snps,dwmac.yaml +Removing Documentation/devicetree/bindings/net/nfc/trf7970a.txt +Removing Documentation/devicetree/bindings/net/nfc/st95hf.txt +Removing Documentation/devicetree/bindings/net/nfc/st21nfca.txt +Removing Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt +Removing Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt +Removing Documentation/devicetree/bindings/net/nfc/pn532.txt +Removing Documentation/devicetree/bindings/net/nfc/nxp-nci.txt +Removing Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt +Removing Documentation/devicetree/bindings/net/marvell-bluetooth.txt +Auto-merging Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml +Removing Documentation/devicetree/bindings/mfd/aspeed-lpc.txt +Removing Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt +Removing Documentation/devicetree/bindings/media/i2c/ov5640.txt +Removing Documentation/devicetree/bindings/leds/register-bit-led.txt +Removing Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt +Removing Documentation/devicetree/bindings/input/elan_i2c.txt +Removing Documentation/devicetree/bindings/gnss/u-blox.txt +Auto-merging Documentation/devicetree/bindings/arm/cpus.yaml +Removing Documentation/devicetree/bindings/arm/cci.txt +Merge made by the 'recursive' strategy. 
+ Documentation/admin-guide/ramoops.rst | 2 +- + Documentation/devicetree/bindings/Makefile | 15 +- + .../devicetree/bindings/arm/arm,cci-400.yaml | 216 +++++++++++++++ + .../devicetree/bindings/arm/arm,vexpress-juno.yaml | 46 ++-- + .../devicetree/bindings/arm/cci-control-port.yaml | 38 +++ + Documentation/devicetree/bindings/arm/cci.txt | 224 --------------- + Documentation/devicetree/bindings/arm/cpus.yaml | 2 + + Documentation/devicetree/bindings/bus/palmbus.yaml | 79 ++++++ + .../devicetree/bindings/devfreq/rk3399_dmc.txt | 2 +- + .../bindings/display/xylon,logicvc-display.yaml | 301 +++++++++++++++++++++ + .../devicetree/bindings/example-schema.yaml | 14 +- + .../devicetree/bindings/gnss/u-blox,neo-6m.yaml | 62 +++++ + Documentation/devicetree/bindings/gnss/u-blox.txt | 45 --- + .../devicetree/bindings/input/elan,ekth3000.yaml | 81 ++++++ + .../devicetree/bindings/input/elan_i2c.txt | 44 --- + .../bindings/input/touchscreen/silead,gsl1680.yaml | 91 +++++++ + .../bindings/input/touchscreen/silead_gsl1680.txt | 44 --- + .../interrupt-controller/msi-controller.yaml | 46 ++++ + .../devicetree/bindings/leds/register-bit-led.txt | 94 ------- + .../devicetree/bindings/leds/register-bit-led.yaml | 95 +++++++ + .../devicetree/bindings/mailbox/mtk-gce.txt | 4 +- + .../devicetree/bindings/media/i2c/ov5640.txt | 92 ------- + .../devicetree/bindings/media/i2c/ovti,ov5640.yaml | 154 +++++++++++ + .../devicetree/bindings/media/ti,cal.yaml | 4 +- + .../bindings/memory-controllers/fsl/ddr.txt | 29 -- + .../bindings/memory-controllers/fsl/fsl,ddr.yaml | 83 ++++++ + .../memory-controllers/mediatek,mt7621-memc.yaml | 30 ++ + .../devicetree/bindings/mfd/aspeed-lpc.txt | 157 ----------- + .../devicetree/bindings/mfd/aspeed-lpc.yaml | 199 ++++++++++++++ + .../bindings/net/allwinner,sun8i-a83t-emac.yaml | 4 +- + .../bindings/net/broadcom-bluetooth.yaml | 17 +- + .../devicetree/bindings/net/marvell-bluetooth.txt | 25 -- + .../devicetree/bindings/net/marvell-bluetooth.yaml | 31 +++ + .../devicetree/bindings/net/nfc/marvell,nci.yaml | 170 ++++++++++++ + .../devicetree/bindings/net/nfc/nfcmrvl.txt | 84 ------ + .../devicetree/bindings/net/nfc/nxp,nci.yaml | 61 +++++ + .../devicetree/bindings/net/nfc/nxp,pn532.yaml | 65 +++++ + .../devicetree/bindings/net/nfc/nxp-nci.txt | 33 --- + .../devicetree/bindings/net/nfc/pn532.txt | 46 ---- + .../devicetree/bindings/net/nfc/st,st-nci.yaml | 106 ++++++++ + .../devicetree/bindings/net/nfc/st,st21nfca.yaml | 64 +++++ + .../devicetree/bindings/net/nfc/st,st95hf.yaml | 57 ++++ + .../devicetree/bindings/net/nfc/st-nci-i2c.txt | 38 --- + .../devicetree/bindings/net/nfc/st-nci-spi.txt | 36 --- + .../devicetree/bindings/net/nfc/st21nfca.txt | 37 --- + .../devicetree/bindings/net/nfc/st95hf.txt | 45 --- + .../devicetree/bindings/net/nfc/ti,trf7970a.yaml | 98 +++++++ + .../devicetree/bindings/net/nfc/trf7970a.txt | 43 --- + .../devicetree/bindings/net/realtek-bluetooth.yaml | 2 + + .../devicetree/bindings/net/snps,dwmac.yaml | 6 +- + .../devicetree/bindings/net/ti,bluetooth.yaml | 91 +++++++ + .../devicetree/bindings/net/ti-bluetooth.txt | 60 ---- + .../bindings/net/wireless/esp,esp8089.txt | 30 -- + .../bindings/net/wireless/esp,esp8089.yaml | 43 +++ + .../bindings/net/wireless/ti,wlcore,spi.txt | 57 ---- + .../devicetree/bindings/net/wireless/ti,wlcore.txt | 45 --- + .../bindings/net/wireless/ti,wlcore.yaml | 134 +++++++++ + Documentation/devicetree/bindings/numa.txt | 46 +++- + .../devicetree/bindings/pci/apple,pcie.yaml | 160 +++++++++++ + 
.../devicetree/bindings/pci/brcm,stb-pcie.yaml | 1 + + .../bindings/pci/microchip,pcie-host.yaml | 1 + + .../bindings/reserved-memory/memory-region.yaml | 40 +++ + .../bindings/reserved-memory/ramoops.txt | 66 ----- + .../bindings/reserved-memory/ramoops.yaml | 145 ++++++++++ + .../bindings/reserved-memory/reserved-memory.txt | 172 +----------- + .../bindings/reserved-memory/reserved-memory.yaml | 100 +++++++ + .../bindings/reserved-memory/shared-dma-pool.yaml | 87 ++++++ + Documentation/devicetree/bindings/riscv/cpus.yaml | 8 +- + Documentation/devicetree/bindings/rng/omap_rng.txt | 38 --- + .../devicetree/bindings/rng/omap_rng.yaml | 92 +++++++ + .../bindings/soc/aspeed/uart-routing.yaml | 56 ++++ + .../devicetree/bindings/submitting-patches.rst | 3 + + .../devicetree/bindings/usb/udc-xilinx.txt | 18 -- + .../devicetree/bindings/usb/xlnx,usb2.yaml | 47 ++++ + .../devicetree/bindings/vendor-prefixes.yaml | 2 + + Documentation/devicetree/bindings/w1/w1-gpio.txt | 27 -- + Documentation/devicetree/bindings/w1/w1-gpio.yaml | 43 +++ + .../devicetree/bindings/writing-bindings.rst | 2 +- + .../devicetree/bindings/writing-schema.rst | 29 +- + Documentation/w1/masters/w1-gpio.rst | 2 +- + MAINTAINERS | 24 +- + Makefile | 10 +- + arch/arm/boot/dts/omap3-gta04a5.dts | 2 +- + arch/arm/kernel/devtree.c | 22 +- + arch/arm/mach-bcm/bcm63xx_pmb.c | 6 +- + arch/arm64/kernel/smp.c | 31 +-- + arch/csky/kernel/smp.c | 6 +- + arch/openrisc/kernel/smp.c | 6 +- + arch/powerpc/kernel/smp.c | 7 +- + arch/riscv/kernel/cpu.c | 3 +- + arch/sh/boards/of-generic.c | 5 +- + arch/x86/kernel/devicetree.c | 5 +- + drivers/of/base.c | 22 ++ + drivers/of/kobj.c | 4 +- + drivers/of/of_numa.c | 2 + + drivers/of/of_private.h | 10 +- + drivers/of/unittest.c | 2 + + include/linux/of.h | 3 +- + scripts/Makefile.lib | 1 - + 99 files changed, 3384 insertions(+), 1793 deletions(-) + create mode 100644 Documentation/devicetree/bindings/arm/arm,cci-400.yaml + create mode 100644 Documentation/devicetree/bindings/arm/cci-control-port.yaml + delete mode 100644 Documentation/devicetree/bindings/arm/cci.txt + create mode 100644 Documentation/devicetree/bindings/bus/palmbus.yaml + create mode 100644 Documentation/devicetree/bindings/display/xylon,logicvc-display.yaml + create mode 100644 Documentation/devicetree/bindings/gnss/u-blox,neo-6m.yaml + delete mode 100644 Documentation/devicetree/bindings/gnss/u-blox.txt + create mode 100644 Documentation/devicetree/bindings/input/elan,ekth3000.yaml + delete mode 100644 Documentation/devicetree/bindings/input/elan_i2c.txt + create mode 100644 Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml + delete mode 100644 Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt + create mode 100644 Documentation/devicetree/bindings/interrupt-controller/msi-controller.yaml + delete mode 100644 Documentation/devicetree/bindings/leds/register-bit-led.txt + create mode 100644 Documentation/devicetree/bindings/leds/register-bit-led.yaml + delete mode 100644 Documentation/devicetree/bindings/media/i2c/ov5640.txt + create mode 100644 Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml + delete mode 100644 Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt + create mode 100644 Documentation/devicetree/bindings/memory-controllers/fsl/fsl,ddr.yaml + create mode 100644 Documentation/devicetree/bindings/memory-controllers/mediatek,mt7621-memc.yaml + delete mode 100644 Documentation/devicetree/bindings/mfd/aspeed-lpc.txt + create mode 100644 
Documentation/devicetree/bindings/mfd/aspeed-lpc.yaml + delete mode 100644 Documentation/devicetree/bindings/net/marvell-bluetooth.txt + create mode 100644 Documentation/devicetree/bindings/net/marvell-bluetooth.yaml + create mode 100644 Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml + delete mode 100644 Documentation/devicetree/bindings/net/nfc/nfcmrvl.txt + create mode 100644 Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml + create mode 100644 Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml + delete mode 100644 Documentation/devicetree/bindings/net/nfc/nxp-nci.txt + delete mode 100644 Documentation/devicetree/bindings/net/nfc/pn532.txt + create mode 100644 Documentation/devicetree/bindings/net/nfc/st,st-nci.yaml + create mode 100644 Documentation/devicetree/bindings/net/nfc/st,st21nfca.yaml + create mode 100644 Documentation/devicetree/bindings/net/nfc/st,st95hf.yaml + delete mode 100644 Documentation/devicetree/bindings/net/nfc/st-nci-i2c.txt + delete mode 100644 Documentation/devicetree/bindings/net/nfc/st-nci-spi.txt + delete mode 100644 Documentation/devicetree/bindings/net/nfc/st21nfca.txt + delete mode 100644 Documentation/devicetree/bindings/net/nfc/st95hf.txt + create mode 100644 Documentation/devicetree/bindings/net/nfc/ti,trf7970a.yaml + delete mode 100644 Documentation/devicetree/bindings/net/nfc/trf7970a.txt + create mode 100644 Documentation/devicetree/bindings/net/ti,bluetooth.yaml + delete mode 100644 Documentation/devicetree/bindings/net/ti-bluetooth.txt + delete mode 100644 Documentation/devicetree/bindings/net/wireless/esp,esp8089.txt + create mode 100644 Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml + delete mode 100644 Documentation/devicetree/bindings/net/wireless/ti,wlcore,spi.txt + delete mode 100644 Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt + create mode 100644 Documentation/devicetree/bindings/net/wireless/ti,wlcore.yaml + create mode 100644 Documentation/devicetree/bindings/pci/apple,pcie.yaml + create mode 100644 Documentation/devicetree/bindings/reserved-memory/memory-region.yaml + delete mode 100644 Documentation/devicetree/bindings/reserved-memory/ramoops.txt + create mode 100644 Documentation/devicetree/bindings/reserved-memory/ramoops.yaml + create mode 100644 Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml + create mode 100644 Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml + delete mode 100644 Documentation/devicetree/bindings/rng/omap_rng.txt + create mode 100644 Documentation/devicetree/bindings/rng/omap_rng.yaml + create mode 100644 Documentation/devicetree/bindings/soc/aspeed/uart-routing.yaml + delete mode 100644 Documentation/devicetree/bindings/usb/udc-xilinx.txt + create mode 100644 Documentation/devicetree/bindings/usb/xlnx,usb2.yaml + delete mode 100644 Documentation/devicetree/bindings/w1/w1-gpio.txt + create mode 100644 Documentation/devicetree/bindings/w1/w1-gpio.yaml +Merging mailbox/mailbox-for-next (0a5ad4322927 mailbox: mtk-cmdq: Fix local clock ID usage) +$ git merge -m Merge branch 'mailbox-for-next' of git://git.linaro.org/landing-teams/working/fujitsu/integration.git mailbox/mailbox-for-next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ .../bindings/mailbox/qcom,apcs-kpss-global.yaml | 3 ++- + MAINTAINERS | 2 +- + drivers/mailbox/bcm2835-mailbox.c | 4 +-- + drivers/mailbox/hi3660-mailbox.c | 4 +-- + drivers/mailbox/hi6220-mailbox.c | 7 ++--- + drivers/mailbox/mailbox-altera.c | 5 +--- + drivers/mailbox/mailbox-sti.c | 4 +-- + drivers/mailbox/mailbox-xgene-slimpro.c | 4 +-- + drivers/mailbox/mtk-cmdq-mailbox.c | 15 ++++------- + drivers/mailbox/omap-mailbox.c | 4 +-- + drivers/mailbox/platform_mhu.c | 4 +-- + drivers/mailbox/qcom-apcs-ipc-mailbox.c | 31 +++++----------------- + drivers/mailbox/stm32-ipcc.c | 4 +-- + drivers/mailbox/sun6i-msgbox.c | 9 +------ + 14 files changed, 26 insertions(+), 74 deletions(-) +Merging spi/for-next (52e541e87158 Merge remote-tracking branch 'spi/for-5.16' into spi-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git spi/for-next +Auto-merging drivers/spi/spi.c +Auto-merging drivers/firmware/xilinx/zynqmp.c +Merge made by the 'recursive' strategy. + .../devicetree/bindings/spi/cdns,qspi-nor.yaml | 12 + + .../devicetree/bindings/spi/cdns,xspi.yaml | 77 +++ + .../devicetree/bindings/spi/ingenic,spi.yaml | 72 +++ + .../bindings/spi/qcom,spi-qcom-qspi.yaml | 6 +- + Documentation/spi/spi-summary.rst | 8 - + arch/mips/boot/dts/ingenic/ci20.dts | 9 +- + arch/mips/boot/dts/ingenic/jz4780.dtsi | 44 +- + drivers/firmware/xilinx/zynqmp.c | 17 + + drivers/spi/Kconfig | 26 +- + drivers/spi/Makefile | 2 + + drivers/spi/atmel-quadspi.c | 2 +- + drivers/spi/spi-amd.c | 113 ++-- + drivers/spi/spi-at91-usart.c | 27 +- + drivers/spi/spi-bcm-qspi.c | 193 ++++++- + drivers/spi/spi-cadence-quadspi.c | 214 +++++++ + drivers/spi/spi-cadence-xspi.c | 642 +++++++++++++++++++++ + drivers/spi/spi-fsi.c | 48 +- + drivers/spi/spi-ingenic.c | 482 ++++++++++++++++ + drivers/spi/spi-mtk-nor.c | 2 +- + drivers/spi/spi-orion.c | 1 + + drivers/spi/spi-rspi.c | 1 - + drivers/spi/spi-sh-msiof.c | 1 - + drivers/spi/spi-stm32-qspi.c | 2 +- + drivers/spi/spi-tle62x0.c | 2 +- + drivers/spi/spi.c | 237 +++----- + include/linux/firmware/xlnx-zynqmp.h | 12 + + include/linux/spi/spi.h | 55 -- + 27 files changed, 1940 insertions(+), 367 deletions(-) + create mode 100644 Documentation/devicetree/bindings/spi/cdns,xspi.yaml + create mode 100644 Documentation/devicetree/bindings/spi/ingenic,spi.yaml + create mode 100644 drivers/spi/spi-cadence-xspi.c + create mode 100644 drivers/spi/spi-ingenic.c +Merging tip/auto-latest (5323cc30d36f Merge remote-tracking branch 'tip/timers/core' into tip-master) +$ git merge -m Merge branch 'auto-latest' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git tip/auto-latest +Auto-merging scripts/sorttable.c +Auto-merging kernel/sched/sched.h +Auto-merging kernel/sched/core_sched.c +Auto-merging kernel/sched/core.c +Removing kernel/futex.c +Auto-merging kernel/fork.c +Auto-merging kernel/exit.c +Auto-merging kernel/events/core.c +Auto-merging kernel/dma/swiotlb.c +Auto-merging include/linux/sched.h +Auto-merging include/linux/perf_event.h +Auto-merging include/linux/mm_types.h +Auto-merging include/linux/acpi.h +Auto-merging drivers/iommu/iommu.c +Auto-merging drivers/iommu/amd/iommu.c +Auto-merging drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +Auto-merging arch/xtensa/kernel/process.c +Auto-merging arch/xtensa/include/asm/processor.h +Auto-merging arch/x86/net/bpf_jit_comp.c +Auto-merging arch/x86/kvm/x86.c +Auto-merging arch/x86/kvm/vmx/vmx.c +Auto-merging arch/x86/kvm/svm/sev.c +Auto-merging arch/x86/kernel/fpu/signal.c +CONFLICT (content): 
Merge conflict in arch/x86/kernel/fpu/signal.c +Auto-merging arch/x86/kernel/cpu/mce/amd.c +Auto-merging arch/x86/include/asm/mce.h +Auto-merging arch/x86/include/asm/kvm_host.h +Auto-merging arch/x86/include/asm/fpu/api.h +CONFLICT (content): Merge conflict in arch/x86/include/asm/fpu/api.h +Auto-merging arch/x86/events/perf_event.h +Auto-merging arch/x86/events/intel/core.c +Auto-merging arch/x86/Kconfig +Auto-merging arch/parisc/kernel/process.c +Auto-merging arch/parisc/include/asm/processor.h +Auto-merging arch/mips/include/asm/processor.h +Auto-merging arch/h8300/include/asm/processor.h +Auto-merging arch/arm64/include/asm/processor.h +Auto-merging arch/arm64/Kconfig +Auto-merging arch/arm/kernel/process.c +Auto-merging MAINTAINERS +Recorded preimage for 'arch/x86/include/asm/fpu/api.h' +Resolved 'arch/x86/kernel/fpu/signal.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'arch/x86/include/asm/fpu/api.h'. +[master 43d1eddaabcb] Merge branch 'auto-latest' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git +$ git diff -M --stat --summary HEAD^.. + Documentation/ABI/stable/sysfs-devices-system-cpu | 15 + + Documentation/admin-guide/cgroup-v2.rst | 8 + + Documentation/admin-guide/cputopology.rst | 12 +- + Documentation/kernel-hacking/locking.rst | 14 +- + Documentation/scheduler/sched-bwc.rst | 84 +- + .../translations/it_IT/kernel-hacking/locking.rst | 14 +- + Documentation/userspace-api/futex2.rst | 86 + + Documentation/userspace-api/index.rst | 1 + + Documentation/x86/sgx.rst | 26 + + MAINTAINERS | 3 +- + arch/Kconfig | 3 + + arch/alpha/include/asm/processor.h | 2 +- + arch/alpha/kernel/process.c | 5 +- + arch/arc/include/asm/processor.h | 2 +- + arch/arc/kernel/stacktrace.c | 4 +- + arch/arm/include/asm/processor.h | 2 +- + arch/arm/kernel/process.c | 4 +- + arch/arm/tools/syscall.tbl | 1 + + arch/arm64/Kconfig | 9 + + arch/arm64/include/asm/processor.h | 2 +- + arch/arm64/include/asm/unistd.h | 2 +- + arch/arm64/include/asm/unistd32.h | 2 + + arch/arm64/kernel/process.c | 4 +- + arch/arm64/kernel/topology.c | 2 + + arch/csky/include/asm/processor.h | 2 +- + arch/csky/kernel/stacktrace.c | 5 +- + arch/h8300/include/asm/processor.h | 2 +- + arch/h8300/kernel/process.c | 5 +- + arch/hexagon/include/asm/processor.h | 2 +- + arch/hexagon/kernel/process.c | 4 +- + arch/ia64/include/asm/processor.h | 2 +- + arch/ia64/kernel/process.c | 5 +- + arch/m68k/include/asm/processor.h | 2 +- + arch/m68k/kernel/process.c | 4 +- + arch/microblaze/include/asm/processor.h | 2 +- + arch/microblaze/kernel/process.c | 2 +- + arch/mips/include/asm/processor.h | 2 +- + arch/mips/kernel/process.c | 8 +- + arch/nds32/include/asm/processor.h | 2 +- + arch/nds32/kernel/process.c | 7 +- + arch/nios2/include/asm/processor.h | 2 +- + arch/nios2/kernel/process.c | 5 +- + arch/openrisc/include/asm/processor.h | 2 +- + arch/openrisc/kernel/process.c | 2 +- + arch/parisc/include/asm/processor.h | 2 +- + arch/parisc/kernel/process.c | 5 +- + arch/powerpc/include/asm/mem_encrypt.h | 5 - + arch/powerpc/include/asm/processor.h | 2 +- + arch/powerpc/kernel/process.c | 9 +- + arch/powerpc/perf/isa207-common.c | 26 +- + arch/powerpc/perf/isa207-common.h | 2 + + arch/powerpc/platforms/pseries/Kconfig | 1 + + arch/powerpc/platforms/pseries/Makefile | 2 + + arch/powerpc/platforms/pseries/cc_platform.c | 26 + + arch/powerpc/platforms/pseries/svm.c | 5 +- + arch/riscv/include/asm/processor.h | 2 +- + 
arch/riscv/kernel/stacktrace.c | 12 +- + arch/s390/include/asm/mem_encrypt.h | 2 - + arch/s390/include/asm/processor.h | 2 +- + arch/s390/kernel/process.c | 4 +- + arch/sh/include/asm/processor_32.h | 2 +- + arch/sh/kernel/process_32.c | 5 +- + arch/sparc/include/asm/processor_32.h | 2 +- + arch/sparc/include/asm/processor_64.h | 2 +- + arch/sparc/kernel/process_32.c | 5 +- + arch/sparc/kernel/process_64.c | 5 +- + arch/um/include/asm/processor-generic.h | 2 +- + arch/um/kernel/process.c | 5 +- + arch/x86/Kconfig | 17 +- + arch/x86/Kconfig.cpu | 13 + + arch/x86/boot/genimage.sh | 15 +- + arch/x86/boot/mtools.conf.in | 5 +- + arch/x86/entry/syscalls/syscall_32.tbl | 1 + + arch/x86/entry/syscalls/syscall_64.tbl | 1 + + arch/x86/events/core.c | 6 + + arch/x86/events/intel/bts.c | 6 + + arch/x86/events/intel/core.c | 16 + + arch/x86/events/intel/uncore_discovery.h | 2 +- + arch/x86/events/intel/uncore_snbep.c | 16 +- + arch/x86/events/perf_event.h | 2 + + arch/x86/ia32/ia32_signal.c | 15 +- + arch/x86/include/asm/asm.h | 55 +- + arch/x86/include/asm/cpu_entry_area.h | 8 +- + arch/x86/include/asm/cpufeature.h | 13 +- + arch/x86/include/asm/extable.h | 44 +- + arch/x86/include/asm/extable_fixup_types.h | 22 + + arch/x86/include/asm/fpu/api.h | 47 +- + arch/x86/include/asm/fpu/internal.h | 540 --- + arch/x86/include/asm/fpu/sched.h | 68 + + arch/x86/include/asm/fpu/signal.h | 13 +- + arch/x86/include/asm/fpu/types.h | 126 +- + arch/x86/include/asm/fpu/xcr.h | 11 - + arch/x86/include/asm/fpu/xstate.h | 66 +- + arch/x86/include/asm/ia32.h | 2 +- + arch/x86/include/asm/insn-eval.h | 1 + + arch/x86/include/asm/io.h | 8 + + arch/x86/include/asm/irq_stack.h | 39 +- + arch/x86/include/asm/kexec.h | 2 +- + arch/x86/include/asm/kvm_host.h | 7 +- + arch/x86/include/asm/mce.h | 12 - + arch/x86/include/asm/mem_encrypt.h | 12 +- + arch/x86/include/asm/msr.h | 4 +- + arch/x86/include/asm/page_32.h | 2 +- + arch/x86/include/asm/page_64_types.h | 2 +- + arch/x86/include/asm/paravirt.h | 31 +- + arch/x86/include/asm/pkru.h | 2 +- + arch/x86/include/asm/processor.h | 15 +- + arch/x86/include/asm/ptrace.h | 2 +- + arch/x86/include/asm/segment.h | 2 +- + arch/x86/include/asm/sgx.h | 3 + + arch/x86/include/asm/smp.h | 7 + + arch/x86/include/asm/stacktrace.h | 10 + + arch/x86/include/asm/topology.h | 3 + + arch/x86/include/asm/trace/fpu.h | 4 +- + arch/x86/include/asm/traps.h | 6 +- + arch/x86/include/asm/uaccess.h | 2 +- + arch/x86/include/asm/xen/hypercall.h | 6 +- + arch/x86/include/uapi/asm/sgx.h | 2 + + arch/x86/kernel/Makefile | 6 + + arch/x86/kernel/cc_platform.c | 69 + + arch/x86/kernel/cpu/Makefile | 1 + + arch/x86/kernel/cpu/amd.c | 2 + + arch/x86/kernel/cpu/bugs.c | 2 +- + arch/x86/kernel/cpu/cacheinfo.c | 1 + + arch/x86/kernel/cpu/common.c | 51 +- + arch/x86/kernel/cpu/cpu.h | 1 + + arch/x86/kernel/cpu/hygon.c | 2 + + arch/x86/kernel/cpu/mce/amd.c | 10 +- + arch/x86/kernel/cpu/mce/core.c | 292 +- + arch/x86/kernel/cpu/mce/internal.h | 71 +- + arch/x86/kernel/cpu/mce/p5.c | 6 +- + arch/x86/kernel/cpu/mce/severity.c | 33 +- + arch/x86/kernel/cpu/mce/winchip.c | 6 +- + arch/x86/kernel/cpu/sgx/virt.c | 69 +- + arch/x86/kernel/cpu/vortex.c | 39 + + arch/x86/kernel/crash_dump_64.c | 4 +- + arch/x86/kernel/dumpstack_64.c | 6 + + arch/x86/kernel/fpu/bugs.c | 2 +- + arch/x86/kernel/fpu/context.h | 81 + + arch/x86/kernel/fpu/core.c | 315 +- + arch/x86/kernel/fpu/init.c | 77 +- + arch/x86/kernel/fpu/internal.h | 31 + + arch/x86/kernel/fpu/legacy.h | 115 + + arch/x86/kernel/fpu/regset.c | 36 +- + 
arch/x86/kernel/fpu/signal.c | 236 +- + arch/x86/kernel/fpu/xstate.c | 369 +- + arch/x86/kernel/fpu/xstate.h | 225 ++ + arch/x86/kernel/head64.c | 9 +- + arch/x86/kernel/irqflags.S | 2 + + arch/x86/kernel/itmt.c | 2 +- + arch/x86/kernel/kvm.c | 3 +- + arch/x86/kernel/kvmclock.c | 4 +- + arch/x86/kernel/machine_kexec_64.c | 19 +- + arch/x86/kernel/paravirt.c | 45 +- + arch/x86/kernel/pci-swiotlb.c | 9 +- + arch/x86/kernel/process.c | 65 +- + arch/x86/kernel/process_32.c | 5 +- + arch/x86/kernel/process_64.c | 5 +- + arch/x86/kernel/ptrace.c | 2 +- + arch/x86/kernel/relocate_kernel_64.S | 2 +- + arch/x86/kernel/sev-shared.c | 55 +- + arch/x86/kernel/sev.c | 61 +- + arch/x86/kernel/signal.c | 19 +- + arch/x86/kernel/smpboot.c | 48 +- + arch/x86/kernel/traps.c | 62 +- + arch/x86/kernel/umip.c | 8 +- + arch/x86/kvm/svm/sev.c | 2 +- + arch/x86/kvm/svm/svm.c | 10 +- + arch/x86/kvm/svm/svm.h | 8 +- + arch/x86/kvm/svm/svm_ops.h | 4 +- + arch/x86/kvm/vmx/evmcs.h | 4 +- + arch/x86/kvm/vmx/vmx.c | 2 +- + arch/x86/kvm/x86.c | 258 +- + arch/x86/lib/copy_mc_64.S | 8 +- + arch/x86/lib/copy_user_64.S | 13 - + arch/x86/lib/insn-eval.c | 2 +- + arch/x86/lib/insn.c | 5 +- + arch/x86/math-emu/fpu_aux.c | 2 +- + arch/x86/math-emu/fpu_entry.c | 6 +- + arch/x86/math-emu/fpu_system.h | 2 +- + arch/x86/mm/cpu_entry_area.c | 7 + + arch/x86/mm/extable.c | 135 +- + arch/x86/mm/fault.c | 20 +- + arch/x86/mm/ioremap.c | 18 +- + arch/x86/mm/mem_encrypt.c | 55 +- + arch/x86/mm/mem_encrypt_identity.c | 18 +- + arch/x86/mm/pat/set_memory.c | 3 +- + arch/x86/net/bpf_jit_comp.c | 11 +- + arch/x86/platform/efi/efi_64.c | 9 +- + arch/x86/power/cpu.c | 2 +- + arch/x86/realmode/init.c | 8 +- + arch/x86/xen/enlighten_pv.c | 70 +- + arch/x86/xen/irq.c | 31 +- + arch/x86/xen/mmu_pv.c | 93 +- + arch/x86/xen/xen-asm.S | 79 +- + arch/x86/xen/xen-head.S | 34 +- + arch/xtensa/include/asm/processor.h | 2 +- + arch/xtensa/kernel/process.c | 5 +- + drivers/acpi/pptt.c | 67 + + drivers/base/arch_topology.c | 15 + + drivers/base/topology.c | 10 + + drivers/clocksource/Kconfig | 3 + + drivers/clocksource/arc_timer.c | 6 +- + drivers/firmware/efi/efi.c | 5 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +- + drivers/gpu/drm/drm_cache.c | 4 +- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 4 +- + drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 6 +- + drivers/iommu/amd/init.c | 7 +- + drivers/iommu/amd/iommu.c | 3 +- + drivers/iommu/amd/iommu_v2.c | 3 +- + drivers/iommu/iommu.c | 3 +- + fs/proc/array.c | 7 +- + fs/proc/base.c | 19 +- + fs/proc/stat.c | 4 +- + fs/proc/uptime.c | 14 +- + fs/proc/vmcore.c | 6 +- + include/linux/acpi.h | 5 + + include/linux/arch_topology.h | 5 + + include/linux/cc_platform.h | 88 + + include/linux/context_tracking.h | 2 +- + include/linux/debug_locks.h | 2 - + include/linux/irq_work.h | 8 + + include/linux/kernel.h | 13 +- + include/linux/kernel_stat.h | 1 + + include/linux/lockdep_types.h | 2 +- + include/linux/mem_encrypt.h | 4 - + include/linux/mm_types.h | 4 + + include/linux/perf_event.h | 1 + + include/linux/preempt.h | 5 +- + include/linux/sched.h | 50 +- + include/linux/sched/idle.h | 4 + + include/linux/sched/mm.h | 29 + + include/linux/sched/task.h | 3 +- + include/linux/sched/topology.h | 7 + + include/linux/stacktrace.h | 13 +- + include/linux/syscalls.h | 7 +- + include/linux/topology.h | 13 + + include/linux/wait.h | 3 +- + include/soc/arc/timers.h | 4 +- + include/uapi/asm-generic/unistd.h | 5 +- + include/uapi/linux/futex.h | 25 + + include/uapi/linux/perf_event.h | 34 +- + kernel/Kconfig.preempt | 32 +- + 
kernel/Makefile | 2 +- + kernel/dma/swiotlb.c | 4 +- + kernel/events/Makefile | 5 - + kernel/events/core.c | 30 + + kernel/exit.c | 2 + + kernel/fork.c | 2 +- + kernel/futex.c | 4272 -------------------- + kernel/futex/Makefile | 3 + + kernel/futex/core.c | 1176 ++++++ + kernel/futex/futex.h | 299 ++ + kernel/futex/pi.c | 1233 ++++++ + kernel/futex/requeue.c | 897 ++++ + kernel/futex/syscalls.c | 398 ++ + kernel/futex/waitwake.c | 708 ++++ + kernel/irq/manage.c | 6 +- + kernel/irq/spurious.c | 8 + + kernel/irq_work.c | 130 +- + kernel/kprobes.c | 8 +- + kernel/kthread.c | 16 +- + kernel/livepatch/transition.c | 95 +- + kernel/locking/lockdep.c | 6 +- + kernel/locking/mutex.c | 22 +- + kernel/locking/rtmutex.c | 19 +- + kernel/locking/rwbase_rt.c | 11 +- + kernel/locking/rwsem.c | 70 +- + kernel/locking/spinlock_rt.c | 17 +- + kernel/locking/test-ww_mutex.c | 3 +- + kernel/rcu/tasks.h | 12 +- + kernel/rcu/tree_stall.h | 8 +- + kernel/rcu/update.c | 4 +- + kernel/sched/Makefile | 4 + + kernel/sched/core.c | 459 +-- + kernel/sched/core_sched.c | 9 +- + kernel/sched/deadline.c | 99 +- + kernel/sched/debug.c | 101 +- + kernel/sched/fair.c | 431 +- + kernel/sched/features.h | 5 + + kernel/sched/rt.c | 130 +- + kernel/sched/sched.h | 32 +- + kernel/sched/stats.c | 104 + + kernel/sched/stats.h | 49 + + kernel/sched/stop_task.c | 4 +- + kernel/sched/topology.c | 32 +- + kernel/smp.c | 12 +- + kernel/sys_ni.c | 3 +- + lib/Kconfig.debug | 2 +- + mm/memory.c | 2 +- + scripts/leaking_addresses.pl | 3 +- + scripts/sorttable.c | 4 +- + tools/arch/x86/lib/insn.c | 5 +- + tools/include/asm-generic/unaligned.h | 23 + + tools/include/uapi/linux/perf_event.h | 19 +- + tools/objtool/arch/x86/decode.c | 74 +- + tools/objtool/check.c | 464 ++- + tools/objtool/include/objtool/arch.h | 5 +- + tools/objtool/include/objtool/cfi.h | 2 + + tools/objtool/include/objtool/check.h | 2 +- + tools/objtool/include/objtool/elf.h | 1 + + tools/objtool/include/objtool/objtool.h | 9 + + tools/objtool/objtool.c | 22 + + tools/objtool/orc_gen.c | 15 +- + tools/perf/util/intel-pt-decoder/Build | 2 + + tools/perf/util/mem-events.c | 20 +- + .../testing/selftests/futex/functional/.gitignore | 1 + + tools/testing/selftests/futex/functional/Makefile | 3 +- + .../futex/functional/futex_wait_timeout.c | 21 +- + .../futex/functional/futex_wait_wouldblock.c | 41 +- + .../selftests/futex/functional/futex_waitv.c | 237 ++ + tools/testing/selftests/futex/functional/run.sh | 3 + + tools/testing/selftests/futex/include/futex2test.h | 22 + + tools/testing/selftests/sched/cs_prctl_test.c | 28 +- + 315 files changed, 10382 insertions(+), 7479 deletions(-) + create mode 100644 Documentation/userspace-api/futex2.rst + create mode 100644 arch/powerpc/platforms/pseries/cc_platform.c + create mode 100644 arch/x86/include/asm/extable_fixup_types.h + create mode 100644 arch/x86/include/asm/fpu/sched.h + create mode 100644 arch/x86/kernel/cc_platform.c + create mode 100644 arch/x86/kernel/cpu/vortex.c + create mode 100644 arch/x86/kernel/fpu/context.h + create mode 100644 arch/x86/kernel/fpu/internal.h + create mode 100644 arch/x86/kernel/fpu/legacy.h + create mode 100644 arch/x86/kernel/fpu/xstate.h + create mode 100644 include/linux/cc_platform.h + delete mode 100644 kernel/futex.c + create mode 100644 kernel/futex/Makefile + create mode 100644 kernel/futex/core.c + create mode 100644 kernel/futex/futex.h + create mode 100644 kernel/futex/pi.c + create mode 100644 kernel/futex/requeue.c + create mode 100644 kernel/futex/syscalls.c + create mode 100644 
kernel/futex/waitwake.c + create mode 100644 tools/include/asm-generic/unaligned.h + create mode 100644 tools/testing/selftests/futex/functional/futex_waitv.c + create mode 100644 tools/testing/selftests/futex/include/futex2test.h +$ git am -3 ../patches/0001-x86-fpu-include-vmalloc.h-for-vzalloc-etc.patch +Applying: x86/fpu: include vmalloc.h for vzalloc etc +$ git reset HEAD^ +Unstaged changes after reset: +M arch/x86/kernel/fpu/core.c +$ git add -A . +$ git commit -v -a --amend +[master e24fefc1b985] Merge branch 'auto-latest' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git + Date: Mon Oct 25 14:37:59 2021 +1100 +Merging clockevents/timers/drivers/next (eda9a4f7af6e clocksource/drivers/timer-ti-dm: Select TIMER_OF) +$ git merge -m Merge branch 'timers/drivers/next' of git://git.linaro.org/people/daniel.lezcano/linux.git clockevents/timers/drivers/next +Already up to date. +Merging edac/edac-for-next (f889e52436d6 Merge branch 'edac-urgent' into edac-for-next) +$ git merge -m Merge branch 'edac-for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac/edac-for-next +Merge made by the 'recursive' strategy. + drivers/edac/al_mc_edac.c | 12 ++++-------- + drivers/edac/amd64_edac.c | 22 +++++++++++++++++++++- + drivers/edac/edac_mc.c | 42 ++++++++++++++++++------------------------ + drivers/edac/edac_mc_sysfs.c | 8 ++++---- + drivers/edac/sb_edac.c | 2 +- + drivers/edac/ti_edac.c | 7 +------ + 6 files changed, 49 insertions(+), 44 deletions(-) +Merging irqchip/irq/irqchip-next (f880d3b0e2a4 Merge branch irq/modular-irqchips into irq/irqchip-next) +$ git merge -m Merge branch 'irq/irqchip-next' of git://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git irqchip/irq/irqchip-next +Auto-merging arch/mips/Kconfig +Auto-merging arch/arm64/Kconfig.platforms +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ .../interrupt-controller/microchip,eic.yaml | 73 ++++++ + MAINTAINERS | 6 + + arch/arm/mach-bcm/Kconfig | 4 - + arch/arm64/Kconfig.platforms | 4 - + arch/mips/Kconfig | 1 + + arch/mips/kernel/smp-bmips.c | 3 +- + drivers/irqchip/Kconfig | 23 +- + drivers/irqchip/Makefile | 1 + + drivers/irqchip/irq-bcm7038-l1.c | 47 +--- + drivers/irqchip/irq-bcm7120-l2.c | 21 +- + drivers/irqchip/irq-brcmstb-l2.c | 16 +- + drivers/irqchip/irq-mchp-eic.c | 280 +++++++++++++++++++++ + drivers/irqchip/irq-meson-gpio.c | 15 +- + drivers/irqchip/irq-mvebu-icu.c | 4 +- + drivers/irqchip/irq-mvebu-pic.c | 4 +- + drivers/irqchip/irq-stm32-exti.c | 4 +- + drivers/irqchip/irq-ti-sci-inta.c | 4 +- + drivers/irqchip/irq-ts4800.c | 4 +- + include/linux/irqchip.h | 15 +- + kernel/irq/generic-chip.c | 3 + + 20 files changed, 446 insertions(+), 86 deletions(-) + create mode 100644 Documentation/devicetree/bindings/interrupt-controller/microchip,eic.yaml + create mode 100644 drivers/irqchip/irq-mchp-eic.c +Merging ftrace/for-next (fed240d9c974 ARM: Recover kretprobe modified return address in stacktrace) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git ftrace/for-next +Auto-merging tools/objtool/check.c +Removing tools/bootconfig/include/linux/string.h +Removing tools/bootconfig/include/linux/printk.h +Removing tools/bootconfig/include/linux/memblock.h +Removing tools/bootconfig/include/linux/kernel.h +Removing tools/bootconfig/include/linux/errno.h +Removing tools/bootconfig/include/linux/ctype.h +Removing tools/bootconfig/include/linux/bug.h +Auto-merging lib/Kconfig.debug +Auto-merging kernel/trace/trace_events_hist.c +Auto-merging kernel/trace/trace.c +Auto-merging kernel/trace/ftrace.c +Auto-merging kernel/kprobes.c +CONFLICT (content): Merge conflict in kernel/kprobes.c +Auto-merging init/main.c +Auto-merging include/linux/trace_recursion.h +Auto-merging include/linux/preempt.h +Auto-merging arch/x86/Kconfig +Auto-merging arch/s390/kernel/kprobes.c +Auto-merging arch/s390/kernel/ftrace.c +Auto-merging arch/parisc/kernel/ftrace.c +CONFLICT (content): Merge conflict in arch/parisc/kernel/ftrace.c +Auto-merging arch/arm64/Kconfig +Auto-merging arch/arm/probes/kprobes/core.c +Auto-merging arch/arm/Kconfig +Recorded preimage for 'arch/parisc/kernel/ftrace.c' +Resolved 'kernel/kprobes.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'arch/parisc/kernel/ftrace.c'. +[master ced5dbda38b2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git +$ git diff -M --stat --summary HEAD^.. 
+ arch/arc/include/asm/kprobes.h | 2 +- + arch/arc/include/asm/ptrace.h | 5 + + arch/arc/kernel/kprobes.c | 13 +- + arch/arm/Kconfig | 1 + + arch/arm/include/asm/stacktrace.h | 9 + + arch/arm/kernel/ftrace.c | 5 - + arch/arm/kernel/return_address.c | 4 + + arch/arm/kernel/stacktrace.c | 17 +- + arch/arm/probes/kprobes/core.c | 43 ++- + arch/arm/probes/kprobes/opt-arm.c | 7 +- + arch/arm64/Kconfig | 1 + + arch/arm64/include/asm/kprobes.h | 2 +- + arch/arm64/include/asm/stacktrace.h | 4 + + arch/arm64/kernel/ftrace.c | 5 - + arch/arm64/kernel/probes/kprobes.c | 12 +- + arch/arm64/kernel/probes/kprobes_trampoline.S | 8 +- + arch/arm64/kernel/stacktrace.c | 7 + + arch/csky/include/asm/kprobes.h | 2 +- + arch/csky/kernel/ftrace.c | 5 - + arch/csky/kernel/probes/ftrace.c | 7 - + arch/csky/kernel/probes/kprobes.c | 14 +- + arch/csky/kernel/probes/kprobes_trampoline.S | 4 +- + arch/ia64/include/asm/ptrace.h | 5 + + arch/ia64/kernel/ftrace.c | 6 - + arch/ia64/kernel/kprobes.c | 15 +- + arch/microblaze/kernel/ftrace.c | 5 - + arch/mips/kernel/kprobes.c | 26 +- + arch/nds32/kernel/ftrace.c | 5 - + arch/parisc/kernel/ftrace.c | 6 - + arch/parisc/kernel/kprobes.c | 6 +- + arch/powerpc/include/asm/kprobes.h | 2 +- + arch/powerpc/kernel/kprobes.c | 29 +- + arch/powerpc/kernel/optprobes.c | 8 +- + arch/powerpc/kernel/stacktrace.c | 2 +- + arch/riscv/include/asm/kprobes.h | 2 +- + arch/riscv/kernel/ftrace.c | 5 - + arch/riscv/kernel/probes/kprobes.c | 15 +- + arch/riscv/kernel/probes/kprobes_trampoline.S | 4 +- + arch/s390/include/asm/kprobes.h | 2 +- + arch/s390/kernel/ftrace.c | 5 - + arch/s390/kernel/kprobes.c | 16 +- + arch/s390/kernel/stacktrace.c | 2 +- + arch/sh/include/asm/kprobes.h | 2 +- + arch/sh/kernel/ftrace.c | 5 - + arch/sh/kernel/kprobes.c | 12 +- + arch/sparc/include/asm/kprobes.h | 2 +- + arch/sparc/kernel/ftrace.c | 5 - + arch/sparc/kernel/kprobes.c | 12 +- + arch/x86/Kconfig | 2 +- + arch/x86/include/asm/ftrace.h | 9 +- + arch/x86/include/asm/kprobes.h | 1 - + arch/x86/include/asm/unwind.h | 29 ++ + arch/x86/include/asm/unwind_hints.h | 5 + + arch/x86/kernel/ftrace.c | 76 ++-- + arch/x86/kernel/ftrace_64.S | 30 +- + arch/x86/kernel/kprobes/core.c | 71 +++- + arch/x86/kernel/kprobes/opt.c | 6 +- + arch/x86/kernel/unwind_frame.c | 3 +- + arch/x86/kernel/unwind_guess.c | 3 +- + arch/x86/kernel/unwind_orc.c | 21 +- + fs/tracefs/inode.c | 3 +- + include/asm-generic/vmlinux.lds.h | 10 +- + include/linux/bootconfig.h | 31 +- + include/linux/ftrace.h | 38 +- + include/linux/kprobes.h | 113 +++--- + include/linux/objtool.h | 12 + + include/linux/preempt.h | 21 ++ + include/linux/trace_recursion.h | 16 +- + init/main.c | 16 +- + kernel/events/internal.h | 7 +- + kernel/kprobes.c | 501 ++++++++++++++------------ + kernel/test_kprobes.c | 222 +++--------- + kernel/trace/Makefile | 1 + + kernel/trace/fgraph.c | 6 +- + kernel/trace/ftrace.c | 331 ++++++++++++++--- + kernel/trace/pid_list.c | 495 +++++++++++++++++++++++++ + kernel/trace/pid_list.h | 88 +++++ + kernel/trace/ring_buffer.c | 9 +- + kernel/trace/trace.c | 151 ++++---- + kernel/trace/trace.h | 19 +- + kernel/trace/trace_dynevent.c | 2 +- + kernel/trace/trace_events.c | 48 +-- + kernel/trace/trace_events_hist.c | 15 +- + kernel/trace/trace_events_synth.c | 4 +- + kernel/trace/trace_functions_graph.c | 4 +- + kernel/trace/trace_hwlat.c | 6 +- + kernel/trace/trace_kprobe.c | 10 +- + kernel/trace/trace_osnoise.c | 14 +- + kernel/trace/trace_output.c | 17 +- + kernel/trace/trace_printk.c | 2 +- + kernel/trace/trace_recursion_record.c | 4 +- + 
kernel/trace/trace_selftest.c | 92 ++++- + kernel/trace/trace_stack.c | 6 +- + kernel/trace/trace_stat.c | 6 +- + kernel/trace/trace_uprobe.c | 4 +- + kernel/trace/tracing_map.c | 40 +- + lib/Kconfig.debug | 3 +- + lib/bootconfig.c | 231 +++++++----- + lib/error-inject.c | 3 +- + samples/ftrace/Makefile | 1 + + samples/ftrace/ftrace-direct-multi.c | 52 +++ + tools/bootconfig/Makefile | 4 +- + tools/bootconfig/include/linux/bootconfig.h | 45 ++- + tools/bootconfig/include/linux/bug.h | 12 - + tools/bootconfig/include/linux/ctype.h | 7 - + tools/bootconfig/include/linux/errno.h | 7 - + tools/bootconfig/include/linux/kernel.h | 18 - + tools/bootconfig/include/linux/memblock.h | 11 - + tools/bootconfig/include/linux/printk.h | 14 - + tools/bootconfig/include/linux/string.h | 32 -- + tools/bootconfig/main.c | 32 +- + tools/include/linux/objtool.h | 12 + + tools/objtool/check.c | 2 +- + 113 files changed, 2262 insertions(+), 1225 deletions(-) + create mode 100644 kernel/trace/pid_list.c + create mode 100644 kernel/trace/pid_list.h + create mode 100644 samples/ftrace/ftrace-direct-multi.c + delete mode 100644 tools/bootconfig/include/linux/bug.h + delete mode 100644 tools/bootconfig/include/linux/ctype.h + delete mode 100644 tools/bootconfig/include/linux/errno.h + delete mode 100644 tools/bootconfig/include/linux/kernel.h + delete mode 100644 tools/bootconfig/include/linux/memblock.h + delete mode 100644 tools/bootconfig/include/linux/printk.h + delete mode 100644 tools/bootconfig/include/linux/string.h +Merging rcu/rcu/next (6161068bf209 torture: Test splatting for delay-ridden clocksources) +$ git merge -m Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git rcu/rcu/next +Auto-merging tools/testing/selftests/bpf/prog_tests/atomics.c +Auto-merging kernel/rcu/update.c +Auto-merging kernel/rcu/tree_stall.h +Auto-merging kernel/rcu/tasks.h +CONFLICT (content): Merge conflict in kernel/rcu/tasks.h +Auto-merging Documentation/timers/no_hz.rst +Auto-merging Documentation/admin-guide/kernel-parameters.txt +Resolved 'kernel/rcu/tasks.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master 36616d918718] Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +$ git diff -M --stat --summary HEAD^.. 
+ .../Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 69 +++-- + Documentation/RCU/stallwarn.rst | 21 +- + Documentation/admin-guide/kernel-parameters.txt | 12 +- + .../admin-guide/kernel-per-CPU-kthreads.rst | 2 +- + .../litmus-tests/locking/DCL-broken.litmus | 55 ++++ + .../litmus-tests/locking/DCL-fixed.litmus | 56 ++++ + .../litmus-tests/locking/RM-broken.litmus | 42 +++ + Documentation/litmus-tests/locking/RM-fixed.litmus | 42 +++ + Documentation/timers/no_hz.rst | 10 +- + arch/sh/configs/sdk7786_defconfig | 1 - + arch/xtensa/configs/nommu_kc705_defconfig | 1 - + include/linux/clocksource.h | 2 + + include/linux/kcsan-checks.h | 3 + + include/linux/rcu_segcblist.h | 51 +++- + include/linux/rcupdate.h | 53 ++-- + include/linux/rcupdate_trace.h | 5 +- + include/linux/rcutiny.h | 2 +- + include/linux/srcu.h | 3 +- + include/linux/torture.h | 8 + + kernel/kcsan/core.c | 75 ++--- + kernel/kcsan/kcsan.h | 8 +- + kernel/kcsan/kcsan_test.c | 62 ++-- + kernel/kcsan/report.c | 77 ++++- + kernel/kcsan/selftest.c | 72 ++--- + kernel/locking/locktorture.c | 14 +- + kernel/rcu/Kconfig | 18 -- + kernel/rcu/rcu_segcblist.c | 10 +- + kernel/rcu/rcu_segcblist.h | 12 +- + kernel/rcu/rcuscale.c | 10 +- + kernel/rcu/rcutorture.c | 167 +++++++---- + kernel/rcu/refscale.c | 6 +- + kernel/rcu/srcutiny.c | 2 +- + kernel/rcu/tasks.h | 112 ++++---- + kernel/rcu/tree.c | 164 +++++++---- + kernel/rcu/tree.h | 24 +- + kernel/rcu/tree_exp.h | 14 +- + kernel/rcu/tree_nocb.h | 35 ++- + kernel/rcu/tree_plugin.h | 247 ++-------------- + kernel/rcu/tree_stall.h | 27 +- + kernel/rcu/update.c | 8 +- + kernel/scftorture.c | 43 +-- + kernel/softirq.c | 3 +- + kernel/time/clocksource-wdtest.c | 19 +- + kernel/time/clocksource.c | 38 ++- + kernel/time/tick-sched.c | 7 + + tools/memory-model/Documentation/locking.txt | 320 +++++++++++++++++++++ + tools/memory-model/litmus-tests/.gitignore | 2 +- + tools/memory-model/scripts/README | 16 +- + tools/memory-model/scripts/checkalllitmus.sh | 29 +- + tools/memory-model/scripts/checkghlitmus.sh | 11 +- + tools/memory-model/scripts/checklitmus.sh | 25 +- + tools/memory-model/scripts/checklitmushist.sh | 2 +- + tools/memory-model/scripts/checktheselitmus.sh | 43 +++ + tools/memory-model/scripts/cmplitmushist.sh | 49 +++- + tools/memory-model/scripts/hwfnseg.sh | 20 ++ + tools/memory-model/scripts/initlitmushist.sh | 2 +- + tools/memory-model/scripts/judgelitmus.sh | 120 ++++++-- + tools/memory-model/scripts/newlitmushist.sh | 4 +- + tools/memory-model/scripts/parseargs.sh | 21 +- + tools/memory-model/scripts/runlitmus.sh | 80 ++++++ + tools/memory-model/scripts/runlitmushist.sh | 29 +- + tools/memory-model/scripts/simpletest.sh | 35 +++ + tools/rcu/extract-stall.sh | 34 +++ + tools/testing/selftests/bpf/prog_tests/atomics.c | 4 +- + .../testing/selftests/rcutorture/bin/kvm-remote.sh | 1 + + tools/testing/selftests/rcutorture/bin/kvm.sh | 11 +- + tools/testing/selftests/rcutorture/bin/torture.sh | 13 +- + .../selftests/rcutorture/configs/rcu/TREE01 | 1 - + .../selftests/rcutorture/configs/rcu/TREE02 | 1 - + .../selftests/rcutorture/configs/rcu/TREE04 | 1 - + .../selftests/rcutorture/configs/rcu/TREE05 | 1 - + .../selftests/rcutorture/configs/rcu/TREE06 | 1 - + .../selftests/rcutorture/configs/rcu/TREE07 | 1 - + .../selftests/rcutorture/configs/rcu/TREE08 | 1 - + .../selftests/rcutorture/configs/rcu/TREE10 | 1 - + .../selftests/rcutorture/configs/rcuscale/TINY | 1 - + .../selftests/rcutorture/configs/rcuscale/TRACE01 | 1 - + .../selftests/rcutorture/configs/rcuscale/TREE | 1 - + 
.../selftests/rcutorture/configs/rcuscale/TREE54 | 1 - + .../rcutorture/configs/refscale/NOPREEMPT | 1 - + .../selftests/rcutorture/configs/refscale/PREEMPT | 1 - + .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 1 - + 82 files changed, 1740 insertions(+), 858 deletions(-) + create mode 100644 Documentation/litmus-tests/locking/DCL-broken.litmus + create mode 100644 Documentation/litmus-tests/locking/DCL-fixed.litmus + create mode 100644 Documentation/litmus-tests/locking/RM-broken.litmus + create mode 100644 Documentation/litmus-tests/locking/RM-fixed.litmus + create mode 100644 tools/memory-model/Documentation/locking.txt + create mode 100755 tools/memory-model/scripts/checktheselitmus.sh + create mode 100755 tools/memory-model/scripts/hwfnseg.sh + create mode 100755 tools/memory-model/scripts/runlitmus.sh + create mode 100755 tools/memory-model/scripts/simpletest.sh + create mode 100644 tools/rcu/extract-stall.sh +Merging kvm/next (5edcbfdf96fd KVM: x86: SGX must obey the KVM_INTERNAL_ERROR_EMULATION protocol) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git kvm/next +Auto-merging drivers/gpu/drm/i915/Kconfig +Auto-merging arch/x86/kvm/x86.c +CONFLICT (content): Merge conflict in arch/x86/kvm/x86.c +Auto-merging arch/x86/kvm/vmx/vmx.c +Auto-merging arch/x86/kvm/svm/svm.h +Auto-merging arch/x86/kvm/svm/svm.c +Auto-merging arch/x86/kvm/svm/sev.c +Auto-merging arch/x86/kvm/mmu/mmu.c +Auto-merging arch/x86/kvm/emulate.c +Auto-merging arch/x86/include/asm/kvm_host.h +Auto-merging arch/riscv/Makefile +Auto-merging arch/riscv/Kconfig +CONFLICT (content): Merge conflict in arch/riscv/Kconfig +Auto-merging arch/powerpc/kvm/powerpc.c +Auto-merging MAINTAINERS +Auto-merging Documentation/admin-guide/kernel-parameters.txt +Resolved 'arch/riscv/Kconfig' using previous resolution. +Recorded preimage for 'arch/x86/kvm/x86.c' +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +Recorded resolution for 'arch/x86/kvm/x86.c'. +[master ab73418f6f7b] Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm.git +$ git diff -M --stat --summary HEAD^.. 
+ Documentation/admin-guide/kernel-parameters.txt | 9 +- + Documentation/virt/kvm/api.rst | 241 ++++- + Documentation/virt/kvm/devices/vcpu.rst | 70 ++ + Documentation/virt/kvm/devices/xics.rst | 2 +- + Documentation/virt/kvm/devices/xive.rst | 2 +- + MAINTAINERS | 12 + + arch/arm64/include/asm/kvm_host.h | 1 - + arch/arm64/kvm/arm.c | 8 - + arch/mips/kvm/mips.c | 2 +- + arch/powerpc/include/asm/kvm_book3s.h | 2 +- + arch/powerpc/include/asm/kvm_host.h | 4 +- + arch/powerpc/kvm/book3s_xive.c | 2 +- + arch/powerpc/kvm/powerpc.c | 2 +- + arch/riscv/Kconfig | 2 + + arch/riscv/Makefile | 1 + + arch/riscv/include/asm/kvm_host.h | 266 ++++++ + arch/riscv/include/asm/kvm_types.h | 7 + + arch/riscv/include/asm/kvm_vcpu_timer.h | 44 + + arch/riscv/include/uapi/asm/kvm.h | 128 +++ + arch/riscv/kernel/asm-offsets.c | 156 ++++ + arch/riscv/kvm/Kconfig | 35 + + arch/riscv/kvm/Makefile | 25 + + arch/riscv/kvm/main.c | 118 +++ + arch/riscv/kvm/mmu.c | 802 +++++++++++++++++ + arch/riscv/kvm/tlb.S | 74 ++ + arch/riscv/kvm/vcpu.c | 997 +++++++++++++++++++++ + arch/riscv/kvm/vcpu_exit.c | 701 +++++++++++++++ + arch/riscv/kvm/vcpu_sbi.c | 185 ++++ + arch/riscv/kvm/vcpu_switch.S | 400 +++++++++ + arch/riscv/kvm/vcpu_timer.c | 225 +++++ + arch/riscv/kvm/vm.c | 97 ++ + arch/riscv/kvm/vmid.c | 120 +++ + arch/x86/include/asm/kvm_host.h | 48 +- + arch/x86/include/asm/kvm_page_track.h | 11 +- + arch/x86/include/uapi/asm/kvm.h | 4 + + arch/x86/kernel/irq.c | 4 +- + arch/x86/kvm/Kconfig | 3 + + arch/x86/kvm/cpuid.c | 47 +- + arch/x86/kvm/emulate.c | 5 + + arch/x86/kvm/hyperv.c | 22 +- + arch/x86/kvm/ioapic.c | 2 +- + arch/x86/kvm/ioapic.h | 4 +- + arch/x86/kvm/mmu.h | 114 ++- + arch/x86/kvm/mmu/mmu.c | 702 ++++++++------- + arch/x86/kvm/mmu/mmu_internal.h | 21 +- + arch/x86/kvm/mmu/mmutrace.h | 18 +- + arch/x86/kvm/mmu/page_track.c | 49 +- + arch/x86/kvm/mmu/paging_tmpl.h | 168 ++-- + arch/x86/kvm/mmu/spte.c | 34 +- + arch/x86/kvm/mmu/spte.h | 21 +- + arch/x86/kvm/mmu/tdp_mmu.c | 119 +-- + arch/x86/kvm/mmu/tdp_mmu.h | 6 +- + arch/x86/kvm/svm/nested.c | 282 +++--- + arch/x86/kvm/svm/sev.c | 6 +- + arch/x86/kvm/svm/svm.c | 176 ++-- + arch/x86/kvm/svm/svm.h | 66 +- + arch/x86/kvm/trace.h | 9 +- + arch/x86/kvm/vmx/nested.c | 63 +- + arch/x86/kvm/vmx/pmu_intel.c | 6 +- + arch/x86/kvm/vmx/sgx.c | 16 +- + arch/x86/kvm/vmx/vmx.c | 136 +-- + arch/x86/kvm/vmx/vmx.h | 2 +- + arch/x86/kvm/x86.c | 800 +++++++++++------ + arch/x86/kvm/x86.h | 2 - + drivers/clocksource/timer-riscv.c | 9 + + drivers/gpu/drm/i915/Kconfig | 1 + + include/clocksource/timer-riscv.h | 16 + + include/linux/kvm_host.h | 18 +- + include/uapi/linux/kvm.h | 29 +- + tools/arch/x86/include/asm/pvclock-abi.h | 48 + + tools/arch/x86/include/asm/pvclock.h | 103 +++ + tools/testing/selftests/kvm/.gitignore | 2 + + tools/testing/selftests/kvm/Makefile | 2 + + tools/testing/selftests/kvm/include/kvm_util.h | 11 + + tools/testing/selftests/kvm/kvm_create_max_vcpus.c | 2 +- + tools/testing/selftests/kvm/lib/kvm_util.c | 46 +- + tools/testing/selftests/kvm/lib/sparsebit.c | 2 +- + tools/testing/selftests/kvm/lib/x86_64/processor.c | 4 +- + tools/testing/selftests/kvm/lib/x86_64/svm.c | 14 +- + .../selftests/kvm/system_counter_offset_test.c | 132 +++ + .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 3 +- + .../testing/selftests/kvm/x86_64/kvm_clock_test.c | 203 +++++ + .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 2 +- + virt/kvm/eventfd.c | 15 +- + virt/kvm/kvm_main.c | 127 +-- + 85 files changed, 7163 insertions(+), 1332 deletions(-) + create mode 100644 
arch/riscv/include/asm/kvm_host.h + create mode 100644 arch/riscv/include/asm/kvm_types.h + create mode 100644 arch/riscv/include/asm/kvm_vcpu_timer.h + create mode 100644 arch/riscv/include/uapi/asm/kvm.h + create mode 100644 arch/riscv/kvm/Kconfig + create mode 100644 arch/riscv/kvm/Makefile + create mode 100644 arch/riscv/kvm/main.c + create mode 100644 arch/riscv/kvm/mmu.c + create mode 100644 arch/riscv/kvm/tlb.S + create mode 100644 arch/riscv/kvm/vcpu.c + create mode 100644 arch/riscv/kvm/vcpu_exit.c + create mode 100644 arch/riscv/kvm/vcpu_sbi.c + create mode 100644 arch/riscv/kvm/vcpu_switch.S + create mode 100644 arch/riscv/kvm/vcpu_timer.c + create mode 100644 arch/riscv/kvm/vm.c + create mode 100644 arch/riscv/kvm/vmid.c + create mode 100644 include/clocksource/timer-riscv.h + create mode 100644 tools/arch/x86/include/asm/pvclock-abi.h + create mode 100644 tools/arch/x86/include/asm/pvclock.h + create mode 100644 tools/testing/selftests/kvm/system_counter_offset_test.c + create mode 100644 tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +Merging kvm-arm/next (5a2acbbb0179 Merge branch kvm/selftests/memslot into kvmarm-master/next) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git kvm-arm/next +Auto-merging tools/testing/selftests/kvm/include/kvm_util.h +Auto-merging tools/testing/selftests/kvm/Makefile +Auto-merging tools/testing/selftests/kvm/.gitignore +Auto-merging arch/arm64/kvm/reset.c +Auto-merging arch/arm64/kvm/mmu.c +Auto-merging arch/arm64/kvm/hyp/nvhe/mem_protect.c +Auto-merging arch/arm64/kvm/hyp/include/hyp/switch.h +Auto-merging arch/arm64/kvm/arm.c +Auto-merging arch/arm64/kernel/smp.c +Auto-merging arch/arm64/include/asm/sysreg.h +Auto-merging arch/arm64/include/asm/kvm_host.h +Auto-merging arch/arm64/include/asm/kvm_asm.h +Auto-merging arch/arm64/Kconfig +Auto-merging Documentation/admin-guide/kernel-parameters.txt +Merge made by the 'recursive' strategy. 
+ Documentation/admin-guide/kernel-parameters.txt | 6 +- + arch/arm64/Kconfig | 1 + + arch/arm64/include/asm/kvm_arm.h | 1 + + arch/arm64/include/asm/kvm_asm.h | 48 +- + arch/arm64/include/asm/kvm_emulate.h | 5 +- + arch/arm64/include/asm/kvm_host.h | 3 + + arch/arm64/include/asm/kvm_hyp.h | 5 + + arch/arm64/include/asm/sysreg.h | 3 + + arch/arm64/kernel/smp.c | 3 +- + arch/arm64/kvm/Kconfig | 10 +- + arch/arm64/kvm/arm.c | 94 +- + arch/arm64/kvm/hyp/include/hyp/fault.h | 75 ++ + arch/arm64/kvm/hyp/include/hyp/switch.h | 235 ++-- + arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 200 +++ + arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 2 + + arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- + arch/arm64/kvm/hyp/nvhe/host.S | 26 +- + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 48 +- + arch/arm64/kvm/hyp/nvhe/mem_protect.c | 11 +- + arch/arm64/kvm/hyp/nvhe/pkvm.c | 185 +++ + arch/arm64/kvm/hyp/nvhe/setup.c | 3 + + arch/arm64/kvm/hyp/nvhe/switch.c | 99 ++ + arch/arm64/kvm/hyp/nvhe/sys_regs.c | 487 ++++++++ + arch/arm64/kvm/hyp/vgic-v3-sr.c | 22 +- + arch/arm64/kvm/hyp/vhe/switch.c | 16 + + arch/arm64/kvm/mmu.c | 2 +- + arch/arm64/kvm/pmu-emul.c | 2 +- + arch/arm64/kvm/reset.c | 2 +- + arch/arm64/kvm/sys_regs.c | 41 +- + arch/arm64/kvm/vgic/vgic-init.c | 2 +- + arch/arm64/kvm/vgic/vgic-irqfd.c | 2 +- + arch/arm64/kvm/vgic/vgic-its.c | 18 +- + arch/arm64/kvm/vgic/vgic-kvm-device.c | 25 +- + arch/arm64/kvm/vgic/vgic-mmio-v3.c | 8 +- + arch/arm64/kvm/vgic/vgic-v3.c | 27 +- + arch/arm64/kvm/vgic/vgic-v4.c | 2 +- + arch/arm64/kvm/vgic/vgic.h | 5 +- + tools/arch/arm64/include/asm/sysreg.h | 1296 ++++++++++++++++++++ + tools/testing/selftests/kvm/.gitignore | 1 + + tools/testing/selftests/kvm/Makefile | 5 +- + tools/testing/selftests/kvm/aarch64/arch_timer.c | 479 ++++++++ + .../selftests/kvm/aarch64/debug-exceptions.c | 30 +- + .../selftests/kvm/aarch64/psci_cpu_on_test.c | 2 +- + tools/testing/selftests/kvm/aarch64/vgic_init.c | 369 ++++-- + .../selftests/kvm/include/aarch64/arch_timer.h | 142 +++ + .../testing/selftests/kvm/include/aarch64/delay.h | 25 + + tools/testing/selftests/kvm/include/aarch64/gic.h | 21 + + .../selftests/kvm/include/aarch64/processor.h | 90 +- + .../selftests/kvm/include/aarch64/spinlock.h | 13 + + tools/testing/selftests/kvm/include/aarch64/vgic.h | 20 + + tools/testing/selftests/kvm/include/kvm_util.h | 2 + + tools/testing/selftests/kvm/lib/aarch64/gic.c | 95 ++ + .../selftests/kvm/lib/aarch64/gic_private.h | 21 + + tools/testing/selftests/kvm/lib/aarch64/gic_v3.c | 240 ++++ + tools/testing/selftests/kvm/lib/aarch64/gic_v3.h | 70 ++ + .../testing/selftests/kvm/lib/aarch64/processor.c | 24 +- + tools/testing/selftests/kvm/lib/aarch64/spinlock.c | 27 + + tools/testing/selftests/kvm/lib/aarch64/vgic.c | 70 ++ + tools/testing/selftests/kvm/memslot_perf_test.c | 56 +- + 59 files changed, 4373 insertions(+), 451 deletions(-) + create mode 100644 arch/arm64/kvm/hyp/include/hyp/fault.h + create mode 100644 arch/arm64/kvm/hyp/include/nvhe/fixed_config.h + create mode 100644 arch/arm64/kvm/hyp/nvhe/pkvm.c + create mode 100644 arch/arm64/kvm/hyp/nvhe/sys_regs.c + create mode 100644 tools/arch/arm64/include/asm/sysreg.h + create mode 100644 tools/testing/selftests/kvm/aarch64/arch_timer.c + create mode 100644 tools/testing/selftests/kvm/include/aarch64/arch_timer.h + create mode 100644 tools/testing/selftests/kvm/include/aarch64/delay.h + create mode 100644 tools/testing/selftests/kvm/include/aarch64/gic.h + create mode 100644 tools/testing/selftests/kvm/include/aarch64/spinlock.h + create mode 
100644 tools/testing/selftests/kvm/include/aarch64/vgic.h + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic.c + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_private.h + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_v3.c + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/gic_v3.h + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/spinlock.c + create mode 100644 tools/testing/selftests/kvm/lib/aarch64/vgic.c +Merging kvm-ppc/kvm-ppc-next (72476aaa4691 KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests) +$ git merge -m Merge branch 'kvm-ppc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc.git kvm-ppc/kvm-ppc-next +Already up to date. +Merging kvms390/next (2ef7843375dc KVM: s390: Add a routine for setting userspace CPU state) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git kvms390/next +Auto-merging arch/s390/kvm/kvm-s390.c +Merge made by the 'recursive' strategy. + arch/s390/include/asm/uv.h | 5 +++ + arch/s390/kernel/uv.c | 31 ++++++++++--- + arch/s390/kvm/intercept.c | 5 +++ + arch/s390/kvm/kvm-s390.c | 6 +-- + arch/s390/kvm/kvm-s390.h | 9 ++++ + arch/s390/kvm/pv.c | 21 +++++---- + arch/s390/kvm/sigp.c | 14 +----- + arch/s390/mm/gmap.c | 11 ++++- + arch/s390/mm/pgtable.c | 109 ++++++++++++++++++++++++++++++++------------- + 9 files changed, 143 insertions(+), 68 deletions(-) +Merging xen-tip/linux-next (319933a80fd4 xen/balloon: fix cancelled balloon action) +$ git merge -m Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip.git xen-tip/linux-next +Already up to date. +Merging percpu/for-next (a81a52b325ec Merge branch 'for-5.14-fixes' into for-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git percpu/for-next +Already up to date. +Merge made by the 'recursive' strategy. +Merging workqueues/for-next (55df0933be74 workqueue: Introduce show_one_worker_pool and show_one_workqueue.) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git workqueues/for-next +Merge made by the 'recursive' strategy. + drivers/tty/sysrq.c | 2 +- + include/linux/workqueue.h | 3 +- + kernel/power/process.c | 2 +- + kernel/workqueue.c | 187 ++++++++++++++++++++++++++-------------------- + 4 files changed, 111 insertions(+), 83 deletions(-) +Merging drivers-x86/for-next (fc3341b4b55f platform/x86: system76_acpi: fix Kconfig dependencies) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git drivers-x86/for-next +Auto-merging drivers/platform/x86/thinkpad_acpi.c +Auto-merging drivers/platform/x86/amd-pmc.c +Auto-merging drivers/platform/x86/Kconfig +Auto-merging drivers/platform/mellanox/mlxreg-io.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ Documentation/ABI/stable/sysfs-driver-mlxreg-io | 244 +++ + .../ABI/testing/sysfs-platform-dell-privacy-wmi | 60 +- + Documentation/ABI/testing/sysfs-platform-intel-pmc | 2 + + MAINTAINERS | 28 +- + drivers/hid/surface-hid/surface_hid.c | 4 +- + drivers/input/misc/axp20x-pek.c | 26 +- + drivers/platform/mellanox/Kconfig | 12 + + drivers/platform/mellanox/Makefile | 1 + + drivers/platform/mellanox/mlxreg-hotplug.c | 123 +- + drivers/platform/mellanox/mlxreg-io.c | 2 +- + drivers/platform/mellanox/mlxreg-lc.c | 906 ++++++++ + drivers/platform/surface/surface3-wmi.c | 9 +- + drivers/platform/surface/surface3_power.c | 3 +- + .../platform/surface/surface_aggregator_registry.c | 54 + + drivers/platform/surface/surface_gpe.c | 13 + + drivers/platform/x86/Kconfig | 29 + + drivers/platform/x86/Makefile | 4 + + drivers/platform/x86/acer-wmi.c | 14 +- + drivers/platform/x86/amd-pmc.c | 155 +- + drivers/platform/x86/barco-p50-gpio.c | 436 ++++ + drivers/platform/x86/dell/dell-wmi-base.c | 76 +- + drivers/platform/x86/hp-wmi.c | 329 ++- + drivers/platform/x86/ideapad-laptop.c | 35 +- + drivers/platform/x86/intel/Kconfig | 16 + + drivers/platform/x86/intel/Makefile | 1 + + drivers/platform/x86/intel/int0002_vgpio.c | 14 +- + drivers/platform/x86/intel/ishtp_eclite.c | 701 ++++++ + drivers/platform/x86/lg-laptop.c | 11 +- + drivers/platform/x86/mlx-platform.c | 2279 +++++++++++++++++--- + drivers/platform/x86/nvidia-wmi-ec-backlight.c | 213 ++ + drivers/platform/x86/panasonic-laptop.c | 18 +- + drivers/platform/x86/sony-laptop.c | 46 +- + drivers/platform/x86/system76_acpi.c | 428 +++- + drivers/platform/x86/thinkpad_acpi.c | 195 +- + drivers/platform/x86/wmi.c | 375 ++-- + include/linux/platform_data/mlxreg.h | 82 + + include/linux/platform_data/x86/soc.h | 65 + + include/linux/surface_aggregator/controller.h | 4 +- + sound/soc/intel/common/soc-intel-quirks.h | 51 +- + 39 files changed, 6224 insertions(+), 840 deletions(-) + create mode 100644 drivers/platform/mellanox/mlxreg-lc.c + create mode 100644 drivers/platform/x86/barco-p50-gpio.c + create mode 100644 drivers/platform/x86/intel/ishtp_eclite.c + create mode 100644 drivers/platform/x86/nvidia-wmi-ec-backlight.c + create mode 100644 include/linux/platform_data/x86/soc.h +Merging chrome-platform/for-next (3119c28634dd MAINTAINERS: Chrome: Drop Enric Balletbo i Serra) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git chrome-platform/for-next +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + MAINTAINERS | 14 +++- + .../common/cros_ec_sensors/cros_ec_sensors_core.c | 3 +- + drivers/platform/chrome/cros_ec_lpc.c | 4 +- + drivers/platform/chrome/cros_ec_proto.c | 60 ++++++++++++++++-- + drivers/platform/chrome/cros_ec_sensorhub.c | 6 +- + drivers/platform/chrome/cros_ec_typec.c | 74 ++++++---------------- + drivers/platform/chrome/cros_usbpd_notify.c | 50 +-------------- + include/linux/platform_data/cros_ec_proto.h | 5 +- + 8 files changed, 98 insertions(+), 118 deletions(-) +Merging hsi/for-next (4ef69e17eb56 HSI: cmt_speech: unmark comments as kernel-doc) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git hsi/for-next +Merge made by the 'recursive' strategy. 
+ drivers/hsi/clients/cmt_speech.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) +Merging leds/for-next (97b31c1f8eb8 leds: trigger: Disable CPU trigger on PREEMPT_RT) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git leds/for-next +Merge made by the 'recursive' strategy. + drivers/leds/led-class-flash.c | 2 +- + drivers/leds/led-triggers.c | 41 +++++++++++++++++++++-------------------- + drivers/leds/trigger/Kconfig | 1 + + include/linux/leds.h | 2 +- + 4 files changed, 24 insertions(+), 22 deletions(-) +Merging ipmi/for-next (fc4e78481afa char: ipmi: replace snprintf in show functions with sysfs_emit) +$ git merge -m Merge branch 'for-next' of git://github.com/cminyard/linux-ipmi.git ipmi/for-next +Merge made by the 'recursive' strategy. + .../bindings/ipmi/aspeed,ast2400-ibt-bmc.txt | 1 + + .../devicetree/bindings/ipmi/ipmi-ipmb.yaml | 59 +++ + Documentation/driver-api/ipmi.rst | 64 ++- + drivers/char/ipmi/Kconfig | 11 +- + drivers/char/ipmi/Makefile | 1 + + drivers/char/ipmi/bt-bmc.c | 69 +-- + drivers/char/ipmi/ipmi_devintf.c | 8 +- + drivers/char/ipmi/ipmi_ipmb.c | 539 +++++++++++++++++++++ + drivers/char/ipmi/ipmi_msghandler.c | 330 +++++++++++-- + drivers/char/ipmi/ipmi_si_intf.c | 8 +- + drivers/char/ipmi/ipmi_ssif.c | 4 +- + drivers/char/ipmi/ipmi_watchdog.c | 25 +- + include/linux/ipmi.h | 3 + + include/linux/ipmi_smi.h | 59 +++ + include/uapi/linux/ipmi.h | 16 +- + 15 files changed, 1075 insertions(+), 122 deletions(-) + create mode 100644 Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml + create mode 100644 drivers/char/ipmi/ipmi_ipmb.c +Merging driver-core/driver-core-next (c8dcf655ec81 x86/build: Tuck away built-in firmware under FW_LOADER) +$ git merge -m Merge branch 'driver-core-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git driver-core/driver-core-next +Auto-merging include/asm-generic/vmlinux.lds.h +Auto-merging drivers/pci/pci-sysfs.c +Auto-merging drivers/base/arch_topology.c +Auto-merging MAINTAINERS +Auto-merging Documentation/admin-guide/kernel-parameters.txt +Auto-merging Documentation/ABI/testing/sysfs-bus-pci +Auto-merging Documentation/ABI/testing/ima_policy +Merge made by the 'recursive' strategy. 
+ Documentation/ABI/obsolete/o2cb | 11 + + Documentation/ABI/obsolete/sysfs-bus-iio | 4 + + Documentation/ABI/stable/o2cb | 2 +- + Documentation/ABI/stable/sysfs-class-infiniband | 64 +- + Documentation/ABI/stable/sysfs-class-tpm | 2 +- + Documentation/ABI/stable/sysfs-devices | 7 + + Documentation/ABI/stable/sysfs-module | 25 +- + Documentation/ABI/testing/configfs-usb-gadget-uac1 | 42 +- + Documentation/ABI/testing/configfs-usb-gadget-uac2 | 43 +- + Documentation/ABI/testing/evm | 5 +- + Documentation/ABI/testing/ima_policy | 2 +- + Documentation/ABI/testing/pstore | 3 +- + Documentation/ABI/testing/sysfs-ata | 2 +- + Documentation/ABI/testing/sysfs-bus-mdio | 9 + + Documentation/ABI/testing/sysfs-bus-pci | 24 +- + Documentation/ABI/testing/sysfs-bus-platform | 12 + + Documentation/ABI/testing/sysfs-bus-rapidio | 32 +- + .../ABI/testing/sysfs-bus-soundwire-master | 20 +- + .../ABI/testing/sysfs-bus-soundwire-slave | 62 +- + Documentation/ABI/testing/sysfs-bus-usb | 292 ++++++- + Documentation/ABI/testing/sysfs-class-bdi | 30 +- + Documentation/ABI/testing/sysfs-class-cxl | 15 +- + .../ABI/testing/sysfs-class-devfreq-event | 12 +- + Documentation/ABI/testing/sysfs-class-extcon | 12 +- + Documentation/ABI/testing/sysfs-class-gnss | 2 +- + Documentation/ABI/testing/sysfs-class-hwmon | 932 +++++++++++++++++++++ + Documentation/ABI/testing/sysfs-class-mei | 18 +- + Documentation/ABI/testing/sysfs-class-mic | 24 +- + Documentation/ABI/testing/sysfs-class-mux | 2 +- + Documentation/ABI/testing/sysfs-class-pwm | 20 +- + Documentation/ABI/testing/sysfs-class-rapidio | 4 +- + Documentation/ABI/testing/sysfs-class-rc | 14 +- + Documentation/ABI/testing/sysfs-class-rc-nuvoton | 2 +- + Documentation/ABI/testing/sysfs-class-typec | 2 +- + Documentation/ABI/testing/sysfs-class-uwb_rc | 26 +- + .../ABI/testing/sysfs-class-uwb_rc-wusbhc | 10 +- + .../ABI/testing/sysfs-devices-platform-dock | 10 +- + Documentation/ABI/testing/sysfs-devices-power | 36 + + Documentation/ABI/testing/sysfs-devices-removable | 8 +- + Documentation/ABI/testing/sysfs-devices-system-cpu | 68 +- + Documentation/ABI/testing/sysfs-driver-ufs | 126 +++ + Documentation/ABI/testing/sysfs-firmware-efi-esrt | 16 +- + Documentation/ABI/testing/sysfs-kernel-slab | 115 +-- + Documentation/ABI/testing/sysfs-mce | 129 +++ + Documentation/ABI/testing/sysfs-module | 7 + + Documentation/ABI/testing/sysfs-platform-dptf | 4 + + Documentation/ABI/testing/sysfs-platform-sst-atom | 2 +- + Documentation/ABI/testing/sysfs-ptp | 30 +- + Documentation/ABI/testing/sysfs-tty | 32 +- + Documentation/admin-guide/dynamic-debug-howto.rst | 15 +- + Documentation/admin-guide/kernel-parameters.txt | 5 - + Documentation/hwmon/sysfs-interface.rst | 596 +------------ + Documentation/x86/x86_64/machinecheck.rst | 56 +- + MAINTAINERS | 3 + + arch/x86/include/asm/microcode.h | 3 - + arch/x86/kernel/cpu/microcode/amd.c | 14 +- + arch/x86/kernel/cpu/microcode/core.c | 17 - + arch/x86/kernel/cpu/microcode/intel.c | 9 +- + arch/x86/tools/relocs.c | 2 + + drivers/base/arch_topology.c | 3 - + drivers/base/component.c | 6 +- + drivers/base/core.c | 17 +- + drivers/base/firmware_loader/builtin/Makefile | 6 +- + drivers/base/firmware_loader/builtin/main.c | 106 +++ + drivers/base/firmware_loader/firmware.h | 17 + + drivers/base/firmware_loader/main.c | 65 +- + drivers/base/platform.c | 3 +- + drivers/gpio/gpiolib-acpi.c | 5 +- + drivers/i2c/i2c-core-acpi.c | 22 +- + drivers/pci/pci-sysfs.c | 6 +- + fs/kernfs/symlink.c | 3 +- + fs/sysfs/dir.c | 3 +- + fs/sysfs/file.c | 140 ++-- 
+ fs/sysfs/group.c | 15 +- + fs/sysfs/sysfs.h | 8 +- + include/asm-generic/vmlinux.lds.h | 20 +- + include/linux/device/bus.h | 1 + + include/linux/firmware.h | 30 +- + include/linux/fwnode.h | 1 + + include/linux/kernfs.h | 28 - + include/linux/kobject.h | 1 - + lib/dynamic_debug.c | 60 +- + lib/kobject.c | 2 +- + scripts/get_abi.pl | 493 ++++++++++- + 84 files changed, 2853 insertions(+), 1299 deletions(-) + create mode 100644 Documentation/ABI/obsolete/o2cb + create mode 100644 Documentation/ABI/testing/sysfs-class-hwmon + create mode 100644 Documentation/ABI/testing/sysfs-mce + create mode 100644 drivers/base/firmware_loader/builtin/main.c +Merging usb/usb-next (e4ce9ed835bc usb: gadget: uvc: ensure the vdev is unset) +$ git merge -m Merge branch 'usb-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb/usb-next +Auto-merging drivers/usb/host/xhci-pci.c +Auto-merging arch/arm/boot/dts/stm32mp151.dtsi +Removing Documentation/devicetree/bindings/usb/usb3503.txt +Removing Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.txt +Merge made by the 'recursive' strategy. + .../bindings/phy/nvidia,tegra20-usb-phy.txt | 74 ---- + .../bindings/phy/nvidia,tegra20-usb-phy.yaml | 373 +++++++++++++++++++++ + .../devicetree/bindings/usb/atmel-usb.txt | 4 + + .../devicetree/bindings/usb/qcom,dwc3.yaml | 1 + + .../devicetree/bindings/usb/smsc,usb3503.yaml | 108 ++++++ + .../devicetree/bindings/usb/snps,dwc3.yaml | 18 +- + .../devicetree/bindings/usb/ti,tps6598x.yaml | 4 + + Documentation/devicetree/bindings/usb/usb3503.txt | 39 --- + arch/arm/boot/dts/stm32mp151.dtsi | 1 + + drivers/platform/x86/i2c-multi-instantiate.c | 31 +- + drivers/usb/atm/usbatm.c | 4 +- + drivers/usb/chipidea/udc.c | 8 + + drivers/usb/class/cdc-acm.c | 1 - + drivers/usb/class/cdc-wdm.c | 2 +- + drivers/usb/core/config.c | 4 +- + drivers/usb/core/devio.c | 144 ++++++-- + drivers/usb/core/hcd.c | 6 +- + drivers/usb/dwc2/drd.c | 24 +- + drivers/usb/dwc3/Kconfig | 7 +- + drivers/usb/dwc3/core.c | 29 ++ + drivers/usb/dwc3/core.h | 8 + + drivers/usb/dwc3/gadget.c | 6 +- + drivers/usb/gadget/epautoconf.c | 2 +- + drivers/usb/gadget/function/f_mass_storage.c | 87 ++++- + drivers/usb/gadget/function/f_uac2.c | 21 +- + drivers/usb/gadget/function/f_uvc.c | 8 +- + drivers/usb/gadget/function/u_audio.c | 96 ++++-- + drivers/usb/gadget/function/u_ether.c | 4 +- + drivers/usb/gadget/function/uvc.h | 6 +- + drivers/usb/gadget/function/uvc_v4l2.c | 52 ++- + drivers/usb/gadget/function/uvc_video.c | 26 +- + drivers/usb/gadget/function/uvc_video.h | 2 - + drivers/usb/gadget/legacy/hid.c | 4 +- + drivers/usb/gadget/udc/amd5536udc.h | 1 - + drivers/usb/gadget/udc/core.c | 6 +- + drivers/usb/gadget/udc/goku_udc.c | 6 +- + drivers/usb/gadget/udc/pxa25x_udc.c | 2 +- + drivers/usb/gadget/udc/snps_udc_plat.c | 5 - + drivers/usb/gadget/udc/udc-xilinx.c | 25 ++ + drivers/usb/host/Kconfig | 6 +- + drivers/usb/host/ehci-atmel.c | 8 + + drivers/usb/host/ehci-hcd.c | 13 +- + drivers/usb/host/ehci-hub.c | 11 +- + drivers/usb/host/ehci-mem.c | 3 +- + drivers/usb/host/ehci-mv.c | 2 - + drivers/usb/host/ehci-platform.c | 6 + + drivers/usb/host/ehci.h | 1 + + drivers/usb/host/fotg210-hcd.c | 5 +- + drivers/usb/host/max3421-hcd.c | 25 +- + drivers/usb/host/ohci-hcd.c | 3 +- + drivers/usb/host/ohci-hub.c | 3 + + drivers/usb/host/ohci-tmio.c | 2 +- + drivers/usb/host/oxu210hp-hcd.c | 2 +- + drivers/usb/host/xhci-mtk-sch.c | 2 +- + drivers/usb/host/xhci-pci.c | 16 + + drivers/usb/musb/Kconfig | 2 +- + drivers/usb/musb/mediatek.c | 1 + + 
drivers/usb/musb/tusb6010.c | 5 + + drivers/usb/phy/phy-tahvo.c | 4 - + drivers/usb/phy/phy-tegra-usb.c | 198 ++++++++++- + drivers/usb/storage/unusual_devs.h | 10 + + drivers/usb/typec/Kconfig | 4 +- + drivers/usb/typec/hd3ss3220.c | 8 +- + drivers/usb/typec/tcpm/tcpci.c | 2 +- + drivers/usb/typec/tipd/core.c | 223 +++++++++--- + drivers/usb/typec/tipd/tps6598x.h | 12 + + drivers/usb/typec/tipd/trace.h | 23 ++ + drivers/usb/typec/ucsi/ucsi.c | 337 +++++++++---------- + drivers/usb/typec/ucsi/ucsi.h | 3 +- + drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +- + drivers/usb/usb-skeleton.c | 2 +- + include/linux/usb/tegra_usb_phy.h | 5 + + 72 files changed, 1655 insertions(+), 543 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.txt + create mode 100644 Documentation/devicetree/bindings/phy/nvidia,tegra20-usb-phy.yaml + create mode 100644 Documentation/devicetree/bindings/usb/smsc,usb3503.yaml + delete mode 100644 Documentation/devicetree/bindings/usb/usb3503.txt +Merging usb-gadget/next (e49d033bddf5 Linux 5.12-rc6) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git usb-gadget/next +Already up to date. +Merging usb-serial/usb-next (c8345c0500de USB: serial: kl5kusb105: drop line-status helper) +$ git merge -m Merge branch 'usb-next' of git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git usb-serial/usb-next +Merge made by the 'recursive' strategy. + drivers/usb/serial/f81232.c | 96 ++++++++++++-------------------- + drivers/usb/serial/ftdi_sio.c | 53 +++++------------- + drivers/usb/serial/keyspan_pda.c | 67 ++++++++++------------- + drivers/usb/serial/kl5kusb105.c | 115 +++++++++++++++------------------------ + drivers/usb/serial/usb-serial.c | 59 ++++++++++---------- + 5 files changed, 151 insertions(+), 239 deletions(-) +Merging usb-chipidea-next/for-usb-next (78665f57c3fa usb: chipidea: udc: make controller hardware endpoint primed) +$ git merge -m Merge branch 'for-usb-next' of git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git usb-chipidea-next/for-usb-next +Merge made by the 'recursive' strategy. +Merging tty/tty-next (7c0408d80579 tty: add rpmsg driver) +$ git merge -m Merge branch 'tty-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty/tty-next +Auto-merging net/nfc/nci/uart.c +Auto-merging drivers/tty/sysrq.c +Removing drivers/tty/moxa.h +Auto-merging drivers/net/hamradio/6pack.c +Auto-merging drivers/bluetooth/hci_ldisc.c +Removing Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.txt +Removing Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt +Merge made by the 'recursive' strategy. 
+ .../devicetree/bindings/serial/8250_omap.yaml | 2 +- + .../bindings/serial/brcm,bcm6345-uart.txt | 36 --- + .../bindings/serial/brcm,bcm6345-uart.yaml | 47 ++++ + .../devicetree/bindings/serial/samsung_uart.yaml | 1 + + .../devicetree/bindings/serial/sprd-uart.yaml | 1 + + .../bindings/serial/xlnx,opb-uartlite.txt | 23 -- + .../bindings/serial/xlnx,opb-uartlite.yaml | 89 ++++++ + Documentation/driver-api/serial/n_gsm.rst | 71 ++++- + Documentation/driver-api/serial/tty.rst | 2 +- + arch/sh/boot/dts/j2_mimas_v2.dts | 2 + + drivers/bluetooth/hci_ldisc.c | 2 +- + drivers/char/virtio_console.c | 12 +- + drivers/input/serio/serport.c | 3 +- + drivers/net/can/slcan.c | 5 +- + drivers/net/hamradio/6pack.c | 2 +- + drivers/net/ppp/ppp_async.c | 7 +- + drivers/net/ppp/ppp_synctty.c | 7 +- + drivers/net/slip/slip.c | 5 +- + drivers/rpmsg/rpmsg_core.c | 21 ++ + drivers/rpmsg/rpmsg_internal.h | 2 + + drivers/rpmsg/virtio_rpmsg_bus.c | 10 + + drivers/tty/Kconfig | 12 + + drivers/tty/Makefile | 1 + + drivers/tty/hvc/hvc_console.c | 36 ++- + drivers/tty/hvc/hvc_console.h | 21 +- + drivers/tty/moxa.c | 302 +++++++++++++++++++- + drivers/tty/moxa.h | 307 --------------------- + drivers/tty/mxser.c | 119 ++++---- + drivers/tty/n_gsm.c | 116 ++++++-- + drivers/tty/n_hdlc.c | 2 +- + drivers/tty/n_tty.c | 3 +- + drivers/tty/rpmsg_tty.c | 274 ++++++++++++++++++ + drivers/tty/serial/8250/8250_dw.c | 28 +- + drivers/tty/serial/8250/8250_dwlib.c | 10 + + drivers/tty/serial/8250/8250_dwlib.h | 1 + + drivers/tty/serial/8250/8250_fsl.c | 8 +- + drivers/tty/serial/8250/8250_lpss.c | 9 +- + drivers/tty/serial/8250/8250_pci.c | 4 +- + drivers/tty/serial/8250/8250_pnp.c | 4 - + drivers/tty/serial/8250/8250_port.c | 31 ++- + drivers/tty/serial/8250/Kconfig | 2 +- + drivers/tty/serial/Kconfig | 9 +- + drivers/tty/serial/atmel_serial.c | 4 +- + drivers/tty/serial/imx.c | 16 +- + drivers/tty/serial/max310x.c | 7 +- + drivers/tty/serial/samsung_tty.c | 13 +- + drivers/tty/serial/sc16is7xx.c | 12 +- + drivers/tty/serial/serial_core.c | 16 +- + drivers/tty/serial/sifive.c | 2 +- + drivers/tty/serial/stm32-usart.c | 240 ++++++++++++---- + drivers/tty/serial/stm32-usart.h | 13 +- + drivers/tty/serial/uartlite.c | 91 +++++- + drivers/tty/sysrq.c | 2 + + drivers/tty/tty_baudrate.c | 2 +- + drivers/tty/tty_buffer.c | 3 + + drivers/tty/tty_ioctl.c | 12 +- + include/linux/console.h | 2 + + include/linux/rpmsg.h | 10 + + include/linux/tty.h | 140 +++++----- + include/linux/tty_driver.h | 8 +- + include/linux/tty_flip.h | 20 +- + include/linux/tty_ldisc.h | 27 +- + net/nfc/nci/uart.c | 2 +- + sound/soc/codecs/cx20442.c | 3 +- + sound/soc/ti/ams-delta.c | 3 +- + 65 files changed, 1532 insertions(+), 765 deletions(-) + delete mode 100644 Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt + create mode 100644 Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.yaml + delete mode 100644 Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.txt + create mode 100644 Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml + delete mode 100644 drivers/tty/moxa.h + create mode 100644 drivers/tty/rpmsg_tty.c +Merging char-misc/char-misc-next (7b473ae754fe iio: frequency: adrf6780: Fix adrf6780_spi_{read,write}()) +$ git merge -m Merge branch 'char-misc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git char-misc/char-misc-next +Removing include/linux/counter_enum.h +Removing drivers/counter/counter.c +Auto-merging drivers/android/binder.c +Auto-merging MAINTAINERS +Auto-merging 
Documentation/devicetree/bindings/vendor-prefixes.yaml +Removing Documentation/ABI/testing/sysfs-bus-iio-scd30 +Merge made by the 'recursive' strategy. + .../ABI/testing/debugfs-driver-habanalabs | 6 + + Documentation/ABI/testing/sysfs-bus-counter | 38 +- + .../ABI/testing/sysfs-bus-fsi-devices-sbefifo | 10 + + Documentation/ABI/testing/sysfs-bus-iio | 42 + + .../ABI/testing/sysfs-bus-iio-chemical-sunrise-co2 | 38 + + Documentation/ABI/testing/sysfs-bus-iio-scd30 | 34 - + .../ABI/testing/sysfs-bus-iio-temperature-max31865 | 20 + + .../testing/sysfs-bus-platform-devices-occ-hwmon | 13 + + .../devicetree/bindings/iio/accel/adi,adxl313.yaml | 86 + + .../devicetree/bindings/iio/accel/adi,adxl355.yaml | 88 + + .../bindings/iio/accel/kionix,kxcjk1013.yaml | 3 + + .../devicetree/bindings/iio/adc/adi,ad7949.yaml | 51 +- + .../devicetree/bindings/iio/adc/adi,ad799x.yaml | 73 + + .../bindings/iio/adc/aspeed,ast2600-adc.yaml | 100 + + .../bindings/iio/adc/atmel,sama5d2-adc.yaml | 1 + + .../bindings/iio/adc/nxp,imx8qxp-adc.yaml | 78 + + .../devicetree/bindings/iio/adc/st,stm32-adc.yaml | 108 +- + .../bindings/iio/chemical/senseair,sunrise.yaml | 55 + + .../bindings/iio/chemical/sensirion,scd4x.yaml | 46 + + .../bindings/iio/frequency/adi,adrf6780.yaml | 131 ++ + .../bindings/iio/light/liteon,ltr501.yaml | 51 + + .../iio/magnetometer/asahi-kasei,ak8975.yaml | 7 + + .../bindings/iio/multiplexer/io-channel-mux.yaml | 13 +- + .../bindings/iio/temperature/maxim,max31865.yaml | 52 + + .../devicetree/bindings/vendor-prefixes.yaml | 4 + + Documentation/driver-api/driver-model/devres.rst | 1 + + Documentation/driver-api/generic-counter.rst | 363 +++- + Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + + Documentation/virt/ne_overview.rst | 21 +- + MAINTAINERS | 42 +- + drivers/android/binder.c | 14 +- + drivers/char/mem.c | 8 +- + drivers/char/xillybus/xillybus.h | 31 +- + drivers/char/xillybus/xillybus_core.c | 131 +- + drivers/char/xillybus/xillybus_of.c | 86 +- + drivers/char/xillybus/xillybus_pcie.c | 99 +- + drivers/char/xillybus/xillyusb.c | 1 + + drivers/counter/104-quad-8.c | 699 ++++--- + drivers/counter/Kconfig | 6 +- + drivers/counter/Makefile | 1 + + drivers/counter/counter-chrdev.c | 573 ++++++ + drivers/counter/counter-chrdev.h | 14 + + drivers/counter/counter-core.c | 191 ++ + drivers/counter/counter-sysfs.c | 959 ++++++++++ + drivers/counter/counter-sysfs.h | 13 + + drivers/counter/counter.c | 1496 --------------- + drivers/counter/ftm-quaddec.c | 60 +- + drivers/counter/intel-qep.c | 146 +- + drivers/counter/interrupt-cnt.c | 62 +- + drivers/counter/microchip-tcb-capture.c | 93 +- + drivers/counter/stm32-lptimer-cnt.c | 212 +-- + drivers/counter/stm32-timer-cnt.c | 195 +- + drivers/counter/ti-eqep.c | 180 +- + drivers/fsi/fsi-occ.c | 218 ++- + drivers/fsi/fsi-sbefifo.c | 28 +- + drivers/hwmon/occ/common.c | 30 +- + drivers/hwmon/occ/common.h | 3 +- + drivers/hwmon/occ/p8_i2c.c | 15 +- + drivers/hwmon/occ/p9_sbe.c | 90 +- + drivers/iio/accel/Kconfig | 62 + + drivers/iio/accel/Makefile | 6 + + drivers/iio/accel/adxl313.h | 54 + + drivers/iio/accel/adxl313_core.c | 332 ++++ + drivers/iio/accel/adxl313_i2c.c | 66 + + drivers/iio/accel/adxl313_spi.c | 92 + + drivers/iio/accel/adxl355.h | 21 + + drivers/iio/accel/adxl355_core.c | 765 ++++++++ + drivers/iio/accel/adxl355_i2c.c | 62 + + drivers/iio/accel/adxl355_spi.c | 65 + + drivers/iio/accel/adxl372.c | 1 + + drivers/iio/accel/bma400.h | 2 +- + drivers/iio/accel/bma400_core.c | 7 +- + drivers/iio/accel/bma400_i2c.c | 4 +- + 
drivers/iio/accel/bma400_spi.c | 4 +-
+ drivers/iio/accel/bmc150-accel-core.c | 5 +-
+ drivers/iio/accel/bmc150-accel-i2c.c | 4 +-
+ drivers/iio/accel/bmc150-accel-spi.c | 4 +-
+ drivers/iio/accel/bmc150-accel.h | 2 +-
+ drivers/iio/accel/bmi088-accel-core.c | 4 +-
+ drivers/iio/accel/bmi088-accel-spi.c | 4 +-
+ drivers/iio/accel/bmi088-accel.h | 2 +-
+ drivers/iio/accel/fxls8962af-core.c | 347 +++-
+ drivers/iio/accel/kxsd9-i2c.c | 4 +-
+ drivers/iio/accel/kxsd9-spi.c | 4 +-
+ drivers/iio/accel/kxsd9.c | 4 +-
+ drivers/iio/accel/kxsd9.h | 2 +-
+ drivers/iio/accel/mma7455.h | 2 +-
+ drivers/iio/accel/mma7455_core.c | 4 +-
+ drivers/iio/accel/mma7455_i2c.c | 4 +-
+ drivers/iio/accel/mma7455_spi.c | 4 +-
+ drivers/iio/accel/mma7660.c | 2 +-
+ drivers/iio/accel/sca3000.c | 3 +-
+ drivers/iio/accel/st_accel_core.c | 31 +-
+ drivers/iio/accel/st_accel_i2c.c | 23 +-
+ drivers/iio/accel/st_accel_spi.c | 23 +-
+ drivers/iio/adc/Kconfig | 18 +-
+ drivers/iio/adc/Makefile | 1 +
+ drivers/iio/adc/ab8500-gpadc.c | 22 +-
+ drivers/iio/adc/ad7291.c | 70 +-
+ drivers/iio/adc/ad7949.c | 254 ++-
+ drivers/iio/adc/ad799x.c | 68 +-
+ drivers/iio/adc/aspeed_adc.c | 598 +++++-
+ drivers/iio/adc/at91-sama5d2_adc.c | 598 ++++--
+ drivers/iio/adc/axp288_adc.c | 28 +-
+ drivers/iio/adc/berlin2-adc.c | 34 +-
+ drivers/iio/adc/da9150-gpadc.c | 27 +-
+ drivers/iio/adc/ep93xx_adc.c | 4 +-
+ drivers/iio/adc/fsl-imx25-gcq.c | 55 +-
+ drivers/iio/adc/imx7d_adc.c | 18 +-
+ drivers/iio/adc/imx8qxp-adc.c | 494 +++++
+ drivers/iio/adc/intel_mrfld_adc.c | 24 +-
+ drivers/iio/adc/lp8788_adc.c | 31 +-
+ drivers/iio/adc/lpc18xx_adc.c | 75 +-
+ drivers/iio/adc/max1027.c | 278 ++-
+ drivers/iio/adc/max1118.c | 7 +-
+ drivers/iio/adc/max1241.c | 17 +-
+ drivers/iio/adc/max1363.c | 82 +-
+ drivers/iio/adc/meson_saradc.c | 39 +-
+ drivers/iio/adc/nau7802.c | 50 +-
+ drivers/iio/adc/qcom-pm8xxx-xoadc.c | 9 +-
+ drivers/iio/adc/rn5t618-adc.c | 13 +-
+ drivers/iio/adc/rockchip_saradc.c | 31 +-
+ drivers/iio/adc/stm32-adc-core.c | 1 +
+ drivers/iio/adc/stm32-adc-core.h | 10 +
+ drivers/iio/adc/stm32-adc.c | 422 ++++-
+ drivers/iio/adc/ti-adc108s102.c | 11 +-
+ drivers/iio/adc/ti-adc128s052.c | 33 +-
+ drivers/iio/adc/ti-ads7950.c | 4 +-
+ drivers/iio/adc/ti-ads8344.c | 27 +-
+ drivers/iio/adc/twl6030-gpadc.c | 6 +-
+ drivers/iio/adc/xilinx-xadc-core.c | 5 +-
+ drivers/iio/adc/xilinx-xadc.h | 1 -
+ drivers/iio/buffer/industrialio-triggered-buffer.c | 8 +-
+ drivers/iio/buffer/kfifo_buf.c | 50 +
+ drivers/iio/chemical/Kconfig | 24 +
+ drivers/iio/chemical/Makefile | 2 +
+ drivers/iio/chemical/scd4x.c | 696 +++++++
+ drivers/iio/chemical/sunrise_co2.c | 537 ++++++
+ .../iio/common/hid-sensors/hid-sensor-trigger.c | 5 +-
+ drivers/iio/common/st_sensors/st_sensors_core.c | 48 +-
+ drivers/iio/common/st_sensors/st_sensors_i2c.c | 1 -
+ drivers/iio/common/st_sensors/st_sensors_spi.c | 1 -
+ drivers/iio/common/st_sensors/st_sensors_trigger.c | 53 +-
+ drivers/iio/dac/ad5064.c | 49 +-
+ drivers/iio/dac/ad5380.c | 15 +-
+ drivers/iio/dac/ad5446.c | 12 +-
+ drivers/iio/dac/ad5592r-base.c | 4 +-
+ drivers/iio/dac/ad5592r-base.h | 2 +-
+ drivers/iio/dac/ad5592r.c | 4 +-
+ drivers/iio/dac/ad5593r.c | 4 +-
+ drivers/iio/dac/ad5686-spi.c | 4 +-
+ drivers/iio/dac/ad5686.c | 4 +-
+ drivers/iio/dac/ad5686.h | 2 +-
+ drivers/iio/dac/ad5696-i2c.c | 4 +-
+ drivers/iio/dac/ad5766.c | 42 +
+ drivers/iio/dac/ad5770r.c | 2 +-
+ drivers/iio/dac/ad7303.c | 47 +-
+ drivers/iio/dac/ad8801.c | 11 +-
+ drivers/iio/dac/ds4424.c | 9 +-
+ drivers/iio/dac/lpc18xx_dac.c | 14 +-
+ drivers/iio/dac/ltc1660.c | 7 +-
+ drivers/iio/dac/max5821.c | 9 +-
+ drivers/iio/dac/mcp4922.c | 7 +-
+ drivers/iio/dac/stm32-dac-core.c | 18 +-
+ drivers/iio/dac/ti-dac7311.c | 7 +-
+ drivers/iio/frequency/Kconfig | 12 +
+ drivers/iio/frequency/Makefile | 1 +
+ drivers/iio/frequency/adrf6780.c | 527 ++++++
+ drivers/iio/gyro/Kconfig | 1 -
+ drivers/iio/gyro/adis16080.c | 11 +-
+ drivers/iio/gyro/mpu3050-core.c | 24 +-
+ drivers/iio/gyro/st_gyro_core.c | 27 +-
+ drivers/iio/gyro/st_gyro_i2c.c | 23 +-
+ drivers/iio/gyro/st_gyro_spi.c | 23 +-
+ drivers/iio/health/afe4403.c | 14 +-
+ drivers/iio/health/afe4404.c | 8 +-
+ drivers/iio/iio_core.h | 4 +
+ drivers/iio/imu/adis.c | 17 +-
+ drivers/iio/imu/adis16400.c | 20 +-
+ drivers/iio/imu/adis16460.c | 18 +-
+ drivers/iio/imu/adis16475.c | 19 +-
+ drivers/iio/imu/adis_trigger.c | 4 +
+ drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c | 2 +-
+ drivers/iio/imu/inv_mpu6050/inv_mpu_magn.c | 36 +-
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h | 4 +-
+ drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 22 +-
+ drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h | 1 -
+ drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c | 29 +-
+ drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c | 6 -
+ drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c | 6 -
+ drivers/iio/industrialio-buffer.c | 173 +-
+ drivers/iio/industrialio-core.c | 1 +
+ drivers/iio/inkern.c | 17 +
+ drivers/iio/light/cm3605.c | 29 +-
+ drivers/iio/light/cm36651.c | 7 +-
+ drivers/iio/light/gp2ap002.c | 24 +-
+ drivers/iio/light/ltr501.c | 37 +
+ drivers/iio/light/max44000.c | 17 +-
+ drivers/iio/light/noa1305.c | 7 +-
+ drivers/iio/magnetometer/Kconfig | 2 +-
+ drivers/iio/magnetometer/ak8975.c | 35 +
+ drivers/iio/magnetometer/hmc5843.h | 2 +-
+ drivers/iio/magnetometer/hmc5843_core.c | 4 +-
+ drivers/iio/magnetometer/hmc5843_i2c.c | 4 +-
+ drivers/iio/magnetometer/hmc5843_spi.c | 4 +-
+ drivers/iio/magnetometer/st_magn_core.c | 29 +-
+ drivers/iio/magnetometer/st_magn_i2c.c | 23 +-
+ drivers/iio/magnetometer/st_magn_spi.c | 23 +-
+ drivers/iio/multiplexer/iio-mux.c | 7 +-
+ drivers/iio/potentiometer/max5487.c | 7 +-
+ drivers/iio/pressure/ms5611.h | 2 +-
+ drivers/iio/pressure/ms5611_core.c | 4 +-
+ drivers/iio/pressure/ms5611_i2c.c | 4 +-
+ drivers/iio/pressure/ms5611_spi.c | 4 +-
+ drivers/iio/pressure/st_pressure_core.c | 27 +-
+ drivers/iio/pressure/st_pressure_i2c.c | 23 +-
+ drivers/iio/pressure/st_pressure_spi.c | 23 +-
+ drivers/iio/temperature/Kconfig | 10 +
+ drivers/iio/temperature/Makefile | 1 +
+ drivers/iio/temperature/ltc2983.c | 16 +
+ drivers/iio/temperature/max31865.c | 349 ++++
+ drivers/interconnect/qcom/icc-rpm.c | 263 ++-
+ drivers/interconnect/qcom/icc-rpm.h | 56 +-
+ drivers/interconnect/qcom/msm8916.c | 1214 +++++++++++-
+ drivers/interconnect/qcom/msm8939.c | 1283 ++++++++++++-
+ drivers/interconnect/qcom/qcs404.c | 967 +++++++++-
+ drivers/interconnect/qcom/sdm660.c | 1940 ++++++++++++++------
+ drivers/interconnect/samsung/Kconfig | 6 +-
+ drivers/ipack/devices/ipoctal.c | 48 +-
+ drivers/misc/ad525x_dpot-i2c.c | 3 +-
+ drivers/misc/ad525x_dpot-spi.c | 3 +-
+ drivers/misc/ad525x_dpot.c | 4 +-
+ drivers/misc/ad525x_dpot.h | 2 +-
+ drivers/misc/cardreader/rtsx_pcr.c | 2 +-
+ drivers/misc/enclosure.c | 16 +-
+ drivers/misc/fastrpc.c | 20 +-
+ drivers/misc/genwqe/card_utils.c | 10 +-
+ drivers/misc/habanalabs/Kconfig | 2 +
+ drivers/misc/habanalabs/common/Makefile | 2 +-
+ .../misc/habanalabs/common/command_submission.c | 105 +-
+ drivers/misc/habanalabs/common/context.c | 8 +-
+ drivers/misc/habanalabs/common/debugfs.c | 51 +
+ drivers/misc/habanalabs/common/device.c | 159 +-
+ drivers/misc/habanalabs/common/firmware_if.c | 28 +-
+ drivers/misc/habanalabs/common/habanalabs.h | 64 +-
+ drivers/misc/habanalabs/common/habanalabs_drv.c | 24 +-
+ .../{gaudi/gaudi_hwmgr.c => common/hwmgr.c} | 38 +-
+ drivers/misc/habanalabs/common/hwmon.c | 194 +-
+ drivers/misc/habanalabs/common/irq.c | 5 +-
+ drivers/misc/habanalabs/common/memory.c | 513 +++++-
+ drivers/misc/habanalabs/common/mmu/mmu.c | 30 +-
+ drivers/misc/habanalabs/common/sysfs.c | 6 +-
+ drivers/misc/habanalabs/gaudi/Makefile | 2 +-
+ drivers/misc/habanalabs/gaudi/gaudi.c | 22 +-
+ drivers/misc/habanalabs/gaudi/gaudiP.h | 4 -
+ drivers/misc/habanalabs/goya/goya.c | 13 +-
+ drivers/misc/habanalabs/goya/goyaP.h | 1 -
+ drivers/misc/habanalabs/goya/goya_hwmgr.c | 31 -
+ drivers/misc/habanalabs/include/common/cpucp_if.h | 22 +-
+ .../misc/habanalabs/include/common/hl_boot_if.h | 189 +-
+ .../misc/habanalabs/include/gaudi/gaudi_fw_if.h | 10 +-
+ .../misc/habanalabs/include/gaudi/gaudi_reg_map.h | 1 +
+ drivers/misc/hisi_hikey_usb.c | 119 +-
+ drivers/misc/lis3lv02d/lis3lv02d.c | 3 +-
+ drivers/misc/lis3lv02d/lis3lv02d.h | 2 +-
+ drivers/misc/lis3lv02d/lis3lv02d_spi.c | 4 +-
+ drivers/misc/mei/pci-txe.c | 4 +-
+ drivers/misc/pvpanic/pvpanic-mmio.c | 9 +-
+ drivers/misc/pvpanic/pvpanic-pci.c | 26 +-
+ drivers/misc/pvpanic/pvpanic.c | 16 +-
+ drivers/misc/tifm_7xx1.c | 2 +-
+ drivers/misc/tifm_core.c | 8 +-
+ drivers/mux/core.c | 38 +-
+ drivers/nvmem/core.c | 174 +-
+ drivers/nvmem/imx-ocotp.c | 25 +
+ drivers/platform/x86/hp_accel.c | 3 +-
+ drivers/staging/iio/cdc/ad7746.c | 4 +-
+ drivers/staging/iio/frequency/ad9832.c | 82 +-
+ drivers/virt/acrn/hsm.c | 49 +
+ drivers/virt/acrn/hypercall.h | 52 +
+ drivers/virt/nitro_enclaves/Kconfig | 8 +-
+ drivers/virt/nitro_enclaves/ne_misc_dev.c | 17 +-
+ drivers/virt/nitro_enclaves/ne_pci_dev.c | 2 +-
+ drivers/virt/nitro_enclaves/ne_pci_dev.h | 8 +-
+ include/linux/counter.h | 715 ++++----
+ include/linux/counter_enum.h | 45 -
+ include/linux/fsi-occ.h | 2 +
+ include/linux/iio/buffer.h | 11 +
+ include/linux/iio/buffer_impl.h | 11 +
+ include/linux/iio/common/st_sensors.h | 13 -
+ include/linux/iio/driver.h | 14 +
+ include/linux/iio/iio-opaque.h | 4 +
+ include/linux/iio/imu/adis.h | 2 +
+ include/linux/iio/triggered_buffer.h | 11 +-
+ include/linux/mfd/stm32-lptimer.h | 5 +
+ include/linux/mfd/stm32-timers.h | 4 +
+ include/linux/mux/consumer.h | 23 +-
+ include/linux/mux/driver.h | 4 +
+ include/linux/nvmem-provider.h | 5 +
+ include/uapi/linux/acrn.h | 70 +
+ include/uapi/linux/counter.h | 154 ++
+ include/uapi/linux/nitro_enclaves.h | 10 +-
+ include/uapi/misc/habanalabs.h | 84 +-
+ samples/kfifo/bytestream-example.c | 12 +-
+ samples/kfifo/inttype-example.c | 12 +-
+ samples/kfifo/record-example.c | 12 +-
+ samples/nitro_enclaves/ne_ioctl_sample.c | 7 +-
+ scripts/tags.sh | 6 +-
+ tools/Makefile | 13 +-
+ tools/counter/Build | 1 +
+ tools/counter/Makefile | 53 +
+ tools/counter/counter_example.c | 92 +
+ 312 files changed, 19100 insertions(+), 6507 deletions(-)
+ create mode 100644 Documentation/ABI/testing/sysfs-bus-fsi-devices-sbefifo
+ create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-chemical-sunrise-co2
+ delete mode 100644 Documentation/ABI/testing/sysfs-bus-iio-scd30
+ create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-temperature-max31865
+ create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-devices-occ-hwmon
+ create mode 100644 Documentation/devicetree/bindings/iio/accel/adi,adxl313.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad799x.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/chemical/senseair,sunrise.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/chemical/sensirion,scd4x.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/light/liteon,ltr501.yaml
+ create mode 100644 Documentation/devicetree/bindings/iio/temperature/maxim,max31865.yaml
+ create mode 100644 drivers/counter/counter-chrdev.c
+ create mode 100644 drivers/counter/counter-chrdev.h
+ create mode 100644 drivers/counter/counter-core.c
+ create mode 100644 drivers/counter/counter-sysfs.c
+ create mode 100644 drivers/counter/counter-sysfs.h
+ delete mode 100644 drivers/counter/counter.c
+ create mode 100644 drivers/iio/accel/adxl313.h
+ create mode 100644 drivers/iio/accel/adxl313_core.c
+ create mode 100644 drivers/iio/accel/adxl313_i2c.c
+ create mode 100644 drivers/iio/accel/adxl313_spi.c
+ create mode 100644 drivers/iio/accel/adxl355.h
+ create mode 100644 drivers/iio/accel/adxl355_core.c
+ create mode 100644 drivers/iio/accel/adxl355_i2c.c
+ create mode 100644 drivers/iio/accel/adxl355_spi.c
+ create mode 100644 drivers/iio/adc/imx8qxp-adc.c
+ create mode 100644 drivers/iio/chemical/scd4x.c
+ create mode 100644 drivers/iio/chemical/sunrise_co2.c
+ create mode 100644 drivers/iio/frequency/adrf6780.c
+ create mode 100644 drivers/iio/temperature/max31865.c
+ rename drivers/misc/habanalabs/{gaudi/gaudi_hwmgr.c => common/hwmgr.c} (61%)
+ delete mode 100644 include/linux/counter_enum.h
+ create mode 100644 include/uapi/linux/counter.h
+ create mode 100644 tools/counter/Build
+ create mode 100644 tools/counter/Makefile
+ create mode 100644 tools/counter/counter_example.c
+Merging extcon/extcon-next (f83d7033d4ec dt-bindings: extcon: usbc-tusb320: Add TUSB320L compatible string)
+$ git merge -m Merge branch 'extcon-next' of git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon.git extcon/extcon-next
+Merge made by the 'recursive' strategy.
+ .../bindings/extcon/extcon-usbc-tusb320.yaml | 4 +-
+ drivers/extcon/Kconfig | 2 +-
+ drivers/extcon/extcon-axp288.c | 31 +++-
+ drivers/extcon/extcon-max3355.c | 1 -
+ drivers/extcon/extcon-usb-gpio.c | 3 +-
+ drivers/extcon/extcon-usbc-tusb320.c | 163 ++++++++++++++++++++-
+ 6 files changed, 192 insertions(+), 12 deletions(-)
+Merging phy-next/next (a18c27eccafa phy: hisilicon: Add of_node_put() in phy-hisi-inno-usb2)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git phy-next/next
+Auto-merging arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+Merge made by the 'recursive' strategy.
+ .../devicetree/bindings/phy/bcm-ns-usb2-phy.yaml | 25 +-
+ .../devicetree/bindings/phy/phy-stm32-usbphyc.yaml | 129 ++++
+ .../devicetree/bindings/phy/qcom,qmp-phy.yaml | 84 +-
+ .../devicetree/bindings/phy/qcom,qusb2-phy.yaml | 7 +
+ .../devicetree/bindings/phy/rockchip-usb-phy.yaml | 11 +-
+ arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 2 +
+ .../boot/dts/qcom/msm8996-sony-xperia-tone.dtsi | 1 +
+ arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi | 1 +
+ .../boot/dts/qcom/msm8998-oneplus-common.dtsi | 1 +
+ drivers/phy/broadcom/Kconfig | 4 +
+ drivers/phy/cadence/phy-cadence-torrent.c | 316 +++++++-
+ drivers/phy/hisilicon/Kconfig | 10 +
+ drivers/phy/hisilicon/Makefile | 1 +
+ drivers/phy/hisilicon/phy-hi3670-pcie.c | 845 +++++++++++++++++++++
+ drivers/phy/hisilicon/phy-hisi-inno-usb2.c | 10 +-
+ drivers/phy/qualcomm/phy-qcom-qmp.c | 157 +++-
+ drivers/phy/qualcomm/phy-qcom-qmp.h | 2 +
+ drivers/phy/qualcomm/phy-qcom-qusb2.c | 21 +-
+ drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 11 +-
+ drivers/phy/samsung/Kconfig | 16 +-
+ drivers/phy/st/phy-stm32-usbphyc.c | 203 +++++
+ drivers/phy/ti/phy-gmii-sel.c | 2 +
+ include/dt-bindings/phy/phy-cadence.h | 2 +
+ 23 files changed, 1754 insertions(+), 107 deletions(-)
+ create mode 100644 drivers/phy/hisilicon/phy-hi3670-pcie.c
+Merging soundwire/next (abd9a6049bb5 soundwire: qcom: add debugfs entry for soundwire register dump)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git soundwire/next
+Merge made by the 'recursive' strategy.
+ drivers/soundwire/bus.c | 2 +-
+ drivers/soundwire/debugfs.c | 2 +-
+ drivers/soundwire/qcom.c | 27 +++++++++++++++++++++++++++
+ 3 files changed, 29 insertions(+), 2 deletions(-)
+Merging thunderbolt/next (0a0624a26f9c thunderbolt: Fix -Wrestrict warning)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git thunderbolt/next
+Merge made by the 'recursive' strategy.
+ drivers/thunderbolt/ctl.c | 2 +-
+ drivers/thunderbolt/xdomain.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+Merging vfio/next (9cef73918e15 vfio: Use cdev_device_add() instead of device_create())
+$ git merge -m Merge branch 'next' of git://github.com/awilliam/linux-vfio.git vfio/next
+Merge made by the 'recursive' strategy.
+ drivers/bus/fsl-mc/Makefile | 3 +-
+ drivers/bus/fsl-mc/fsl-mc-private.h | 39 +-
+ drivers/bus/fsl-mc/obj-api.c | 103 +++++
+ drivers/s390/crypto/vfio_ap_ops.c | 2 +-
+ drivers/vfio/fsl-mc/vfio_fsl_mc.c | 62 +--
+ drivers/vfio/mdev/mdev_driver.c | 45 +-
+ drivers/vfio/mdev/vfio_mdev.c | 2 +-
+ drivers/vfio/pci/vfio_pci_core.c | 13 +-
+ drivers/vfio/pci/vfio_pci_igd.c | 234 +++++---
+ drivers/vfio/platform/vfio_platform_common.c | 13 +-
+ drivers/vfio/vfio.c | 622 +++++++++++----------
+ drivers/vfio/vfio.h | 72 ++++
+ drivers/vfio/vfio_iommu_spapr_tce.c | 6 +-
+ drivers/vfio/vfio_iommu_type1.c | 256 ++++-------
+ include/linux/fsl/mc.h | 14 +
+ include/linux/mdev.h | 20 -
+ include/linux/vfio.h | 53 +--
+ samples/vfio-mdev/mbochs.c | 2 +-
+ samples/vfio-mdev/mdpy.c | 2 +-
+ samples/vfio-mdev/mtty.c | 2 +-
+ 20 files changed, 779 insertions(+), 786 deletions(-)
+ create mode 100644 drivers/bus/fsl-mc/obj-api.c
+ create mode 100644 drivers/vfio/vfio.h
+Merging staging/staging-next (5c0480deda08 staging: r8188eu: Use memdup_user instead of kmalloc/copy_from_user)
+$ git merge -m Merge branch 'staging-next' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git staging/staging-next
+Removing drivers/staging/r8188eu/include/rtw_mp_phy_regdef.h
+Removing drivers/staging/r8188eu/include/rtw_mp_ioctl.h
+Removing drivers/staging/r8188eu/include/rtw_mp.h
+Removing drivers/staging/r8188eu/include/rtw_ioctl_rtl.h
+Removing drivers/staging/r8188eu/include/odm_reg.h
+Removing drivers/staging/r8188eu/include/odm_RegDefine11AC.h
+Removing drivers/staging/r8188eu/include/mp_custom_oid.h
+Removing drivers/staging/r8188eu/include/HalHWImg8188E_FW.h
+Removing drivers/staging/r8188eu/hal/rtl8188e_mp.c
+Removing drivers/staging/r8188eu/core/rtw_mp_ioctl.c
+Removing drivers/staging/r8188eu/core/rtw_mp.c
+Removing drivers/staging/r8188eu/core/rtw_io.c
+Removing drivers/staging/r8188eu/core/rtw_debug.c
+Auto-merging drivers/staging/qlge/qlge_main.c
+Removing drivers/staging/most/dim2/sysfs.c
+Auto-merging drivers/pci/pci.c
+Auto-merging drivers/pci/controller/pcie-mt7621.c
+Merge made by the 'recursive' strategy.
+ arch/mips/include/asm/mach-ralink/spaces.h | 4 +-
+ arch/mips/include/asm/pci.h | 4 +
+ arch/mips/pci/pci-generic.c | 14 +
+ drivers/pci/controller/pcie-mt7621.c | 2 +-
+ drivers/pci/pci.c | 2 +
+ drivers/staging/axis-fifo/axis-fifo.c | 88 +-
+ drivers/staging/fbtft/fbtft-core.c | 11 +-
+ drivers/staging/fbtft/fbtft.h | 8 +-
+ drivers/staging/gdm724x/gdm_lte.c | 4 +-
+ drivers/staging/ks7010/Kconfig | 3 +
+ drivers/staging/ks7010/ks_hostif.c | 2 +-
+ drivers/staging/ks7010/ks_wlan_net.c | 4 +-
+ drivers/staging/most/dim2/Makefile | 2 +-
+ drivers/staging/most/dim2/dim2.c | 115 +-
+ drivers/staging/most/dim2/sysfs.c | 49 -
+ drivers/staging/most/dim2/sysfs.h | 11 -
+ drivers/staging/most/net/net.c | 2 +-
+ drivers/staging/mt7621-dma/hsdma-mt7621.c | 6 +-
+ drivers/staging/mt7621-dts/gbpc1.dts | 2 +-
+ drivers/staging/mt7621-dts/mt7621.dtsi | 66 +-
+ drivers/staging/octeon/ethernet.c | 2 +-
+ drivers/staging/pi433/pi433_if.c | 18 +-
+ drivers/staging/pi433/pi433_if.h | 23 +-
+ drivers/staging/qlge/qlge_main.c | 18 +-
+ drivers/staging/qlge/qlge_mpi.c | 2 +-
+ drivers/staging/r8188eu/Kconfig | 10 -
+ drivers/staging/r8188eu/Makefile | 156 +-
+ drivers/staging/r8188eu/core/rtw_ap.c | 607 +---
+ drivers/staging/r8188eu/core/rtw_br_ext.c | 3 +-
+ drivers/staging/r8188eu/core/rtw_cmd.c | 618 +-----
+ drivers/staging/r8188eu/core/rtw_debug.c | 904 --------
+ drivers/staging/r8188eu/core/rtw_efuse.c | 582 +----
+ drivers/staging/r8188eu/core/rtw_ieee80211.c | 339 +--
+ drivers/staging/r8188eu/core/rtw_io.c | 299 ---
+ drivers/staging/r8188eu/core/rtw_ioctl_set.c | 386 +---
+ drivers/staging/r8188eu/core/rtw_iol.c | 27 -
+ drivers/staging/r8188eu/core/rtw_led.c | 1365 +-----------
+ drivers/staging/r8188eu/core/rtw_mlme.c | 126 +-
+ drivers/staging/r8188eu/core/rtw_mlme_ext.c | 384 +---
+ drivers/staging/r8188eu/core/rtw_mp.c | 935 --------
+ drivers/staging/r8188eu/core/rtw_mp_ioctl.c | 1170 ----------
+ drivers/staging/r8188eu/core/rtw_p2p.c | 43 +-
+ drivers/staging/r8188eu/core/rtw_pwrctrl.c | 130 +-
+ drivers/staging/r8188eu/core/rtw_recv.c | 116 +-
+ drivers/staging/r8188eu/core/rtw_rf.c | 17 -
+ drivers/staging/r8188eu/core/rtw_security.c | 197 +-
+ drivers/staging/r8188eu/core/rtw_sreset.c | 32 +-
+ drivers/staging/r8188eu/core/rtw_sta_mgt.c | 34 +-
+ drivers/staging/r8188eu/core/rtw_wlan_util.c | 157 +-
+ drivers/staging/r8188eu/core/rtw_xmit.c | 120 +-
+ drivers/staging/r8188eu/hal/Hal8188ERateAdaptive.c | 22 +-
+ drivers/staging/r8188eu/hal/HalHWImg8188E_BB.c | 32 +-
+ drivers/staging/r8188eu/hal/HalHWImg8188E_MAC.c | 10 +-
+ drivers/staging/r8188eu/hal/HalHWImg8188E_RF.c | 15 +-
+ drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 171 +-
+ drivers/staging/r8188eu/hal/hal_com.c | 26 +-
+ drivers/staging/r8188eu/hal/hal_intf.c | 391 +---
+ drivers/staging/r8188eu/hal/odm.c | 1188 +---------
+ drivers/staging/r8188eu/hal/odm_HWConfig.c | 393 +---
+ drivers/staging/r8188eu/hal/odm_RTL8188E.c | 31 +-
+ drivers/staging/r8188eu/hal/odm_RegConfig8188E.c | 8 -
+ drivers/staging/r8188eu/hal/odm_interface.c | 85 -
+ drivers/staging/r8188eu/hal/rtl8188e_cmd.c | 29 +-
+ drivers/staging/r8188eu/hal/rtl8188e_dm.c | 93 +-
+ drivers/staging/r8188eu/hal/rtl8188e_hal_init.c | 310 +--
+ drivers/staging/r8188eu/hal/rtl8188e_mp.c | 798 -------
+ drivers/staging/r8188eu/hal/rtl8188e_phycfg.c | 215 +-
+ drivers/staging/r8188eu/hal/rtl8188e_rf6052.c | 226 +-
+ drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 2 +-
+ drivers/staging/r8188eu/hal/rtl8188e_sreset.c | 14 +-
+ drivers/staging/r8188eu/hal/rtl8188eu_recv.c | 2 +-
+ drivers/staging/r8188eu/hal/rtl8188eu_xmit.c | 60 +-
+ drivers/staging/r8188eu/hal/usb_halinit.c | 328 +--
+ drivers/staging/r8188eu/hal/usb_ops_linux.c | 254 ++-
+ drivers/staging/r8188eu/include/Hal8188EPhyCfg.h | 91 -
+ .../staging/r8188eu/include/Hal8188ERateAdaptive.h | 2 -
+ drivers/staging/r8188eu/include/HalHWImg8188E_FW.h | 16 -
+ drivers/staging/r8188eu/include/HalVerDef.h | 70 -
+ drivers/staging/r8188eu/include/drv_types.h | 37 +-
+ drivers/staging/r8188eu/include/hal_intf.h | 312 +--
+ drivers/staging/r8188eu/include/ieee80211.h | 71 -
+ drivers/staging/r8188eu/include/ioctl_cfg80211.h | 2 -
+ drivers/staging/r8188eu/include/mp_custom_oid.h | 333 ---
+ drivers/staging/r8188eu/include/odm.h | 457 +---
+ drivers/staging/r8188eu/include/odm_HWConfig.h | 11 +-
+ drivers/staging/r8188eu/include/odm_RTL8188E.h | 2 -
+ .../staging/r8188eu/include/odm_RegConfig8188E.h | 3 -
+ .../staging/r8188eu/include/odm_RegDefine11AC.h | 29 -
+ drivers/staging/r8188eu/include/odm_RegDefine11N.h | 112 +-
+ drivers/staging/r8188eu/include/odm_interface.h | 88 -
+ drivers/staging/r8188eu/include/odm_precomp.h | 22 -
+ drivers/staging/r8188eu/include/odm_reg.h | 89 -
+ drivers/staging/r8188eu/include/odm_types.h | 24 -
+ drivers/staging/r8188eu/include/osdep_intf.h | 4 -
+ drivers/staging/r8188eu/include/osdep_service.h | 40 +-
+ drivers/staging/r8188eu/include/recv_osdep.h | 2 -
+ drivers/staging/r8188eu/include/rtl8188e_cmd.h | 16 -
+ drivers/staging/r8188eu/include/rtl8188e_dm.h | 13 -
+ drivers/staging/r8188eu/include/rtl8188e_hal.h | 101 +-
+ drivers/staging/r8188eu/include/rtl8188e_led.h | 2 -
+ drivers/staging/r8188eu/include/rtl8188e_rf.h | 1 -
+ drivers/staging/r8188eu/include/rtl8188e_spec.h | 4 -
+ drivers/staging/r8188eu/include/rtl8188e_sreset.h | 1 -
+ drivers/staging/r8188eu/include/rtw_ap.h | 11 -
+ drivers/staging/r8188eu/include/rtw_br_ext.h | 3 +-
+ drivers/staging/r8188eu/include/rtw_cmd.h | 27 +-
+ drivers/staging/r8188eu/include/rtw_debug.h | 156 --
+ drivers/staging/r8188eu/include/rtw_eeprom.h | 57 +-
+ drivers/staging/r8188eu/include/rtw_efuse.h | 21 -
+ drivers/staging/r8188eu/include/rtw_io.h | 87 +-
+ drivers/staging/r8188eu/include/rtw_ioctl_rtl.h | 63 -
+ drivers/staging/r8188eu/include/rtw_ioctl_set.h | 8 -
+ drivers/staging/r8188eu/include/rtw_iol.h | 5 -
+ drivers/staging/r8188eu/include/rtw_led.h | 20 -
+ drivers/staging/r8188eu/include/rtw_mlme.h | 11 -
+ drivers/staging/r8188eu/include/rtw_mlme_ext.h | 14 -
+ drivers/staging/r8188eu/include/rtw_mp.h | 474 ----
+ drivers/staging/r8188eu/include/rtw_mp_ioctl.h | 242 --
+ .../staging/r8188eu/include/rtw_mp_phy_regdef.h | 1063 ---------
+ drivers/staging/r8188eu/include/rtw_p2p.h | 1 -
+ drivers/staging/r8188eu/include/rtw_pwrctrl.h | 113 -
+ drivers/staging/r8188eu/include/rtw_recv.h | 6 -
+ drivers/staging/r8188eu/include/rtw_rf.h | 12 -
+ drivers/staging/r8188eu/include/rtw_security.h | 20 +-
+ drivers/staging/r8188eu/include/rtw_sreset.h | 5 -
+ drivers/staging/r8188eu/include/rtw_xmit.h | 5 -
+ drivers/staging/r8188eu/include/sta_info.h | 7 -
+ drivers/staging/r8188eu/include/usb_ops.h | 5 -
+ drivers/staging/r8188eu/include/usb_ops_linux.h | 8 -
+ drivers/staging/r8188eu/include/usb_osintf.h | 5 +-
+ drivers/staging/r8188eu/include/wifi.h | 52 -
+ drivers/staging/r8188eu/include/xmit_osdep.h | 2 -
+ drivers/staging/r8188eu/os_dep/ioctl_linux.c | 2331 +-------------------
+ drivers/staging/r8188eu/os_dep/mlme_linux.c | 6 -
+ drivers/staging/r8188eu/os_dep/os_intfs.c | 401 +---
+ drivers/staging/r8188eu/os_dep/osdep_service.c | 74 +-
+ drivers/staging/r8188eu/os_dep/recv_linux.c | 14 -
+ drivers/staging/r8188eu/os_dep/usb_intf.c | 273 +--
+ drivers/staging/r8188eu/os_dep/usb_ops_linux.c | 22 +-
+ drivers/staging/r8188eu/os_dep/xmit_linux.c | 4 -
+ drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 7 +-
+ drivers/staging/rtl8192e/rtl8192e/rtl_cam.c | 4 +-
+ drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 2 +-
+ drivers/staging/rtl8192e/rtl819x_BAProc.c | 9 +-
+ drivers/staging/rtl8192u/r8192U.h | 3 +-
+ drivers/staging/rtl8192u/r8192U_core.c | 18 +-
+ drivers/staging/rtl8712/os_intfs.c | 9 +-
+ drivers/staging/rtl8712/rtl871x_cmd.c | 2 +-
+ drivers/staging/rtl8712/rtl871x_cmd.h | 2 +-
+ drivers/staging/rtl8712/rtl871x_xmit.h | 10 +-
+ drivers/staging/rtl8712/usb_intf.c | 6 +-
+ drivers/staging/rtl8723bs/core/rtw_ap.c | 23 +-
+ drivers/staging/rtl8723bs/core/rtw_cmd.c | 116 +-
+ drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 4 +-
+ drivers/staging/rtl8723bs/core/rtw_mlme.c | 24 +-
+ drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 79 +-
+ drivers/staging/rtl8723bs/core/rtw_recv.c | 22 +-
+ drivers/staging/rtl8723bs/core/rtw_security.c | 6 +-
+ drivers/staging/rtl8723bs/core/rtw_sta_mgt.c | 48 +-
+ drivers/staging/rtl8723bs/core/rtw_xmit.c | 49 +-
+ drivers/staging/rtl8723bs/hal/odm_DIG.c | 2 +-
+ drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c | 6 +-
+ drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c | 2 -
+ drivers/staging/rtl8723bs/include/osdep_service.h | 2 +-
+ drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 26 +-
+ drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 34 +-
+ drivers/staging/rtl8723bs/os_dep/os_intfs.c | 8 +-
+ drivers/staging/rtl8723bs/os_dep/osdep_service.c | 11 +-
+ drivers/staging/rts5208/ms.c | 42 +-
+ drivers/staging/rts5208/rtsx.c | 2 +-
+ drivers/staging/rts5208/rtsx_card.c | 8 +-
+ drivers/staging/rts5208/rtsx_chip.c | 16 +-
+ drivers/staging/rts5208/rtsx_scsi.c | 106 +-
+ drivers/staging/rts5208/rtsx_transport.c | 6 +-
+ drivers/staging/rts5208/sd.c | 68 +-
+ drivers/staging/rts5208/xd.c | 48 +-
+ drivers/staging/unisys/visornic/visornic_main.c | 5 +-
+ .../vc04_services/interface/vchiq_arm/vchiq_arm.c | 294 ++-
+ .../vc04_services/interface/vchiq_arm/vchiq_arm.h | 52 -
+ .../interface/vchiq_arm/vchiq_connected.c | 20 +-
+ .../interface/vchiq_arm/vchiq_connected.h | 4 +-
+ .../vc04_services/interface/vchiq_arm/vchiq_core.c | 2 +-
+ .../vc04_services/interface/vchiq_arm/vchiq_core.h | 6 +-
+ .../vc04_services/interface/vchiq_arm/vchiq_dev.c | 178 +-
+ drivers/staging/vt6655/baseband.c | 74 +-
+ drivers/staging/vt6655/baseband.h | 2 +-
+ drivers/staging/vt6655/card.c | 98 +-
+ drivers/staging/vt6655/channel.c | 12 +-
+ drivers/staging/vt6655/device.h | 10 +-
+ drivers/staging/vt6655/device_main.c | 162 +-
+ drivers/staging/vt6655/dpc.c | 2 +-
+ drivers/staging/vt6655/key.c | 2 +-
+ drivers/staging/vt6655/mac.c | 50 +-
+ drivers/staging/vt6655/mac.h | 6 +-
+ drivers/staging/vt6655/power.c | 24 +-
+ drivers/staging/vt6655/rf.c | 20 +-
+ drivers/staging/vt6655/rxtx.c | 64 +-
+ drivers/staging/wfx/bh.c | 37 +-
+ drivers/staging/wfx/bh.h | 4 +-
+ drivers/staging/wfx/bus_sdio.c | 25 +-
+ drivers/staging/wfx/bus_spi.c | 22 +-
+ drivers/staging/wfx/data_rx.c | 7 +-
+ drivers/staging/wfx/data_rx.h | 4 +-
+ drivers/staging/wfx/data_tx.c | 87 +-
+ drivers/staging/wfx/data_tx.h | 6 +-
+ drivers/staging/wfx/debug.c | 56 +-
+ drivers/staging/wfx/debug.h | 2 +-
+ drivers/staging/wfx/fwio.c | 26 +-
+ drivers/staging/wfx/fwio.h | 2 +-
+ drivers/staging/wfx/hif_api_cmd.h | 14 +-
+ drivers/staging/wfx/hif_api_general.h | 25 +-
+ drivers/staging/wfx/hif_api_mib.h | 85 +-
+ drivers/staging/wfx/hif_rx.c | 23 +-
+ drivers/staging/wfx/hif_rx.h | 3 +-
+ drivers/staging/wfx/hif_tx.c | 60 +-
+ drivers/staging/wfx/hif_tx.h | 6 +-
+ drivers/staging/wfx/hif_tx_mib.c | 14 +-
+ drivers/staging/wfx/hif_tx_mib.h | 2 +-
+ drivers/staging/wfx/hwio.c | 6 +-
+ drivers/staging/wfx/hwio.h | 20 +-
+ drivers/staging/wfx/key.c | 30 +-
+ drivers/staging/wfx/key.h | 4 +-
+ drivers/staging/wfx/main.c | 37 +-
+ drivers/staging/wfx/main.h | 3 +-
+ drivers/staging/wfx/queue.c | 43 +-
+ drivers/staging/wfx/queue.h | 6 +-
+ drivers/staging/wfx/scan.c | 51 +-
+ drivers/staging/wfx/scan.h | 4 +-
+ drivers/staging/wfx/sta.c | 118 +-
+ drivers/staging/wfx/sta.h | 8 +-
+ drivers/staging/wfx/traces.h | 2 +-
+ drivers/staging/wfx/wfx.h | 14 +-
+ drivers/staging/wlan-ng/hfa384x.h | 2 +-
+ drivers/staging/wlan-ng/hfa384x_usb.c | 24 +-
+ drivers/staging/wlan-ng/p80211conv.c | 2 +-
+ drivers/staging/wlan-ng/p80211conv.h | 2 +-
+ drivers/staging/wlan-ng/p80211hdr.h | 2 +-
+ drivers/staging/wlan-ng/p80211ioctl.h | 2 +-
+ drivers/staging/wlan-ng/p80211mgmt.h | 2 +-
+ drivers/staging/wlan-ng/p80211msg.h | 2 +-
+ drivers/staging/wlan-ng/p80211netdev.c | 4 +-
+ drivers/staging/wlan-ng/p80211netdev.h | 2 +-
+ drivers/staging/wlan-ng/p80211req.c | 2 +-
+ drivers/staging/wlan-ng/p80211req.h | 2 +-
+ drivers/staging/wlan-ng/p80211types.h | 2 +-
+ drivers/staging/wlan-ng/p80211wep.c | 2 +-
+ drivers/staging/wlan-ng/prism2mgmt.c | 2 +-
+ drivers/staging/wlan-ng/prism2mgmt.h | 2 +-
+ drivers/staging/wlan-ng/prism2mib.c | 2 +-
+ drivers/staging/wlan-ng/prism2sta.c | 6 +-
+ drivers/staging/wlan-ng/prism2usb.c | 3 +-
+ 251 files changed, 2842 insertions(+), 21503 deletions(-)
+ delete mode 100644 drivers/staging/most/dim2/sysfs.c
+ delete mode 100644 drivers/staging/r8188eu/core/rtw_debug.c
+ delete mode 100644 drivers/staging/r8188eu/core/rtw_io.c
+ delete mode 100644 drivers/staging/r8188eu/core/rtw_mp.c
+ delete mode 100644 drivers/staging/r8188eu/core/rtw_mp_ioctl.c
+ delete mode 100644 drivers/staging/r8188eu/hal/rtl8188e_mp.c
+ delete mode 100644 drivers/staging/r8188eu/include/HalHWImg8188E_FW.h
+ delete mode 100644 drivers/staging/r8188eu/include/mp_custom_oid.h
+ delete mode 100644 drivers/staging/r8188eu/include/odm_RegDefine11AC.h
+ delete mode 100644 drivers/staging/r8188eu/include/odm_reg.h
+ delete mode 100644 drivers/staging/r8188eu/include/rtw_ioctl_rtl.h
+ delete mode 100644 drivers/staging/r8188eu/include/rtw_mp.h
+ delete mode 100644 drivers/staging/r8188eu/include/rtw_mp_ioctl.h
+ delete mode 100644 drivers/staging/r8188eu/include/rtw_mp_phy_regdef.h
+Merging iio/togreg (7b473ae754fe iio: frequency: adrf6780: Fix adrf6780_spi_{read,write}())
+$ git merge -m Merge branch 'togreg' of git://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git iio/togreg
+Already up to date.
+Merging mux/for-next (3516bd729358 Merge tag 's390-5.11-3' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux)
+$ git merge -m Merge branch 'for-next' of https://gitlab.com/peda-linux/mux.git mux/for-next
+Already up to date.
+Merging icc/icc-next (dfe14674bf7b Merge branch 'icc-rpm' into icc-next)
+$ git merge -m Merge branch 'icc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/djakov/icc.git icc/icc-next
+Already up to date.
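The runs above show the three outcomes a tree merge can produce in this log: a clean merge with a diffstat ("Merge made by the 'recursive' strategy."), a no-op ("Already up to date."), and per-file content merges ("Auto-merging ..."). A minimal sketch of the per-tree loop that would produce output of this shape, assuming a hypothetical trees.list file of "name url branch" triples (the real linux-next control files are structured differently):

$ # one fetch plus one merge per tree; stop at the first failure
$ while read name url branch; do
>   git fetch "$url" "$branch" &&
>   git merge -m "Merge branch '$branch' of $url" FETCH_HEAD || break
> done < trees.list

Because every merge commit gets the same fixed-form message, each "$ git merge -m Merge branch ..." line in the log is directly greppable by tree name.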
+Merging dmaengine/next (635156d94b64 dmaengine: imx-sdma: remove space after sizeof)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git dmaengine/next
+Merge made by the 'recursive' strategy.
+ drivers/dma/Kconfig | 2 +-
+ drivers/dma/at_xdmac.c | 67 +++++++--------
+ drivers/dma/bestcomm/ata.c | 2 +-
+ drivers/dma/bestcomm/bestcomm.c | 22 ++---
+ drivers/dma/bestcomm/fec.c | 4 +-
+ drivers/dma/bestcomm/gen_bd.c | 4 +-
+ drivers/dma/dma-jz4780.c | 1 +
+ drivers/dma/dmaengine.c | 3 +-
+ drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 113 ++++++++++++++++++-------
+ drivers/dma/dw-axi-dmac/dw-axi-dmac.h | 35 +++++++-
+ drivers/dma/dw-edma/dw-edma-core.c | 1 -
+ drivers/dma/idxd/device.c | 3 +-
+ drivers/dma/idxd/dma.c | 2 +
+ drivers/dma/idxd/registers.h | 3 +-
+ drivers/dma/imx-sdma.c | 28 +++---
+ drivers/dma/sh/rz-dmac.c | 14 +++
+ drivers/dma/stm32-dma.c | 24 ++++--
+ drivers/dma/stm32-mdma.c | 3 +-
+ drivers/dma/tegra210-adma.c | 55 ++++++++----
+ drivers/dma/xilinx/zynqmp_dma.c | 67 +++++++--------
+ 20 files changed, 292 insertions(+), 161 deletions(-)
+Merging cgroup/for-next (0061270307f2 cgroup: cgroup-v1: do not exclude cgrp_dfl_root)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git cgroup/for-next
+Auto-merging kernel/cgroup/cgroup.c
+Auto-merging Documentation/admin-guide/cgroup-v2.rst
+Merge made by the 'recursive' strategy.
+ Documentation/admin-guide/cgroup-v2.rst | 10 ++++++++++
+ include/linux/misc_cgroup.h | 6 +++++-
+ kernel/cgroup/cgroup-v1.c | 3 ---
+ kernel/cgroup/cgroup.c | 31 +++++++++++++++++++++++++++----
+ kernel/cgroup/misc.c | 31 ++++++++++++++++++++++-------
+ 5 files changed, 66 insertions(+), 15 deletions(-)
+Merging scsi/for-next (151a3b7b9d3c Merge branch 'misc' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git scsi/for-next
+Auto-merging include/scsi/scsi_device.h
+Auto-merging include/scsi/scsi_cmnd.h
+Auto-merging drivers/target/target_core_iblock.c
+Auto-merging drivers/staging/rts5208/rtsx.c
+Auto-merging drivers/scsi/virtio_scsi.c
+Auto-merging drivers/scsi/ufs/ufshcd.h
+Auto-merging drivers/scsi/ufs/ufshcd.c
+Auto-merging drivers/scsi/sr.c
+Auto-merging drivers/scsi/sd.c
+Auto-merging drivers/scsi/scsi_scan.c
+Auto-merging drivers/scsi/scsi_lib.c
+Auto-merging drivers/scsi/scsi_error.c
+Auto-merging drivers/scsi/scsi_debug.c
+Auto-merging drivers/scsi/qla2xxx/qla_nvme.c
+Auto-merging drivers/scsi/qedf/qedf_main.c
+Auto-merging drivers/scsi/qedf/qedf_io.c
+Auto-merging drivers/scsi/qedf/qedf.h
+Auto-merging drivers/scsi/lpfc/lpfc.h
+Auto-merging drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+Auto-merging drivers/ata/libata-scsi.c
+Auto-merging Documentation/ABI/testing/sysfs-driver-ufs
+Merge made by the 'recursive' strategy.
+ Documentation/ABI/testing/sysfs-class-fc | 27 ++
+ Documentation/ABI/testing/sysfs-driver-ufs | 2 +-
+ drivers/ata/ahci.h | 8 +-
+ drivers/ata/ata_piix.c | 8 +-
+ drivers/ata/libahci.c | 52 ++-
+ drivers/ata/libata-sata.c | 21 +-
+ drivers/ata/libata-scsi.c | 29 +-
+ drivers/ata/pata_macio.c | 2 +-
+ drivers/ata/sata_mv.c | 2 +-
+ drivers/ata/sata_nv.c | 4 +-
+ drivers/ata/sata_sil24.c | 2 +-
+ drivers/firewire/sbp2.c | 10 +-
+ drivers/infiniband/ulp/srp/ib_srp.c | 59 ++-
+ drivers/infiniband/ulp/srpt/ib_srpt.c | 38 +-
+ drivers/message/fusion/mptfc.c | 8 +-
+ drivers/message/fusion/mptsas.c | 4 +-
+ drivers/message/fusion/mptscsih.c | 46 +-
+ drivers/message/fusion/mptscsih.h | 2 +-
+ drivers/message/fusion/mptspi.c | 6 +-
+ drivers/s390/scsi/zfcp_ext.h | 4 +-
+ drivers/s390/scsi/zfcp_fsf.c | 2 +-
+ drivers/s390/scsi/zfcp_scsi.c | 8 +-
+ drivers/s390/scsi/zfcp_sysfs.c | 52 ++-
+ drivers/scsi/3w-9xxx.c | 18 +-
+ drivers/scsi/3w-sas.c | 18 +-
+ drivers/scsi/3w-xxxx.c | 26 +-
+ drivers/scsi/53c700.c | 20 +-
+ drivers/scsi/BusLogic.c | 13 +-
+ drivers/scsi/NCR5380.c | 12 +-
+ drivers/scsi/a100u2w.c | 5 +-
+ drivers/scsi/aacraid/aachba.c | 53 ++-
+ drivers/scsi/aacraid/linit.c | 38 +-
+ drivers/scsi/advansys.c | 14 +-
+ drivers/scsi/aha152x.c | 29 +-
+ drivers/scsi/aha1542.c | 16 +-
+ drivers/scsi/aha1740.c | 4 +-
+ drivers/scsi/aic7xxx/aic79xx_osm.c | 6 +-
+ drivers/scsi/aic7xxx/aic79xx_osm.h | 2 +-
+ drivers/scsi/aic7xxx/aic7xxx_osm.c | 6 +-
+ drivers/scsi/aic7xxx/aic7xxx_osm.h | 2 +-
+ drivers/scsi/arcmsr/arcmsr.h | 2 +-
+ drivers/scsi/arcmsr/arcmsr_attr.c | 33 +-
+ drivers/scsi/arcmsr/arcmsr_hba.c | 22 +-
+ drivers/scsi/arm/acornscsi.c | 20 +-
+ drivers/scsi/arm/arxescsi.c | 1 +
+ drivers/scsi/arm/cumana_2.c | 1 +
+ drivers/scsi/arm/eesox.c | 1 +
+ drivers/scsi/arm/fas216.c | 26 +-
+ drivers/scsi/arm/fas216.h | 10 +
+ drivers/scsi/arm/powertec.c | 2 +-
+ drivers/scsi/atp870u.c | 17 +-
+ drivers/scsi/be2iscsi/be_main.c | 21 +-
+ drivers/scsi/bfa/bfad_attr.c | 68 +--
+ drivers/scsi/bfa/bfad_im.c | 16 +-
+ drivers/scsi/bfa/bfad_im.h | 4 +-
+ drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 8 +-
+ drivers/scsi/bnx2fc/bnx2fc_io.c | 8 +-
+ drivers/scsi/bnx2i/bnx2i.h | 2 +-
+ drivers/scsi/bnx2i/bnx2i_iscsi.c | 2 +-
+ drivers/scsi/bnx2i/bnx2i_sysfs.c | 15 +-
+ drivers/scsi/csiostor/csio_lnode.c | 2 +-
+ drivers/scsi/csiostor/csio_scsi.c | 32 +-
+ drivers/scsi/cxlflash/main.c | 46 +-
+ drivers/scsi/dc395x.c | 12 +-
+ drivers/scsi/dpt_i2o.c | 13 +-
+ drivers/scsi/elx/efct/efct_driver.c | 6 +-
+ drivers/scsi/elx/efct/efct_lio.c | 4 +-
+ drivers/scsi/elx/efct/efct_scsi.c | 3 +-
+ drivers/scsi/elx/libefc/efc.h | 2 +-
+ drivers/scsi/elx/libefc/efc_cmds.c | 7 +-
+ drivers/scsi/elx/libefc/efc_fabric.c | 2 +-
+ drivers/scsi/elx/libefc/efclib.h | 1 +
+ drivers/scsi/esas2r/esas2r_main.c | 8 +-
+ drivers/scsi/esp_scsi.c | 12 +-
+ drivers/scsi/fcoe/fcoe.c | 2 +-
+ drivers/scsi/fdomain.c | 2 +-
+ drivers/scsi/fnic/fnic.h | 2 +-
+ drivers/scsi/fnic/fnic_attrs.c | 17 +-
+ drivers/scsi/fnic/fnic_main.c | 2 +-
+ drivers/scsi/fnic/fnic_scsi.c | 122 +++--
+ drivers/scsi/hisi_sas/hisi_sas.h | 3 +-
+ drivers/scsi/hisi_sas/hisi_sas_main.c | 113 +++--
+ drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 23 +-
+ drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 35 +-
+ drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 62 ++-
+ drivers/scsi/hosts.c | 17 +-
+ drivers/scsi/hpsa.c | 56 +--
+ drivers/scsi/hptiop.c | 20 +-
+ drivers/scsi/ibmvscsi/ibmvfc.c | 30 +-
+ drivers/scsi/ibmvscsi/ibmvscsi.c | 28 +-
+ drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 42 +-
+ drivers/scsi/imm.c | 6 +-
+ drivers/scsi/initio.c | 7 +-
+ drivers/scsi/ipr.c | 48 +-
+ drivers/scsi/ips.c | 31 +-
+ drivers/scsi/isci/init.c | 8 +-
+ drivers/scsi/isci/task.h | 4 -
+ drivers/scsi/libfc/fc_fcp.c | 6 +-
+ drivers/scsi/libiscsi.c | 7 +-
+ drivers/scsi/libsas/sas_init.c | 8 +-
+ drivers/scsi/libsas/sas_scsi_host.c | 27 +-
+ drivers/scsi/lpfc/lpfc.h | 1 +
+ drivers/scsi/lpfc/lpfc_attr.c | 314 +++++------
+ drivers/scsi/lpfc/lpfc_crtn.h | 4 +-
+ drivers/scsi/lpfc/lpfc_els.c | 41 +-
+ drivers/scsi/lpfc/lpfc_hbadisc.c | 32 +-
+ drivers/scsi/lpfc/lpfc_hw4.h | 4 +
+ drivers/scsi/lpfc/lpfc_init.c | 67 ++-
+ drivers/scsi/lpfc/lpfc_nvme.c | 70 ++-
+ drivers/scsi/lpfc/lpfc_nvmet.c | 44 +-
+ drivers/scsi/lpfc/lpfc_scsi.c | 112 +++--
+ drivers/scsi/lpfc/lpfc_sli.c | 154 +++++--
+ drivers/scsi/lpfc/lpfc_sli4.h | 2 +
+ drivers/scsi/lpfc/lpfc_version.h | 2 +-
+ drivers/scsi/mac53c94.c | 6 +-
+ drivers/scsi/megaraid.c | 24 +-
+ drivers/scsi/megaraid/megaraid_mbox.c | 28 +-
+ drivers/scsi/megaraid/megaraid_sas.h | 4 +-
+ drivers/scsi/megaraid/megaraid_sas_base.c | 40 +-
+ drivers/scsi/megaraid/megaraid_sas_fusion.c | 56 ++-
+ drivers/scsi/mesh.c | 18 +-
+ drivers/scsi/mpi3mr/mpi3mr_fw.c | 32 +-
+ drivers/scsi/mpi3mr/mpi3mr_os.c | 26 +-
+ drivers/scsi/mpt3sas/mpt3sas_base.h | 4 +-
+ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 84 ++--
+ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 26 +-
+ drivers/scsi/mvsas/mv_init.c | 12 +-
+ drivers/scsi/mvumi.c | 4 +-
+ drivers/scsi/myrb.c | 60 +--
+ drivers/scsi/myrs.c | 50 +-
+ drivers/scsi/ncr53c8xx.c | 16 +-
+ drivers/scsi/nsp32.c | 7 +-
+ drivers/scsi/pcmcia/nsp_cs.c | 7 +-
+ drivers/scsi/pcmcia/sym53c500_cs.c | 14 +-
+ drivers/scsi/pm8001/pm8001_ctl.c | 70 +--
+ drivers/scsi/pm8001/pm8001_hwi.c | 12 +-
+ drivers/scsi/pm8001/pm8001_init.c | 14 +-
+ drivers/scsi/pm8001/pm8001_sas.c | 15 +
+ drivers/scsi/pm8001/pm8001_sas.h | 8 +-
+ drivers/scsi/pm8001/pm80xx_hwi.c | 63 ++-
+ drivers/scsi/pmcraid.c | 27 +-
+ drivers/scsi/ppa.c | 6 +-
+ drivers/scsi/ps3rom.c | 8 +-
+ drivers/scsi/qedf/qedf.h | 2 +-
+ drivers/scsi/qedf/qedf_attr.c | 15 +-
+ drivers/scsi/qedf/qedf_io.c | 19 +-
+ drivers/scsi/qedf/qedf_main.c | 2 +-
+ drivers/scsi/qedi/qedi_gbl.h | 2 +-
+ drivers/scsi/qedi/qedi_iscsi.c | 2 +-
+ drivers/scsi/qedi/qedi_sysfs.c | 15 +-
+ drivers/scsi/qla1280.c | 8 +-
+ drivers/scsi/qla2xxx/qla_attr.c | 149 +++---
+ drivers/scsi/qla2xxx/qla_bsg.c | 48 ++
+ drivers/scsi/qla2xxx/qla_bsg.h | 7 +
+ drivers/scsi/qla2xxx/qla_def.h | 4 +-
+ drivers/scsi/qla2xxx/qla_gbl.h | 8 +-
+ drivers/scsi/qla2xxx/qla_gs.c | 3 +-
+ drivers/scsi/qla2xxx/qla_init.c | 17 +-
+ drivers/scsi/qla2xxx/qla_mbx.c | 35 +-
+ drivers/scsi/qla2xxx/qla_nvme.c | 20 +-
+ drivers/scsi/qla2xxx/qla_os.c | 103 ++---
+ drivers/scsi/qla2xxx/qla_version.h | 6 +-
+ drivers/scsi/qla2xxx/tcm_qla2xxx.c | 73 +--
+ drivers/scsi/qla4xxx/ql4_attr.c | 41 +-
+ drivers/scsi/qla4xxx/ql4_glbl.h | 3 +-
+ drivers/scsi/qla4xxx/ql4_os.c | 6 +-
+ drivers/scsi/qlogicfas408.c | 7 +-
+ drivers/scsi/qlogicpti.c | 7 +-
+ drivers/scsi/scsi.c | 8 -
+ drivers/scsi/scsi_debug.c | 19 +-
+ drivers/scsi/scsi_error.c | 17 +-
+ drivers/scsi/scsi_lib.c | 64 ++-
+ drivers/scsi/scsi_pm.c | 105 +----
+ drivers/scsi/scsi_priv.h | 7 +-
+ drivers/scsi/scsi_scan.c | 74 +--
+ drivers/scsi/scsi_sysfs.c | 54 +--
+ drivers/scsi/scsi_transport_sas.c | 1 +
+ drivers/scsi/sd.c | 38 +-
+ drivers/scsi/smartpqi/smartpqi.h | 61 ++-
+ drivers/scsi/smartpqi/smartpqi_init.c | 588 +++++++++++++++++-------
+ drivers/scsi/smartpqi/smartpqi_sas_transport.c | 6 +-
+ drivers/scsi/smartpqi/smartpqi_sis.c | 60 ++-
+ drivers/scsi/smartpqi/smartpqi_sis.h | 4 +-
+ drivers/scsi/snic/snic.h | 2 +-
+ drivers/scsi/snic/snic_attrs.c | 19 +-
+ drivers/scsi/snic/snic_main.c | 2 +-
+ drivers/scsi/snic/snic_scsi.c | 33 +-
+ drivers/scsi/sr.c | 7 +-
+ drivers/scsi/stex.c | 10 +-
+ drivers/scsi/storvsc_drv.c | 4 +-
+ drivers/scsi/sym53c8xx_2/sym_glue.c | 6 +-
+ drivers/scsi/ufs/Kconfig | 19 +-
+ drivers/scsi/ufs/Makefile | 1 +
+ drivers/scsi/ufs/ufs-exynos.c | 6 +-
+ drivers/scsi/ufs/ufs-hisi.c | 6 +-
+ drivers/scsi/ufs/ufs-hwmon.c | 210 +++++++++
+ drivers/scsi/ufs/ufs-mediatek.c | 111 ++++-
+ drivers/scsi/ufs/ufs-mediatek.h | 27 ++
+ drivers/scsi/ufs/ufs-qcom.c | 21 +-
+ drivers/scsi/ufs/ufs.h | 7 +
+ drivers/scsi/ufs/ufshcd.c | 341 ++++++--------
+ drivers/scsi/ufs/ufshcd.h | 43 +-
+ drivers/scsi/ufs/ufshpb.c | 7 +-
+ drivers/scsi/virtio_scsi.c | 7 +-
+ drivers/scsi/vmw_pvscsi.c | 9 +-
+ drivers/scsi/wd33c93.c | 18 +-
+ drivers/scsi/wd719x.c | 4 +-
+ drivers/scsi/xen-scsifront.c | 4 +-
+ drivers/staging/rts5208/rtsx.c | 9 +-
+ drivers/staging/unisys/visorhba/visorhba_main.c | 20 +-
+ drivers/target/iscsi/cxgbit/cxgbit_cm.c | 8 +-
+ drivers/target/iscsi/cxgbit/cxgbit_main.c | 17 +-
+ drivers/target/iscsi/cxgbit/cxgbit_target.c | 28 +-
+ drivers/target/iscsi/iscsi_target_configfs.c | 91 ++--
+ drivers/target/loopback/tcm_loop.c | 4 +-
+ drivers/target/sbp/sbp_target.c | 30 +-
+ drivers/target/target_core_alua.c | 83 ++--
+ drivers/target/target_core_configfs.c | 1 +
+ drivers/target/target_core_device.c | 2 +
+ drivers/target/target_core_fabric_configfs.c | 78 +++-
+ drivers/target/target_core_iblock.c | 4 +-
+ drivers/target/target_core_internal.h | 1 +
+ drivers/target/target_core_transport.c | 94 ++--
+ drivers/target/target_core_user.c | 7 +-
+ drivers/target/target_core_xcopy.c | 14 +-
+ drivers/usb/gadget/function/f_tcm.c | 31 +-
+ drivers/usb/image/microtek.c | 5 +-
+ drivers/usb/storage/scsiglue.c | 13 +-
+ drivers/usb/storage/uas.c | 13 +-
+ drivers/usb/storage/usb.c | 4 +-
+ include/linux/libata.h | 8 +-
+ include/scsi/libsas.h | 1 +
+ include/scsi/scsi_cmnd.h | 14 +-
+ include/scsi/scsi_device.h | 6 +
+ include/scsi/scsi_host.h | 27 +-
+ include/scsi/scsi_transport_sas.h | 1 +
+ include/target/target_core_base.h | 9 +-
+ include/target/target_core_fabric.h | 1 +
+ 238 files changed, 3942 insertions(+), 2737 deletions(-)
+ create mode 100644 Documentation/ABI/testing/sysfs-class-fc
+ create mode 100644 drivers/scsi/ufs/ufs-hwmon.c
+Merging scsi-mkp/for-next (83c3a7beaef7 scsi: lpfc: Update lpfc version to 14.0.0.3)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git scsi-mkp/for-next
+Merge made by the 'recursive' strategy.
+ drivers/scsi/lpfc/lpfc_crtn.h | 3 +
+ drivers/scsi/lpfc/lpfc_disc.h | 12 +++-
+ drivers/scsi/lpfc/lpfc_els.c | 20 ++++++-
+ drivers/scsi/lpfc/lpfc_hbadisc.c | 112 ++++++++++++++++++++++++++++++++++++-
+ drivers/scsi/lpfc/lpfc_init.c | 68 +++++++++++++++++++---
+ drivers/scsi/lpfc/lpfc_scsi.c | 19 +++++--
+ drivers/scsi/lpfc/lpfc_sli.c | 43 +++++---------
+ drivers/scsi/lpfc/lpfc_version.h | 2 +-
+ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +-
+ drivers/scsi/ufs/ufs-mediatek.c | 6 +-
+ drivers/scsi/ufs/ufshcd-pltfrm.c | 4 +-
+ 11 files changed, 235 insertions(+), 56 deletions(-)
+Merging vhost/linux-next (2b109044b081 virtio_blk: allow 0 as num_request_queues)
+$ git merge -m Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost/linux-next
+Auto-merging drivers/vdpa/mlx5/net/mlx5_vnet.c
+Auto-merging drivers/vdpa/mlx5/core/mlx5_vdpa.h
+Auto-merging drivers/net/virtio_net.c
+Auto-merging drivers/char/virtio_console.c
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ MAINTAINERS | 7 +
+ drivers/block/virtio_blk.c | 173 +++++++----
+ drivers/char/hw_random/virtio-rng.c | 84 +++--
+ drivers/char/virtio_console.c | 9 +
+ drivers/i2c/busses/i2c-virtio.c | 56 ++--
+ drivers/iommu/virtio-iommu.c | 113 +++++--
+ drivers/net/virtio_net.c | 3 +-
+ drivers/vdpa/Kconfig | 8 +
+ drivers/vdpa/Makefile | 1 +
+ drivers/vdpa/alibaba/Makefile | 3 +
+ drivers/vdpa/alibaba/eni_vdpa.c | 553 +++++++++++++++++++++++++++++++++
+ drivers/vdpa/ifcvf/ifcvf_main.c | 3 +-
+ drivers/vdpa/mlx5/core/mlx5_vdpa.h | 2 +-
+ drivers/vdpa/mlx5/net/mlx5_vnet.c | 203 ++++++++++--
+ drivers/vdpa/vdpa.c | 256 ++++++++++++++-
+ drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 3 +-
+ drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 38 ++-
+ drivers/vdpa/vdpa_user/vduse_dev.c | 32 +-
+ drivers/vdpa/virtio_pci/vp_vdpa.c | 12 +
+ drivers/vhost/vdpa.c | 3 +-
+ drivers/virtio/Kconfig | 10 +
+ drivers/virtio/Makefile | 1 +
+ drivers/virtio/virtio_pci_common.c | 58 +++-
+ drivers/virtio/virtio_pci_common.h | 16 +-
+ drivers/virtio/virtio_pci_legacy.c | 106 ++-----
+ drivers/virtio/virtio_pci_legacy_dev.c | 220 +++++++++++++
+ drivers/virtio/virtio_pci_modern.c | 6 +-
+ drivers/virtio/virtio_ring.c | 30 +-
+ drivers/virtio/virtio_vdpa.c | 21 +-
+ include/linux/vdpa.h | 53 ++--
+ include/linux/virtio_config.h | 6 +
+ include/linux/virtio_pci_legacy.h | 42 +++
+ include/uapi/linux/vdpa.h | 7 +
+ include/uapi/linux/virtio_i2c.h | 6 +
+ include/uapi/linux/virtio_iommu.h | 8 +-
+ 35 files changed, 1835 insertions(+), 317 deletions(-)
+ create mode 100644 drivers/vdpa/alibaba/Makefile
+ create mode 100644 drivers/vdpa/alibaba/eni_vdpa.c
+ create mode 100644 drivers/virtio/virtio_pci_legacy_dev.c
+ create mode 100644 include/linux/virtio_pci_legacy.h
+Merging rpmsg/for-next (6ee5808de074 Merge branches 'rpmsg-next' and 'rproc-next' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/remoteproc/linux.git rpmsg/for-next
+Auto-merging include/linux/rpmsg.h
+Auto-merging drivers/soc/qcom/qcom_aoss.c
+Auto-merging drivers/rpmsg/virtio_rpmsg_bus.c
+Auto-merging MAINTAINERS
+Removing Documentation/devicetree/bindings/remoteproc/mtk,scp.txt
+Merge made by the 'recursive' strategy.
+ Documentation/devicetree/bindings/dsp/fsl,dsp.yaml | 123 +-
+ .../remoteproc/amlogic,meson-mx-ao-arc.yaml | 87 ++
+ .../devicetree/bindings/remoteproc/mtk,scp.txt | 36 -
+ .../devicetree/bindings/remoteproc/mtk,scp.yaml | 92 ++
+ .../devicetree/bindings/remoteproc/qcom,adsp.yaml | 59 +-
+ .../devicetree/bindings/remoteproc/qcom,q6v5.txt | 39 +-
+ .../bindings/remoteproc/ti,k3-dsp-rproc.yaml | 4 +-
+ .../bindings/remoteproc/ti,k3-r5f-rproc.yaml | 4 +-
+ MAINTAINERS | 4 +-
+ drivers/remoteproc/Kconfig | 32 +-
+ drivers/remoteproc/Makefile | 2 +
+ drivers/remoteproc/imx_dsp_rproc.c | 1206 ++++++++++++++++++++
+ drivers/remoteproc/imx_rproc.c | 71 +-
+ drivers/remoteproc/imx_rproc.h | 39 +
+ drivers/remoteproc/meson_mx_ao_arc.c | 261 +++++
+ drivers/remoteproc/mtk_common.h | 1 +
+ drivers/remoteproc/mtk_scp.c | 48 +-
+ drivers/remoteproc/omap_remoteproc.c | 6 +-
+ drivers/remoteproc/qcom_q6v5.c | 57 +-
+ drivers/remoteproc/qcom_q6v5.h | 7 +-
+ drivers/remoteproc/qcom_q6v5_adsp.c | 7 +-
+ drivers/remoteproc/qcom_q6v5_mss.c | 304 ++++-
+ drivers/remoteproc/qcom_q6v5_pas.c | 141 +--
+ drivers/remoteproc/qcom_q6v5_wcss.c | 5 +-
+ drivers/remoteproc/qcom_wcnss.c | 1 -
+ drivers/remoteproc/remoteproc_core.c | 8 +-
+ drivers/remoteproc/remoteproc_coredump.c | 2 +-
+ drivers/remoteproc/remoteproc_elf_loader.c | 4 +-
+ drivers/remoteproc/remoteproc_virtio.c | 12 +
+ drivers/remoteproc/ti_k3_dsp_remoteproc.c | 2 +-
+ drivers/remoteproc/ti_k3_r5_remoteproc.c | 2 +-
+ drivers/rpmsg/mtk_rpmsg.c | 2 +-
+ drivers/rpmsg/qcom_glink_native.c | 90 +-
+ drivers/rpmsg/rpmsg_char.c | 2 -
+ drivers/rpmsg/virtio_rpmsg_bus.c | 3 +-
+ include/linux/remoteproc.h | 12 -
+ include/linux/rpmsg.h | 2 +-
+ 37 files changed, 2462 insertions(+), 315 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml
+ delete mode 100644 Documentation/devicetree/bindings/remoteproc/mtk,scp.txt
+ create mode 100644 Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml
+ create mode 100644 drivers/remoteproc/imx_dsp_rproc.c
+ create mode 100644 drivers/remoteproc/imx_rproc.h
+ create mode 100644 drivers/remoteproc/meson_mx_ao_arc.c
+Merging gpio/for-next (7ac554888233 MAINTAINERS: Remove reference to non-existing file)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git gpio/for-next
+Already up to date.
+Merging gpio-brgl/gpio/for-next (f4a20dfac88c gpio: mc33880: Drop if with an always false condition)
+$ git merge -m Merge branch 'gpio/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio-brgl/gpio/for-next
+Auto-merging include/linux/firmware/xlnx-zynqmp.h
+Auto-merging drivers/gpio/gpio-uniphier.c
+Auto-merging drivers/firmware/xilinx/zynqmp.c
+Merge made by the 'recursive' strategy.
+ .../bindings/gpio/rockchip,gpio-bank.yaml | 2 +
+ .../bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml | 43 ++++++
+ drivers/firmware/xilinx/zynqmp.c | 46 ++++++
+ drivers/gpio/Kconfig | 12 ++
+ drivers/gpio/Makefile | 1 +
+ drivers/gpio/gpio-aggregator.c | 25 ++--
+ drivers/gpio/gpio-max7300.c | 4 +-
+ drivers/gpio/gpio-max7301.c | 4 +-
+ drivers/gpio/gpio-max730x.c | 6 +-
+ drivers/gpio/gpio-max77620.c | 1 -
+ drivers/gpio/gpio-mc33880.c | 2 -
+ drivers/gpio/gpio-tegra186.c | 114 +++++++++++++--
+ drivers/gpio/gpio-tps65218.c | 1 -
+ drivers/gpio/gpio-uniphier.c | 18 ++-
+ drivers/gpio/gpio-xilinx.c | 6 +-
+ drivers/gpio/gpio-zynqmp-modepin.c | 162 +++++++++++++++++++++
+ include/linux/firmware/xlnx-zynqmp.h | 14 ++
+ include/linux/spi/max7301.h | 2 +-
+ 18 files changed, 413 insertions(+), 50 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
+ create mode 100644 drivers/gpio/gpio-zynqmp-modepin.c
+Merging gpio-intel/for-next (1649b8376694 gpio: pca953x: Improve bias setting)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git gpio-intel/for-next
+Already up to date.
+Merging pinctrl/for-next (664bad61ccde Merge branch 'devel' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git pinctrl/for-next
+Removing Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
+Removing Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
+Auto-merging Documentation/devicetree/bindings/mfd/brcm,cru.yaml
+CONFLICT (content): Merge conflict in Documentation/devicetree/bindings/mfd/brcm,cru.yaml
+Resolved 'Documentation/devicetree/bindings/mfd/brcm,cru.yaml' using previous resolution.
+Automatic merge failed; fix conflicts and then commit the result.
+$ git commit --no-edit -v -a
+[master a2ed391c6490] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
+$ git diff -M --stat --summary HEAD^..
+ .../devicetree/bindings/mfd/brcm,cru.yaml | 11 +-
+ .../bindings/pinctrl/brcm,ns-pinmux.yaml | 33 +-
+ .../bindings/pinctrl/pinctrl-mt8195.yaml | 86 +-
+ .../bindings/pinctrl/qcom,pmic-gpio.yaml | 4 +
+ .../devicetree/bindings/pinctrl/qcom,pmic-mpp.txt | 187 ---
+ .../devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml | 188 +++
+ .../bindings/pinctrl/qcom,qcm2290-pinctrl.yaml | 165 +++
+ .../bindings/pinctrl/qcom,sm6350-pinctrl.yaml | 148 +++
+ .../bindings/pinctrl/rockchip,pinctrl.txt | 114 --
+ .../bindings/pinctrl/rockchip,pinctrl.yaml | 184 +++
+ .../bindings/pinctrl/samsung-pinctrl.txt | 1 +
+ .../pinctrl/socionext,uniphier-pinctrl.yaml | 1 +
+ drivers/pinctrl/bcm/pinctrl-ns.c | 29 +-
+ drivers/pinctrl/mediatek/pinctrl-moore.c | 18 +
+ drivers/pinctrl/mediatek/pinctrl-mt8195.c | 134 ++
+ drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 231 +++-
+ drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h | 46 +
+ drivers/pinctrl/mediatek/pinctrl-paris.c | 68 +-
+ drivers/pinctrl/nomadik/Kconfig | 1 -
+ drivers/pinctrl/pinctrl-amd.c | 31 +
+ drivers/pinctrl/pinctrl-gemini.c | 4 +-
+ drivers/pinctrl/pinctrl-st.c | 2 +-
+ drivers/pinctrl/qcom/Kconfig | 17 +
+ drivers/pinctrl/qcom/Makefile | 2 +
+ drivers/pinctrl/qcom/pinctrl-msm8226.c | 74 +-
+ drivers/pinctrl/qcom/pinctrl-qcm2290.c | 1129 ++++++++++++++++
+ drivers/pinctrl/qcom/pinctrl-sm6350.c | 1401 ++++++++++++++++++++
+ drivers/pinctrl/qcom/pinctrl-spmi-gpio.c | 7 +
+ drivers/pinctrl/qcom/pinctrl-spmi-mpp.c | 111 +-
+ drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 133 +-
+ drivers/pinctrl/renesas/core.c | 83 +-
+ drivers/pinctrl/renesas/pfc-r8a77950.c | 14 +
+ drivers/pinctrl/renesas/pfc-r8a77951.c | 22 +-
+ drivers/pinctrl/renesas/pfc-r8a7796.c | 22 +-
+ drivers/pinctrl/renesas/pfc-r8a77965.c | 22 +-
+ drivers/pinctrl/renesas/pinctrl-rzg2l.c | 2 +-
+ drivers/pinctrl/samsung/pinctrl-exynos-arm64.c | 108 ++
+ drivers/pinctrl/samsung/pinctrl-samsung.c | 2 +
+ drivers/pinctrl/samsung/pinctrl-samsung.h | 1 +
+ drivers/pinctrl/stm32/pinctrl-stm32.c | 20 +-
+ drivers/pinctrl/uniphier/Kconfig | 4 +
+ drivers/pinctrl/uniphier/Makefile | 1 +
+ drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 18 +
+ drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c | 35 +
+ drivers/pinctrl/uniphier/pinctrl-uniphier-nx1.c | 489 +++++++
+ drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c | 40 +
+ include/dt-bindings/pinctrl/mt65xx.h | 9 +
+ 47 files changed, 4909 insertions(+), 543 deletions(-)
+ delete mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
+ create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml
+ create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,qcm2290-pinctrl.yaml
+ create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,sm6350-pinctrl.yaml
+ delete mode 100644 Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
+ create mode 100644 Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.yaml
+ create mode 100644 drivers/pinctrl/qcom/pinctrl-qcm2290.c
+ create mode 100644 drivers/pinctrl/qcom/pinctrl-sm6350.c
+ create mode 100644 drivers/pinctrl/uniphier/pinctrl-uniphier-nx1.c
+Merging pinctrl-intel/for-next (176412f8674b pinctrl: intel: Kconfig: Add configuration menu to Intel pin control)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git pinctrl-intel/for-next
+Merge made by the 'recursive' strategy.
+ drivers/pinctrl/intel/Kconfig | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+Merging pinctrl-renesas/renesas-pinctrl (f4e260bffcf3 pinctrl: renesas: checker: Prefix common checker output)
+$ git merge -m Merge branch 'renesas-pinctrl' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git pinctrl-renesas/renesas-pinctrl
+Already up to date.
+Merging pinctrl-samsung/for-next (f9d8de699ac4 pinctrl: samsung: support ExynosAutov9 SoC pinctrl)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git pinctrl-samsung/for-next
+Merge made by the 'recursive' strategy.
+Merging pwm/for-next (3f2b16734914 pwm: mtk-disp: Implement atomic API .get_state())
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm.git pwm/for-next
+Already up to date.
+Merging userns/for-next (e9012e756d30 Merge of per_signal_struct_coredumps-for-v5.16, and ucount-fixes-for-v5.16 for testing in linux-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git userns/for-next
+Auto-merging mm/debug.c
+Auto-merging kernel/fork.c
+Auto-merging kernel/exit.c
+Auto-merging include/linux/sched.h
+Auto-merging include/linux/mm_types.h
+Auto-merging fs/proc/array.c
+Auto-merging fs/binfmt_elf.c
+Auto-merging arch/ia64/include/asm/ptrace.h
+Merge made by the 'recursive' strategy.
+ arch/ia64/include/asm/ptrace.h | 4 +-
+ arch/sparc/include/asm/ptrace.h | 8 ++--
+ fs/binfmt_elf.c | 4 +-
+ fs/binfmt_elf_fdpic.c | 2 +-
+ fs/coredump.c | 88 ++++++-----------------------------------
+ fs/exec.c | 14 +++----
+ fs/proc/array.c | 6 +--
+ include/linux/mm_types.h | 13 ------
+ include/linux/ptrace.h | 22 +++++------
+ include/linux/sched.h | 1 +
+ include/linux/sched/signal.h | 13 ++++++
+ kernel/cred.c | 5 +--
+ kernel/exit.c | 76 +++++++++++++++++++----------------
+ kernel/fork.c | 4 +-
+ kernel/signal.c | 49 ++++-------------------
+ kernel/ucount.c | 20 ++++++----
+ mm/debug.c | 4 +-
+ mm/oom_kill.c | 6 +--
+ 18 files changed, 121 insertions(+), 218 deletions(-)
+Merging ktest/for-next (170f4869e662 ktest.pl: Fix the logic for truncating the size of the log file for email)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-ktest.git ktest/for-next
+Already up to date.
+Merging kselftest/next (c3867ab5924b selftests: kvm: fix mismatched fclose() after popen())
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kselftest/next
+Merge made by the 'recursive' strategy.
+ tools/testing/selftests/kvm/x86_64/mmio_warning_test.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+Merging livepatching/for-next (cd2d68f2d6b2 Merge branch 'for-5.15/cpu-hotplug' into for-next)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/livepatching/livepatching livepatching/for-next
+Already up to date.
+Merge made by the 'recursive' strategy.
+Merging coresight/next (1efbcec2ef8c coresight: cti: Reduce scope for the variable “cs_fwnode” in cti_plat_create_connection())
+$ git merge -m Merge branch 'next' of git://git.linaro.org/kernel/coresight.git coresight/next
+Merge made by the 'recursive' strategy.
+Merging rtc/rtc-next (a5feda3b361e rtc: s3c: Add time range)
+$ git merge -m Merge branch 'rtc-next' of git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git rtc/rtc-next
+Removing drivers/rtc/rtc-tps80031.c
+Auto-merging drivers/rtc/Kconfig
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ .../devicetree/bindings/rtc/mstar,msc313-rtc.yaml | 49 +++
+ .../devicetree/bindings/rtc/nxp,pcf85063.txt | 9 +
+ MAINTAINERS | 1 +
+ drivers/rtc/Kconfig | 19 +-
+ drivers/rtc/Makefile | 2 +-
+ drivers/rtc/class.c | 20 +-
+ drivers/rtc/dev.c | 65 +++
+ drivers/rtc/interface.c | 3 +-
+ drivers/rtc/rtc-ds1302.c | 7 +
+ drivers/rtc/rtc-ds1390.c | 7 +
+ drivers/rtc/rtc-m41t80.c | 2 +-
+ drivers/rtc/rtc-mcp795.c | 7 +
+ drivers/rtc/rtc-msc313.c | 259 ++++++++++++
+ drivers/rtc/rtc-omap.c | 1 -
+ drivers/rtc/rtc-pcf2123.c | 9 +
+ drivers/rtc/rtc-pcf85063.c | 14 +
+ drivers/rtc/rtc-pcf8523.c | 434 +++++++++------------
+ drivers/rtc/rtc-rv3028.c | 74 ++++
+ drivers/rtc/rtc-rv3032.c | 80 +++-
+ drivers/rtc/rtc-rx6110.c | 2 +-
+ drivers/rtc/rtc-s3c.c | 106 +++--
+ drivers/rtc/rtc-s5m.c | 1 -
+ drivers/rtc/rtc-sun6i.c | 13 +-
+ drivers/rtc/rtc-tps80031.c | 324 ---------------
+ include/linux/rtc.h | 3 +
+ include/uapi/linux/rtc.h | 31 +-
+ 26 files changed, 902 insertions(+), 640 deletions(-)
+ create mode 100644 Documentation/devicetree/bindings/rtc/mstar,msc313-rtc.yaml
+ create mode 100644 drivers/rtc/rtc-msc313.c
+ delete mode 100644 drivers/rtc/rtc-tps80031.c
+Merging nvdimm/libnvdimm-for-next (e765f13ed126 nvdimm/pmem: move dax_attribute_group from dax to pmem)
+$ git merge -m Merge branch 'libnvdimm-for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git nvdimm/libnvdimm-for-next
+Auto-merging drivers/nvdimm/pmem.c
+Merge made by the 'recursive' strategy.
+ drivers/dax/super.c | 100 +++++++-----------------------------------
+ drivers/nvdimm/pmem.c | 43 ++++++++++++++++++++++
+ include/linux/dax.h | 2 -
+ 3 files changed, 61 insertions(+), 84 deletions(-)
+Merging at24/at24/for-next (762925405482 dt-bindings: at24: add ON Semi CAT24C04 and CAT24C05)
+$ git merge -m Merge branch 'at24/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git at24/at24/for-next
+Merge made by the 'recursive' strategy.
+ Documentation/devicetree/bindings/eeprom/at24.yaml | 6 ++++++
+ 1 file changed, 6 insertions(+)
+Merging ntb/ntb-next (f96cb827ce49 ntb: ntb_pingpong: remove redundant initialization of variables msg_data and spad_data)
+$ git merge -m Merge branch 'ntb-next' of https://github.com/jonmason/ntb.git ntb/ntb-next
+Merge made by the 'recursive' strategy.
+Merging seccomp/for-next/seccomp (d9bbdbf324cd x86: deduplicate the spectre_v2_user documentation)
+$ git merge -m Merge branch 'for-next/seccomp' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git seccomp/for-next/seccomp
+Auto-merging arch/x86/kernel/cpu/bugs.c
+Auto-merging Documentation/admin-guide/kernel-parameters.txt
+Merge made by the 'recursive' strategy.
+ Documentation/admin-guide/hw-vuln/spectre.rst | 61 +++----------------------
+ Documentation/admin-guide/kernel-parameters.txt | 5 +-
+ arch/x86/kernel/cpu/bugs.c | 4 +-
+ 3 files changed, 10 insertions(+), 60 deletions(-)
+Merging kspp/for-next/kspp (a7790b4a4310 Merge branches 'for-next/thread_info/cpu', 'for-next/overflow' and 'for-next/hardening' into for-next/kspp)
+$ git merge -m Merge branch 'for-next/kspp' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git kspp/for-next/kspp
+Auto-merging scripts/kernel-doc
+Removing scripts/gcc-plugins/cyc_complexity_plugin.c
+Auto-merging scripts/Makefile.gcc-plugins
+Auto-merging net/xfrm/xfrm_user.c
+Auto-merging lib/Makefile
+Auto-merging lib/Kconfig.debug
+Auto-merging kernel/sched/sched.h
+Auto-merging include/uapi/rdma/rdma_user_rxe.h
+Auto-merging include/linux/sched.h
+Auto-merging include/linux/ieee80211.h
+Auto-merging include/linux/filter.h
+Auto-merging include/linux/compiler_types.h
+Auto-merging drivers/staging/r8188eu/include/ieee80211.h
+Auto-merging drivers/scsi/ibmvscsi/ibmvscsi.c
+Auto-merging drivers/iommu/amd/init.c
+Auto-merging arch/s390/lib/string.c
+Auto-merging arch/riscv/kernel/asm-offsets.c
+Auto-merging arch/powerpc/kernel/smp.c
+Auto-merging arch/powerpc/Makefile
+Auto-merging arch/arm64/kernel/asm-offsets.c
+Auto-merging arch/arm64/include/asm/thread_info.h
+Auto-merging Makefile
+Auto-merging MAINTAINERS
+Merge made by the 'recursive' strategy.
+ Documentation/kbuild/gcc-plugins.rst | 28 +-
+ MAINTAINERS | 9 +
+ Makefile | 6 +-
+ arch/arm/boot/compressed/string.c | 1 +
+ arch/arm64/include/asm/thread_info.h | 1 +
+ arch/arm64/kernel/asm-offsets.c | 2 +-
+ arch/arm64/kernel/head.S | 2 +-
+ arch/powerpc/Makefile | 11 -
+ arch/powerpc/include/asm/smp.h | 17 +-
+ arch/powerpc/include/asm/thread_info.h | 3 +
+ arch/powerpc/kernel/asm-offsets.c | 4 +-
+ arch/powerpc/kernel/smp.c | 2 +-
+ arch/riscv/kernel/asm-offsets.c | 1 -
+ arch/riscv/kernel/entry.S | 5 -
+ arch/riscv/kernel/head.S | 1 -
+ arch/s390/include/asm/thread_info.h | 1 +
+ arch/s390/lib/string.c | 3 +
+ arch/x86/boot/compressed/misc.h | 2 +
+ arch/x86/boot/compressed/pgtable_64.c | 2 +
+ arch/x86/include/asm/thread_info.h | 3 +
+ arch/x86/lib/string_32.c | 1 +
+ drivers/char/pcmcia/cm4000_cs.c | 9 +-
+ drivers/crypto/chelsio/chcr_crypto.h | 14 +-
+ drivers/cxl/cxl.h | 61 ++---
+ drivers/gpu/drm/mga/mga_ioc32.c | 27 +-
+ drivers/hid/hid-cp2112.c | 14 +-
+ drivers/hid/hid-roccat-kone.c | 2 +-
+ drivers/hid/hid-roccat-kone.h | 12 +-
+ drivers/iommu/amd/init.c | 9 +-
+ drivers/macintosh/smu.c | 3 +-
+ drivers/net/can/flexcan.c | 68 ++---
+ drivers/net/can/usb/etas_es58x/es581_4.h | 2 +-
+ drivers/net/can/usb/etas_es58x/es58x_fd.h | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 4 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h | 14 +-
+ drivers/net/wireless/ath/ath10k/bmi.h | 10 +-
+ drivers/net/wireless/ath/ath10k/htt.h | 7 +-
+ drivers/net/wireless/intel/iwlegacy/commands.h | 6 +-
+ drivers/net/wireless/intel/iwlwifi/dvm/commands.h | 6 +-
+ drivers/net/wireless/intel/iwlwifi/fw/api/tx.h | 12 +-
+ drivers/scsi/aic94xx/aic94xx_sds.c | 6 +-
+ drivers/scsi/ibmvscsi/ibmvscsi.c | 3 +-
+ drivers/scsi/qla4xxx/ql4_def.h | 4 +-
+ drivers/staging/r8188eu/include/ieee80211.h | 6 +-
+ drivers/staging/rtl8712/ieee80211.h | 4 +-
+ drivers/staging/rtl8723bs/include/ieee80211.h | 6 +-
+ fs/btrfs/root-tree.c | 6 +-
+ fs/hpfs/hpfs.h | 8 +-
+ include/linux/compiler-gcc.h | 10 +-
+ include/linux/compiler_types.h | 5 -
+ include/linux/filter.h | 6 +-
+ include/linux/fortify-string.h | 77 ++++--
+ include/linux/ieee80211.h | 30 +--
+ include/linux/sched.h | 13 +-
+ include/linux/stddef.h | 65 ++++-
+ include/linux/string.h | 44 +++-
+ include/linux/thread_info.h | 2 +-
+ include/scsi/sas.h | 12 +-
+ include/uapi/drm/mga_drm.h | 22 +-
+ include/uapi/linux/dlm_device.h | 4 +-
+ include/uapi/linux/stddef.h | 37 +++
+ include/uapi/rdma/rdma_user_rxe.h | 4 +-
+ include/uapi/sound/asoc.h | 4 +-
+ kernel/kallsyms.c | 46 +++-
+ kernel/sched/sched.h | 4 -
+ lib/.gitignore | 2 +
+ lib/Kconfig.debug | 11 +
+ lib/Makefile | 34 +++
+ lib/memcpy_kunit.c | 289 +++++++++++++++++++++
+ lib/string.c | 210 +--------------
+ lib/string_helpers.c | 195 ++++++++++++++
+ lib/test_fortify/read_overflow-memchr.c | 5 +
+ lib/test_fortify/read_overflow-memchr_inv.c | 5 +
+ lib/test_fortify/read_overflow-memcmp.c | 5 +
+ lib/test_fortify/read_overflow-memscan.c | 5 +
+ lib/test_fortify/read_overflow2-memcmp.c | 5 +
+ lib/test_fortify/read_overflow2-memcpy.c | 5 +
+ lib/test_fortify/read_overflow2-memmove.c | 5 +
+ lib/test_fortify/test_fortify.h | 35 +++
+ lib/test_fortify/write_overflow-memcpy.c | 5 +
+ lib/test_fortify/write_overflow-memmove.c | 5 +
+ lib/test_fortify/write_overflow-memset.c | 5 +
+ lib/test_fortify/write_overflow-strcpy-lit.c | 5 +
+ lib/test_fortify/write_overflow-strcpy.c | 5 +
+ lib/test_fortify/write_overflow-strlcpy-src.c | 5 +
+ lib/test_fortify/write_overflow-strlcpy.c | 5 +
+ lib/test_fortify/write_overflow-strncpy-src.c | 5 +
+ lib/test_fortify/write_overflow-strncpy.c | 5 +
+ lib/test_fortify/write_overflow-strscpy.c | 5 +
+ net/xfrm/xfrm_policy.c | 4 +-
+ net/xfrm/xfrm_user.c | 2 +-
+ scripts/Makefile.gcc-plugins | 2 -
+ scripts/gcc-plugins/Kconfig | 20 +-
+ scripts/gcc-plugins/cyc_complexity_plugin.c | 69 -----
+ scripts/gcc-plugins/gcc-common.h | 132 +---------
+ scripts/gcc-plugins/gcc-generate-gimple-pass.h | 19 --
+ scripts/gcc-plugins/gcc-generate-ipa-pass.h | 19 --
+ scripts/gcc-plugins/gcc-generate-rtl-pass.h | 19 --
+ scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h | 19 --
+ scripts/gcc-plugins/structleak_plugin.c | 2 -
+ scripts/kernel-doc | 9 +
+ scripts/test_fortify.sh | 62 +++++
+ security/Kconfig | 3 +
+ security/Kconfig.hardening | 14 +-
+ 104 files changed, 1249 insertions(+), 822 deletions(-)
+ create mode 100644 lib/memcpy_kunit.c
+ create mode 100644 lib/test_fortify/read_overflow-memchr.c
+ create mode 100644 lib/test_fortify/read_overflow-memchr_inv.c
+ create mode 100644 lib/test_fortify/read_overflow-memcmp.c
+ create mode 100644 lib/test_fortify/read_overflow-memscan.c
+ create mode 100644 lib/test_fortify/read_overflow2-memcmp.c
+ create mode 100644 lib/test_fortify/read_overflow2-memcpy.c
+ create mode 100644 lib/test_fortify/read_overflow2-memmove.c
+ create mode 100644 lib/test_fortify/test_fortify.h
+ create mode 100644 lib/test_fortify/write_overflow-memcpy.c
+ create mode 100644 lib/test_fortify/write_overflow-memmove.c
+ create mode 100644 lib/test_fortify/write_overflow-memset.c
+ create mode 100644 lib/test_fortify/write_overflow-strcpy-lit.c
+ create mode 100644 lib/test_fortify/write_overflow-strcpy.c
+ create mode 100644 lib/test_fortify/write_overflow-strlcpy-src.c
+ create mode 100644 lib/test_fortify/write_overflow-strlcpy.c
+ create mode 100644 lib/test_fortify/write_overflow-strncpy-src.c
+ create mode 100644 lib/test_fortify/write_overflow-strncpy.c
+ create mode 100644 lib/test_fortify/write_overflow-strscpy.c
+ delete mode 100644 scripts/gcc-plugins/cyc_complexity_plugin.c
+ create mode 100644 scripts/test_fortify.sh
+Merging kspp-gustavo/for-next/kspp (3915822ab3a5 Merge branch 'for-next/cast-function' into for-next/kspp)
+$ git merge -m Merge branch 'for-next/kspp' of git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git kspp-gustavo/for-next/kspp
+Auto-merging kernel/trace/ftrace.c
+Auto-merging include/linux/ftrace.h
+CONFLICT (content): Merge conflict in include/linux/ftrace.h
+Auto-merging include/asm-generic/vmlinux.lds.h
+Auto-merging drivers/scsi/st.c
+Auto-merging Makefile
+Recorded preimage for 'include/linux/ftrace.h'
+Automatic merge failed; fix conflicts and then commit the result.
+$ git commit --no-edit -v -a
+Recorded resolution for 'include/linux/ftrace.h'.
+[master 0b7e34114ee6] Merge branch 'for-next/kspp' of git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git
+$ git diff -M --stat --summary HEAD^..
+ Makefile | 13 +++++++++++++
+ arch/mips/alchemy/devboards/db1550.c | 1 +
+ arch/mips/kernel/uprobes.c | 1 +
+ drivers/firewire/core-cdev.c | 32 ++++++++++++++++++++++++++------
+ drivers/pcmcia/db1xxx_ss.c | 1 +
+ include/linux/firewire.h | 11 +++++++----
+ lib/assoc_array.c | 22 ++++++++++------------
+ 7 files changed, 59 insertions(+), 22 deletions(-)
+$ git am -3 ../patches/0001-Revert-Makefile-Enable-Wcast-function-type.patch
+Applying: Revert "Makefile: Enable -Wcast-function-type"
+$ git reset HEAD^
+Unstaged changes after reset:
+M Makefile
+$ git add -A .
+$ git commit -v -a --amend
+[master 3dc9f91d7d07] Merge branch 'for-next/kspp' of git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux.git
+ Date: Mon Oct 25 18:22:26 2021 +1100
+Merging cisco/for-next (9e98c678c2d6 Linux 5.1-rc1)
+$ git merge -m Merge branch 'for-next' of https://github.com/daniel-walker/cisco-linux.git cisco/for-next
+Already up to date.
+Merging gnss/gnss-next (0f79ce970e79 gnss: drop stray semicolons)
+$ git merge -m Merge branch 'gnss-next' of git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git gnss/gnss-next
+Merge made by the 'recursive' strategy.
+ drivers/gnss/mtk.c | 2 +-
+ drivers/gnss/serial.c | 2 +-
+ drivers/gnss/sirf.c | 2 +-
+ drivers/gnss/ubx.c | 2 +-
+ 4 files changed, 4 insertions(+), 4 deletions(-)
+Merging fsi/next (7cc2f34e1f4d fsi: sbefifo: Use interruptible mutex locking)
+$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joel/fsi.git fsi/next
+Already up to date.
+Merging slimbus/for-next (6880fa6c5660 Linux 5.15-rc1)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/srini/slimbus.git slimbus/for-next
+Already up to date.
+Merging nvmem/for-next (413333fd6a88 nvmem: imx-ocotp: add support for post processing)
+$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/srini/nvmem.git nvmem/for-next
+Merge made by the 'recursive' strategy.
+Merging xarray/main (2c7e57a02708 idr test suite: Improve reporting from idr_find_test_1)
+$ git merge -m Merge branch 'main' of git://git.infradead.org/users/willy/xarray.git xarray/main
+Already up to date.
+Merging hyperv/hyperv-next (9d68cd9120e4 hv_utils: Set the maximum packet size for VSS driver to the length of the receive buffer)
+$ git merge -m Merge branch 'hyperv-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git hyperv/hyperv-next
+Already up to date.
+Merging auxdisplay/auxdisplay (97fbb29fc1eb MAINTAINERS: Add DT Bindings for Auxiliary Display Drivers) +$ git merge -m Merge branch 'auxdisplay' of https://github.com/ojeda/linux.git auxdisplay/auxdisplay +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. + .../bindings/auxdisplay/holtek,ht16k33.yaml | 32 +- + MAINTAINERS | 1 + + drivers/auxdisplay/Kconfig | 12 +- + drivers/auxdisplay/Makefile | 1 + + drivers/auxdisplay/cfag12864bfb.c | 9 +- + drivers/auxdisplay/ht16k33.c | 501 ++++++++++++++++----- + drivers/auxdisplay/img-ascii-lcd.c | 205 ++------- + drivers/auxdisplay/ks0108.c | 3 - + drivers/auxdisplay/line-display.c | 261 +++++++++++ + drivers/auxdisplay/line-display.h | 43 ++ + include/uapi/linux/map_to_14segment.h | 241 ++++++++++ + 11 files changed, 1023 insertions(+), 286 deletions(-) + create mode 100644 drivers/auxdisplay/line-display.c + create mode 100644 drivers/auxdisplay/line-display.h + create mode 100644 include/uapi/linux/map_to_14segment.h +Merging kgdb/kgdb/for-next (f8416aa29185 kernel: debug: Convert to SPDX identifier) +$ git merge -m Merge branch 'kgdb/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/danielt/linux.git kgdb/kgdb/for-next +Already up to date. +Merging hmm/hmm (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git hmm/hmm +Already up to date. +Merging fpga/for-next (57b44817a8d6 MAINTAINERS: Drop outdated FPGA Manager website) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git fpga/for-next +Already up to date. +Merging kunit/test (6880fa6c5660 Linux 5.15-rc1) +$ git merge -m Merge branch 'test' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kunit/test +Already up to date. +Merging cfi/cfi/next (ff1176468d36 Linux 5.14-rc3) +$ git merge -m Merge branch 'cfi/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git cfi/cfi/next +Already up to date. +Merging kunit-next/kunit (17ac23eb43f0 kunit: Reset suite count after running tests) +$ git merge -m Merge branch 'kunit' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kunit-next/kunit +Merge made by the 'recursive' strategy. 
+ Documentation/dev-tools/kunit/running_tips.rst | 11 +- + lib/kunit/executor.c | 152 ++- + lib/kunit/executor_test.c | 110 ++- + lib/kunit/kunit-test.c | 14 +- + lib/kunit/test.c | 6 +- + tools/testing/kunit/kunit.py | 151 ++- + tools/testing/kunit/kunit_json.py | 56 +- + tools/testing/kunit/kunit_kernel.py | 92 +- + tools/testing/kunit/kunit_parser.py | 1015 ++++++++++++++------ + tools/testing/kunit/kunit_tool_test.py | 211 +++- + .../test_is_test_passed-all_passed_nested.log | 34 + + .../test_data/test_is_test_passed-kselftest.log | 14 + + .../test_data/test_is_test_passed-missing_plan.log | 31 + + .../testing/kunit/test_data/test_strip_hyphen.log | 16 + + 14 files changed, 1402 insertions(+), 511 deletions(-) + create mode 100644 tools/testing/kunit/test_data/test_is_test_passed-all_passed_nested.log + create mode 100644 tools/testing/kunit/test_data/test_is_test_passed-kselftest.log + create mode 100644 tools/testing/kunit/test_data/test_is_test_passed-missing_plan.log + create mode 100644 tools/testing/kunit/test_data/test_strip_hyphen.log +Merging trivial/for-next (9ff9b0d392ea Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git trivial/for-next +Already up to date. +Merging mhi/mhi-next (8c61951b372d Merge tag 'soundwire-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire into char-misc-next) +$ git merge -m Merge branch 'mhi-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mani/mhi.git mhi/mhi-next +Already up to date. +Merging memblock/for-next (e888fa7bb882 memblock: Check memory add/cap ordering) +$ git merge -m Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git memblock/for-next +Already up to date. +Merging init/init-user-pointers (38b082236e77 initramfs: use vfs_utimes in do_copy) +$ git merge -m Merge branch 'init-user-pointers' of git://git.infradead.org/users/hch/misc.git init/init-user-pointers +Already up to date. +Merging counters/counters (e71ba9452f0b Linux 5.11-rc2) +$ git merge -m Merge branch 'counters' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux.git counters/counters +Already up to date. +Merging rust/rust-next (a5085d70a3ba MAINTAINERS: Rust) +$ git merge -m Merge branch 'rust-next' of https://github.com/Rust-for-Linux/linux.git rust/rust-next +Auto-merging scripts/kconfig/confdata.c +CONFLICT (content): Merge conflict in scripts/kconfig/confdata.c +Auto-merging scripts/Makefile.modfinal +CONFLICT (content): Merge conflict in scripts/Makefile.modfinal +Auto-merging scripts/Makefile.lib +Auto-merging scripts/Makefile.build +Auto-merging samples/Kconfig +Auto-merging lib/Kconfig.debug +Auto-merging kernel/printk/printk.c +Auto-merging kernel/kallsyms.c +Auto-merging arch/riscv/Makefile +Auto-merging Makefile +CONFLICT (content): Merge conflict in Makefile +Auto-merging MAINTAINERS +Resolved 'Makefile' using previous resolution. +Resolved 'scripts/Makefile.modfinal' using previous resolution. +Resolved 'scripts/kconfig/confdata.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master f25af31247bb] Merge branch 'rust-next' of https://github.com/Rust-for-Linux/linux.git +$ git diff -M --stat --summary HEAD^.. 
+ .gitignore | 5 + + .rustfmt.toml | 12 + + Documentation/doc-guide/kernel-doc.rst | 3 + + Documentation/index.rst | 1 + + Documentation/kbuild/kbuild.rst | 4 + + Documentation/process/changes.rst | 13 + + Documentation/rust/arch-support.rst | 35 + + Documentation/rust/assets/favicon-16x16.png | Bin 0 -> 798 bytes + Documentation/rust/assets/favicon-32x32.png | Bin 0 -> 2076 bytes + Documentation/rust/assets/rust-logo.png | Bin 0 -> 53976 bytes + Documentation/rust/coding.rst | 92 + + Documentation/rust/docs.rst | 110 + + Documentation/rust/index.rst | 20 + + Documentation/rust/quick-start.rst | 218 ++ + MAINTAINERS | 14 + + Makefile | 153 +- + arch/arm/rust/target.json | 27 + + arch/arm64/rust/target.json | 34 + + arch/powerpc/rust/target.json | 29 + + arch/riscv/Makefile | 1 + + arch/riscv/rust/rv32ima.json | 36 + + arch/riscv/rust/rv32imac.json | 36 + + arch/riscv/rust/rv64ima.json | 36 + + arch/riscv/rust/rv64imac.json | 36 + + arch/x86/rust/target.json | 36 + + include/linux/kallsyms.h | 2 +- + include/linux/spinlock.h | 17 +- + init/Kconfig | 31 +- + kernel/kallsyms.c | 7 + + kernel/livepatch/core.c | 4 +- + kernel/printk/printk.c | 5 +- + lib/Kconfig.debug | 144 ++ + lib/vsprintf.c | 12 + + rust/.gitignore | 7 + + rust/Makefile | 355 +++ + rust/alloc/README.md | 32 + + rust/alloc/alloc.rs | 425 ++++ + rust/alloc/borrow.rs | 494 ++++ + rust/alloc/boxed.rs | 1725 ++++++++++++++ + rust/alloc/collections/mod.rs | 117 + + rust/alloc/fmt.rs | 587 +++++ + rust/alloc/lib.rs | 200 ++ + rust/alloc/macros.rs | 126 ++ + rust/alloc/prelude/mod.rs | 17 + + rust/alloc/prelude/v1.rs | 16 + + rust/alloc/raw_vec.rs | 612 +++++ + rust/alloc/slice.rs | 1271 +++++++++++ + rust/alloc/str.rs | 614 +++++ + rust/alloc/string.rs | 2842 +++++++++++++++++++++++ + rust/alloc/vec/drain.rs | 157 ++ + rust/alloc/vec/drain_filter.rs | 145 ++ + rust/alloc/vec/into_iter.rs | 297 +++ + rust/alloc/vec/is_zero.rs | 106 + + rust/alloc/vec/mod.rs | 3261 +++++++++++++++++++++++++++ + rust/alloc/vec/partial_eq.rs | 49 + + rust/alloc/vec/set_len_on_drop.rs | 30 + + rust/alloc/vec/spec_extend.rs | 172 ++ + rust/bindgen_parameters | 13 + + rust/build_error.rs | 33 + + rust/compiler_builtins.rs | 57 + + rust/exports.c | 16 + + rust/helpers.c | 301 +++ + rust/kernel/allocator.rs | 63 + + rust/kernel/bindings.rs | 46 + + rust/kernel/bindings_helper.h | 25 + + rust/kernel/buffer.rs | 39 + + rust/kernel/build_assert.rs | 80 + + rust/kernel/c_types.rs | 119 + + rust/kernel/chrdev.rs | 212 ++ + rust/kernel/error.rs | 523 +++++ + rust/kernel/file.rs | 131 ++ + rust/kernel/file_operations.rs | 715 ++++++ + rust/kernel/io_buffer.rs | 153 ++ + rust/kernel/io_mem.rs | 207 ++ + rust/kernel/iov_iter.rs | 81 + + rust/kernel/lib.rs | 251 +++ + rust/kernel/linked_list.rs | 247 ++ + rust/kernel/miscdev.rs | 111 + + rust/kernel/module_param.rs | 497 ++++ + rust/kernel/of.rs | 101 + + rust/kernel/pages.rs | 162 ++ + rust/kernel/platdev.rs | 153 ++ + rust/kernel/power.rs | 118 + + rust/kernel/prelude.rs | 26 + + rust/kernel/print.rs | 441 ++++ + rust/kernel/random.rs | 50 + + rust/kernel/raw_list.rs | 361 +++ + rust/kernel/rbtree.rs | 562 +++++ + rust/kernel/security.rs | 56 + + rust/kernel/static_assert.rs | 39 + + rust/kernel/std_vendor.rs | 150 ++ + rust/kernel/str.rs | 253 +++ + rust/kernel/sync/arc.rs | 480 ++++ + rust/kernel/sync/condvar.rs | 132 ++ + rust/kernel/sync/guard.rs | 91 + + rust/kernel/sync/locked_by.rs | 112 + + rust/kernel/sync/mod.rs | 80 + + rust/kernel/sync/mutex.rs | 101 + + rust/kernel/sync/spinlock.rs | 102 + + 
rust/kernel/sysctl.rs | 198 ++ + rust/kernel/task.rs | 182 ++ + rust/kernel/types.rs | 228 ++ + rust/kernel/user_ptr.rs | 175 ++ + rust/macros/helpers.rs | 79 + + rust/macros/lib.rs | 128 ++ + rust/macros/module.rs | 678 ++++++ + samples/Kconfig | 2 + + samples/Makefile | 1 + + samples/rust/Kconfig | 113 + + samples/rust/Makefile | 12 + + samples/rust/rust_chrdev.rs | 51 + + samples/rust/rust_minimal.rs | 38 + + samples/rust/rust_miscdev.rs | 150 ++ + samples/rust/rust_module_parameters.rs | 72 + + samples/rust/rust_print.rs | 57 + + samples/rust/rust_random.rs | 61 + + samples/rust/rust_semaphore.rs | 177 ++ + samples/rust/rust_semaphore_c.c | 212 ++ + samples/rust/rust_stack_probing.rs | 40 + + samples/rust/rust_sync.rs | 81 + + scripts/Makefile.build | 22 + + scripts/Makefile.lib | 12 + + scripts/Makefile.modfinal | 8 +- + scripts/generate_rust_analyzer.py | 133 ++ + scripts/is_rust_module.sh | 19 + + scripts/kallsyms.c | 42 +- + scripts/rust-version.sh | 31 + + tools/include/linux/kallsyms.h | 2 +- + tools/include/linux/lockdep.h | 2 +- + tools/lib/perf/include/perf/event.h | 2 +- + tools/lib/symbol/kallsyms.h | 2 +- + 131 files changed, 25298 insertions(+), 32 deletions(-) + create mode 100644 .rustfmt.toml + create mode 100644 Documentation/rust/arch-support.rst + create mode 100644 Documentation/rust/assets/favicon-16x16.png + create mode 100644 Documentation/rust/assets/favicon-32x32.png + create mode 100644 Documentation/rust/assets/rust-logo.png + create mode 100644 Documentation/rust/coding.rst + create mode 100644 Documentation/rust/docs.rst + create mode 100644 Documentation/rust/index.rst + create mode 100644 Documentation/rust/quick-start.rst + create mode 100644 arch/arm/rust/target.json + create mode 100644 arch/arm64/rust/target.json + create mode 100644 arch/powerpc/rust/target.json + create mode 100644 arch/riscv/rust/rv32ima.json + create mode 100644 arch/riscv/rust/rv32imac.json + create mode 100644 arch/riscv/rust/rv64ima.json + create mode 100644 arch/riscv/rust/rv64imac.json + create mode 100644 arch/x86/rust/target.json + create mode 100644 rust/.gitignore + create mode 100644 rust/Makefile + create mode 100644 rust/alloc/README.md + create mode 100644 rust/alloc/alloc.rs + create mode 100644 rust/alloc/borrow.rs + create mode 100644 rust/alloc/boxed.rs + create mode 100644 rust/alloc/collections/mod.rs + create mode 100644 rust/alloc/fmt.rs + create mode 100644 rust/alloc/lib.rs + create mode 100644 rust/alloc/macros.rs + create mode 100644 rust/alloc/prelude/mod.rs + create mode 100644 rust/alloc/prelude/v1.rs + create mode 100644 rust/alloc/raw_vec.rs + create mode 100644 rust/alloc/slice.rs + create mode 100644 rust/alloc/str.rs + create mode 100644 rust/alloc/string.rs + create mode 100644 rust/alloc/vec/drain.rs + create mode 100644 rust/alloc/vec/drain_filter.rs + create mode 100644 rust/alloc/vec/into_iter.rs + create mode 100644 rust/alloc/vec/is_zero.rs + create mode 100644 rust/alloc/vec/mod.rs + create mode 100644 rust/alloc/vec/partial_eq.rs + create mode 100644 rust/alloc/vec/set_len_on_drop.rs + create mode 100644 rust/alloc/vec/spec_extend.rs + create mode 100644 rust/bindgen_parameters + create mode 100644 rust/build_error.rs + create mode 100644 rust/compiler_builtins.rs + create mode 100644 rust/exports.c + create mode 100644 rust/helpers.c + create mode 100644 rust/kernel/allocator.rs + create mode 100644 rust/kernel/bindings.rs + create mode 100644 rust/kernel/bindings_helper.h + create mode 100644 rust/kernel/buffer.rs + create mode 100644 
rust/kernel/build_assert.rs + create mode 100644 rust/kernel/c_types.rs + create mode 100644 rust/kernel/chrdev.rs + create mode 100644 rust/kernel/error.rs + create mode 100644 rust/kernel/file.rs + create mode 100644 rust/kernel/file_operations.rs + create mode 100644 rust/kernel/io_buffer.rs + create mode 100644 rust/kernel/io_mem.rs + create mode 100644 rust/kernel/iov_iter.rs + create mode 100644 rust/kernel/lib.rs + create mode 100644 rust/kernel/linked_list.rs + create mode 100644 rust/kernel/miscdev.rs + create mode 100644 rust/kernel/module_param.rs + create mode 100644 rust/kernel/of.rs + create mode 100644 rust/kernel/pages.rs + create mode 100644 rust/kernel/platdev.rs + create mode 100644 rust/kernel/power.rs + create mode 100644 rust/kernel/prelude.rs + create mode 100644 rust/kernel/print.rs + create mode 100644 rust/kernel/random.rs + create mode 100644 rust/kernel/raw_list.rs + create mode 100644 rust/kernel/rbtree.rs + create mode 100644 rust/kernel/security.rs + create mode 100644 rust/kernel/static_assert.rs + create mode 100644 rust/kernel/std_vendor.rs + create mode 100644 rust/kernel/str.rs + create mode 100644 rust/kernel/sync/arc.rs + create mode 100644 rust/kernel/sync/condvar.rs + create mode 100644 rust/kernel/sync/guard.rs + create mode 100644 rust/kernel/sync/locked_by.rs + create mode 100644 rust/kernel/sync/mod.rs + create mode 100644 rust/kernel/sync/mutex.rs + create mode 100644 rust/kernel/sync/spinlock.rs + create mode 100644 rust/kernel/sysctl.rs + create mode 100644 rust/kernel/task.rs + create mode 100644 rust/kernel/types.rs + create mode 100644 rust/kernel/user_ptr.rs + create mode 100644 rust/macros/helpers.rs + create mode 100644 rust/macros/lib.rs + create mode 100644 rust/macros/module.rs + create mode 100644 samples/rust/Kconfig + create mode 100644 samples/rust/Makefile + create mode 100644 samples/rust/rust_chrdev.rs + create mode 100644 samples/rust/rust_minimal.rs + create mode 100644 samples/rust/rust_miscdev.rs + create mode 100644 samples/rust/rust_module_parameters.rs + create mode 100644 samples/rust/rust_print.rs + create mode 100644 samples/rust/rust_random.rs + create mode 100644 samples/rust/rust_semaphore.rs + create mode 100644 samples/rust/rust_semaphore_c.c + create mode 100644 samples/rust/rust_stack_probing.rs + create mode 100644 samples/rust/rust_sync.rs + create mode 100755 scripts/generate_rust_analyzer.py + create mode 100755 scripts/is_rust_module.sh + create mode 100755 scripts/rust-version.sh +$ git am -3 ../patches/0001-Kbuild-fix-for-kbuild-split-DEBUG_CFLAGS-out-to-scri.patch +Applying: Kbuild: fix for "kbuild: split DEBUG_CFLAGS out to scripts/Makefile.debug" +$ git reset HEAD^ +Unstaged changes after reset: +M scripts/Makefile.debug +$ git add -A . +$ git commit -v -a --amend +[master 5d3feb164648] Merge branch 'rust-next' of https://github.com/Rust-for-Linux/linux.git + Date: Mon Oct 25 18:29:51 2021 +1100 +Merging cxl/next (ed97afb53365 cxl/pci: Disambiguate cxl_pci further from cxl_mem) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git cxl/next +Auto-merging drivers/nvdimm/core.c +Auto-merging drivers/nvdimm/btt.c +Auto-merging drivers/cxl/cxl.h +Merge made by the 'recursive' strategy. 
+ Documentation/driver-api/cxl/memory-devices.rst | 6 + + drivers/cxl/acpi.c | 129 ++- + drivers/cxl/core/Makefile | 1 + + drivers/cxl/core/bus.c | 119 +-- + drivers/cxl/core/core.h | 11 +- + drivers/cxl/core/mbox.c | 787 ++++++++++++++++ + drivers/cxl/core/memdev.c | 118 ++- + drivers/cxl/core/pmem.c | 39 +- + drivers/cxl/cxl.h | 48 +- + drivers/cxl/cxlmem.h | 202 +++- + drivers/cxl/pci.c | 1121 ++--------------------- + drivers/cxl/pmem.c | 163 +++- + drivers/nvdimm/btt.c | 11 +- + drivers/nvdimm/btt_devs.c | 14 +- + drivers/nvdimm/core.c | 40 +- + drivers/nvdimm/label.c | 139 ++- + drivers/nvdimm/label.h | 94 +- + drivers/nvdimm/namespace_devs.c | 95 +- + drivers/nvdimm/nd-core.h | 5 +- + drivers/nvdimm/nd.h | 185 +++- + drivers/nvdimm/pfn_devs.c | 2 +- + include/linux/nd.h | 4 +- + tools/testing/cxl/Kbuild | 38 + + tools/testing/cxl/config_check.c | 13 + + tools/testing/cxl/mock_acpi.c | 109 +++ + tools/testing/cxl/mock_pmem.c | 24 + + tools/testing/cxl/test/Kbuild | 10 + + tools/testing/cxl/test/cxl.c | 576 ++++++++++++ + tools/testing/cxl/test/mem.c | 256 ++++++ + tools/testing/cxl/test/mock.c | 171 ++++ + tools/testing/cxl/test/mock.h | 27 + + 31 files changed, 3168 insertions(+), 1389 deletions(-) + create mode 100644 drivers/cxl/core/mbox.c + create mode 100644 tools/testing/cxl/Kbuild + create mode 100644 tools/testing/cxl/config_check.c + create mode 100644 tools/testing/cxl/mock_acpi.c + create mode 100644 tools/testing/cxl/mock_pmem.c + create mode 100644 tools/testing/cxl/test/Kbuild + create mode 100644 tools/testing/cxl/test/cxl.c + create mode 100644 tools/testing/cxl/test/mem.c + create mode 100644 tools/testing/cxl/test/mock.c + create mode 100644 tools/testing/cxl/test/mock.h +Merging folio/for-next (121703c1c817 mm/writeback: Add folio_write_one) +$ git merge -m Merge branch 'for-next' of git://git.infradead.org/users/willy/pagecache.git folio/for-next +Auto-merging mm/vmscan.c +Auto-merging mm/swapfile.c +Auto-merging mm/shmem.c +Auto-merging mm/page_io.c +Auto-merging mm/migrate.c +Auto-merging mm/mempolicy.c +Auto-merging mm/memory.c +Auto-merging mm/huge_memory.c +Auto-merging mm/filemap.c +Auto-merging kernel/bpf/verifier.c +Auto-merging include/linux/writeback.h +Auto-merging include/linux/pagemap.h +Auto-merging include/linux/netfs.h +Auto-merging include/linux/mm_types.h +Auto-merging include/linux/mm.h +Auto-merging include/linux/backing-dev.h +Auto-merging fs/io_uring.c +CONFLICT (modify/delete): fs/cachefiles/rdwr.c deleted in HEAD and modified in folio/for-next. Version folio/for-next of fs/cachefiles/rdwr.c left in tree. +Auto-merging fs/afs/write.c +Automatic merge failed; fix conflicts and then commit the result. +$ git rm -f fs/cachefiles/rdwr.c +rm 'fs/cachefiles/rdwr.c' +$ git commit --no-edit -v -a +[master 88309af70e6b] Merge branch 'for-next' of git://git.infradead.org/users/willy/pagecache.git +$ git diff -M --stat --summary HEAD^.. 
+ Documentation/core-api/cachetlb.rst | 6 + + Documentation/core-api/mm-api.rst | 5 + + Documentation/filesystems/netfs_library.rst | 2 + + arch/arc/include/asm/cacheflush.h | 1 + + arch/arm/include/asm/cacheflush.h | 1 + + arch/m68k/include/asm/cacheflush_mm.h | 1 + + arch/mips/include/asm/cacheflush.h | 2 + + arch/nds32/include/asm/cacheflush.h | 1 + + arch/nios2/include/asm/cacheflush.h | 3 +- + arch/parisc/include/asm/cacheflush.h | 3 +- + arch/sh/include/asm/cacheflush.h | 3 +- + arch/xtensa/include/asm/cacheflush.h | 5 +- + fs/afs/write.c | 9 +- + fs/io_uring.c | 2 +- + fs/jfs/jfs_metapage.c | 1 + + include/asm-generic/cacheflush.h | 6 + + include/linux/backing-dev.h | 6 +- + include/linux/flex_proportions.h | 9 +- + include/linux/gfp.h | 22 +- + include/linux/highmem-internal.h | 11 + + include/linux/highmem.h | 37 ++ + include/linux/huge_mm.h | 15 - + include/linux/ksm.h | 4 +- + include/linux/memcontrol.h | 264 ++++++++----- + include/linux/migrate.h | 4 + + include/linux/mm.h | 239 +++++++++--- + include/linux/mm_inline.h | 103 +++-- + include/linux/mm_types.h | 77 ++++ + include/linux/mmdebug.h | 20 + + include/linux/netfs.h | 77 ++-- + include/linux/page-flags.h | 267 +++++++++---- + include/linux/page_idle.h | 99 +++-- + include/linux/page_owner.h | 8 +- + include/linux/page_ref.h | 158 +++++++- + include/linux/pagemap.h | 585 ++++++++++++++++++---------- + include/linux/rmap.h | 10 +- + include/linux/swap.h | 17 +- + include/linux/vmstat.h | 113 +++++- + include/linux/writeback.h | 9 +- + include/trace/events/pagemap.h | 46 ++- + include/trace/events/writeback.h | 28 +- + kernel/bpf/verifier.c | 2 +- + kernel/events/uprobes.c | 3 +- + lib/flex_proportions.c | 28 +- + mm/Makefile | 2 +- + mm/compaction.c | 4 +- + mm/filemap.c | 567 +++++++++++++-------------- + mm/folio-compat.c | 142 +++++++ + mm/huge_memory.c | 7 +- + mm/hugetlb.c | 2 +- + mm/internal.h | 36 +- + mm/khugepaged.c | 8 +- + mm/ksm.c | 34 +- + mm/memcontrol.c | 356 +++++++++-------- + mm/memory-failure.c | 2 +- + mm/memory.c | 20 +- + mm/mempolicy.c | 10 + + mm/memremap.c | 2 +- + mm/migrate.c | 189 +++++---- + mm/mlock.c | 3 +- + mm/page-writeback.c | 476 ++++++++++++---------- + mm/page_alloc.c | 14 +- + mm/page_io.c | 4 +- + mm/page_owner.c | 10 +- + mm/rmap.c | 14 +- + mm/shmem.c | 7 +- + mm/swap.c | 197 +++++----- + mm/swap_state.c | 2 +- + mm/swapfile.c | 8 +- + mm/userfaultfd.c | 2 +- + mm/util.c | 111 +++--- + mm/vmscan.c | 8 +- + mm/workingset.c | 52 +-- + 73 files changed, 2906 insertions(+), 1695 deletions(-) + create mode 100644 mm/folio-compat.c +$ git am -3 ../patches/0001-fix-up-for-9p-untested-Convert-to-using-the-netfs-he.patch +Applying: fix up for "9p: Convert to using the netfs helper lib to do reads and caching" +$ git reset HEAD^ +Unstaged changes after reset: +M fs/9p/vfs_file.c +$ git add -A . 
+$ git commit -v -a --amend +[master 1e919673970d] Merge branch 'for-next' of git://git.infradead.org/users/willy/pagecache.git + Date: Mon Oct 25 18:47:15 2021 +1100 +Merging bitmap/bitmap-master-5.15 (785cb064e2f8 vsprintf: rework bitmap_list_string) +$ git merge -m Merge branch 'bitmap-master-5.15' of https://guthub.com/norov/linux.git bitmap/bitmap-master-5.15 +Removing tools/include/asm-generic/bitops/find.h +Auto-merging lib/vsprintf.c +Auto-merging kernel/time/clocksource.c +Auto-merging include/linux/cpumask.h +Auto-merging fs/f2fs/segment.c +Auto-merging drivers/tty/n_tty.c +Auto-merging drivers/scsi/lpfc/lpfc_sli.c +Auto-merging drivers/pci/controller/dwc/pci-dra7xx.c +Auto-merging drivers/net/virtio_net.c +Auto-merging drivers/gpu/drm/etnaviv/etnaviv_gpu.c +Auto-merging drivers/block/rnbd/rnbd-clt.c +Auto-merging block/blk-mq.c +Auto-merging arch/x86/Kconfig +Auto-merging arch/s390/kvm/kvm-s390.c +Auto-merging arch/s390/Kconfig +Auto-merging arch/parisc/include/asm/bitops.h +CONFLICT (content): Merge conflict in arch/parisc/include/asm/bitops.h +Auto-merging arch/mips/Kconfig +Auto-merging arch/m68k/include/asm/bitops.h +Auto-merging arch/csky/include/asm/bitops.h +Auto-merging arch/arm64/Kconfig +Auto-merging MAINTAINERS +Resolved 'arch/parisc/include/asm/bitops.h' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit --no-edit -v -a +[master c658fce18f03] Merge branch 'bitmap-master-5.15' of https://guthub.com/norov/linux.git +$ git diff -M --stat --summary HEAD^.. + MAINTAINERS | 4 +- + arch/alpha/include/asm/bitops.h | 2 - + arch/arc/Kconfig | 1 - + arch/arc/include/asm/bitops.h | 1 - + arch/arm/include/asm/bitops.h | 1 - + arch/arm64/Kconfig | 1 - + arch/arm64/include/asm/bitops.h | 1 - + arch/csky/include/asm/bitops.h | 1 - + arch/h8300/include/asm/bitops.h | 1 - + arch/hexagon/include/asm/bitops.h | 1 - + arch/ia64/include/asm/bitops.h | 2 - + arch/m68k/include/asm/bitops.h | 2 - + arch/mips/Kconfig | 1 - + arch/mips/include/asm/bitops.h | 1 - + arch/openrisc/include/asm/bitops.h | 1 - + arch/parisc/include/asm/bitops.h | 1 - + arch/powerpc/include/asm/bitops.h | 2 - + arch/powerpc/include/asm/cputhreads.h | 2 +- + arch/powerpc/platforms/pasemi/dma_lib.c | 4 +- + arch/riscv/include/asm/bitops.h | 1 - + arch/s390/Kconfig | 1 - + arch/s390/include/asm/bitops.h | 1 - + arch/s390/kvm/kvm-s390.c | 2 +- + arch/sh/include/asm/bitops.h | 1 - + arch/sparc/include/asm/bitops_32.h | 1 - + arch/sparc/include/asm/bitops_64.h | 2 - + arch/x86/Kconfig | 1 - + arch/x86/include/asm/bitops.h | 2 - + arch/x86/kernel/apic/vector.c | 4 +- + arch/x86/um/Kconfig | 1 - + arch/xtensa/include/asm/bitops.h | 1 - + block/blk-mq.c | 2 +- + drivers/block/rnbd/rnbd-clt.c | 2 +- + drivers/dma/ti/edma.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 +- + drivers/hwmon/ltc2992.c | 3 +- + drivers/iio/adc/ad7124.c | 2 +- + drivers/infiniband/hw/irdma/hw.c | 16 +- + drivers/media/cec/core/cec-core.c | 2 +- + drivers/media/mc/mc-devnode.c | 2 +- + drivers/mmc/host/renesas_sdhi_core.c | 2 +- + drivers/net/virtio_net.c | 2 +- + drivers/pci/controller/dwc/pci-dra7xx.c | 2 +- + drivers/scsi/lpfc/lpfc_sli.c | 10 +- + drivers/soc/fsl/qbman/bman_portal.c | 2 +- + drivers/soc/fsl/qbman/qman_portal.c | 2 +- + drivers/soc/ti/k3-ringacc.c | 4 +- + drivers/tty/n_tty.c | 2 +- + drivers/virt/acrn/ioreq.c | 3 +- + fs/f2fs/segment.c | 8 +- + fs/ocfs2/cluster/heartbeat.c | 2 +- + fs/ocfs2/dlm/dlmdomain.c | 4 +- + fs/ocfs2/dlm/dlmmaster.c | 18 +- + 
fs/ocfs2/dlm/dlmrecovery.c | 2 +- + fs/ocfs2/dlm/dlmthread.c | 2 +- + include/asm-generic/bitops.h | 1 - + include/asm-generic/bitops/le.h | 64 ---- + include/linux/bitmap.h | 34 +- + include/linux/bitops.h | 34 -- + include/linux/cpumask.h | 46 ++- + include/linux/find.h | 372 +++++++++++++++++++++ + kernel/time/clocksource.c | 4 +- + lib/Kconfig | 3 - + lib/find_bit.c | 21 ++ + lib/find_bit_benchmark.c | 21 ++ + lib/genalloc.c | 2 +- + lib/test_bitmap.c | 37 ++ + lib/vsprintf.c | 24 +- + mm/percpu.c | 35 +- + net/ncsi/ncsi-manage.c | 4 +- + tools/include/asm-generic/bitops.h | 1 - + tools/include/asm-generic/bitops/find.h | 145 -------- + tools/include/linux/bitmap.h | 7 +- + .../bitops => tools/include/linux}/find.h | 54 ++- + tools/lib/find_bit.c | 20 ++ + 75 files changed, 637 insertions(+), 440 deletions(-) + create mode 100644 include/linux/find.h + delete mode 100644 tools/include/asm-generic/bitops/find.h + rename {include/asm-generic/bitops => tools/include/linux}/find.h (83%) +Merging zstd/zstd-1.4.10 (464413496acb MAINTAINERS: Add maintainer entry for zstd) +$ git merge -m Merge branch 'zstd-1.4.10' of https://github.com/terrelln/linux.git zstd/zstd-1.4.10 +Removing lib/zstd/zstd_opt.h +Removing lib/zstd/zstd_internal.h +Removing lib/zstd/zstd_common.c +Removing lib/zstd/mem.h +Removing lib/zstd/huf_decompress.c +Removing lib/zstd/huf_compress.c +Removing lib/zstd/huf.h +Removing lib/zstd/fse_decompress.c +Removing lib/zstd/fse_compress.c +Removing lib/zstd/fse.h +Removing lib/zstd/error_private.h +Removing lib/zstd/entropy_common.c +Removing lib/zstd/decompress.c +Removing lib/zstd/compress.c +Removing lib/zstd/bitstream.h +Auto-merging fs/f2fs/super.c +Auto-merging fs/f2fs/compress.c +Auto-merging MAINTAINERS +Merge made by the 'recursive' strategy. 
+ MAINTAINERS | 12 + + crypto/zstd.c | 28 +- + fs/btrfs/zstd.c | 68 +- + fs/f2fs/compress.c | 56 +- + fs/f2fs/super.c | 2 +- + fs/pstore/platform.c | 2 +- + fs/squashfs/zstd_wrapper.c | 16 +- + include/linux/zstd.h | 1252 ++---- + include/linux/zstd_errors.h | 77 + + include/linux/zstd_lib.h | 2432 +++++++++++ + lib/decompress_unzstd.c | 48 +- + lib/zstd/Makefile | 46 +- + lib/zstd/bitstream.h | 380 -- + lib/zstd/common/bitstream.h | 437 ++ + lib/zstd/common/compiler.h | 170 + + lib/zstd/common/cpu.h | 194 + + lib/zstd/common/debug.c | 24 + + lib/zstd/common/debug.h | 101 + + lib/zstd/common/entropy_common.c | 357 ++ + lib/zstd/common/error_private.c | 56 + + lib/zstd/common/error_private.h | 66 + + lib/zstd/common/fse.h | 710 ++++ + lib/zstd/common/fse_decompress.c | 390 ++ + lib/zstd/common/huf.h | 356 ++ + lib/zstd/common/mem.h | 259 ++ + lib/zstd/common/zstd_common.c | 83 + + lib/zstd/common/zstd_deps.h | 125 + + lib/zstd/common/zstd_internal.h | 450 +++ + lib/zstd/compress.c | 3485 ---------------- + lib/zstd/compress/fse_compress.c | 625 +++ + lib/zstd/compress/hist.c | 165 + + lib/zstd/compress/hist.h | 75 + + lib/zstd/compress/huf_compress.c | 905 +++++ + lib/zstd/compress/zstd_compress.c | 5109 ++++++++++++++++++++++++ + lib/zstd/compress/zstd_compress_internal.h | 1188 ++++++ + lib/zstd/compress/zstd_compress_literals.c | 158 + + lib/zstd/compress/zstd_compress_literals.h | 29 + + lib/zstd/compress/zstd_compress_sequences.c | 439 ++ + lib/zstd/compress/zstd_compress_sequences.h | 54 + + lib/zstd/compress/zstd_compress_superblock.c | 850 ++++ + lib/zstd/compress/zstd_compress_superblock.h | 32 + + lib/zstd/compress/zstd_cwksp.h | 482 +++ + lib/zstd/compress/zstd_double_fast.c | 519 +++ + lib/zstd/compress/zstd_double_fast.h | 32 + + lib/zstd/compress/zstd_fast.c | 496 +++ + lib/zstd/compress/zstd_fast.h | 31 + + lib/zstd/compress/zstd_lazy.c | 1414 +++++++ + lib/zstd/compress/zstd_lazy.h | 81 + + lib/zstd/compress/zstd_ldm.c | 686 ++++ + lib/zstd/compress/zstd_ldm.h | 110 + + lib/zstd/compress/zstd_ldm_geartab.h | 103 + + lib/zstd/compress/zstd_opt.c | 1346 +++++++ + lib/zstd/compress/zstd_opt.h | 50 + + lib/zstd/decompress.c | 2531 ------------ + lib/zstd/decompress/huf_decompress.c | 1206 ++++++ + lib/zstd/decompress/zstd_ddict.c | 241 ++ + lib/zstd/decompress/zstd_ddict.h | 44 + + lib/zstd/decompress/zstd_decompress.c | 2085 ++++++++++ + lib/zstd/decompress/zstd_decompress_block.c | 1540 +++++++ + lib/zstd/decompress/zstd_decompress_block.h | 62 + + lib/zstd/decompress/zstd_decompress_internal.h | 202 + + lib/zstd/decompress_sources.h | 28 + + lib/zstd/entropy_common.c | 243 -- + lib/zstd/error_private.h | 53 - + lib/zstd/fse.h | 575 --- + lib/zstd/fse_compress.c | 795 ---- + lib/zstd/fse_decompress.c | 325 -- + lib/zstd/huf.h | 212 - + lib/zstd/huf_compress.c | 773 ---- + lib/zstd/huf_decompress.c | 960 ----- + lib/zstd/mem.h | 151 - + lib/zstd/zstd_common.c | 75 - + lib/zstd/zstd_compress_module.c | 160 + + lib/zstd/zstd_decompress_module.c | 105 + + lib/zstd/zstd_internal.h | 273 -- + lib/zstd/zstd_opt.h | 1014 ----- + 76 files changed, 27373 insertions(+), 12941 deletions(-) + create mode 100644 include/linux/zstd_errors.h + create mode 100644 include/linux/zstd_lib.h + delete mode 100644 lib/zstd/bitstream.h + create mode 100644 lib/zstd/common/bitstream.h + create mode 100644 lib/zstd/common/compiler.h + create mode 100644 lib/zstd/common/cpu.h + create mode 100644 lib/zstd/common/debug.c + create mode 100644 lib/zstd/common/debug.h + create mode 100644 
lib/zstd/common/entropy_common.c + create mode 100644 lib/zstd/common/error_private.c + create mode 100644 lib/zstd/common/error_private.h + create mode 100644 lib/zstd/common/fse.h + create mode 100644 lib/zstd/common/fse_decompress.c + create mode 100644 lib/zstd/common/huf.h + create mode 100644 lib/zstd/common/mem.h + create mode 100644 lib/zstd/common/zstd_common.c + create mode 100644 lib/zstd/common/zstd_deps.h + create mode 100644 lib/zstd/common/zstd_internal.h + delete mode 100644 lib/zstd/compress.c + create mode 100644 lib/zstd/compress/fse_compress.c + create mode 100644 lib/zstd/compress/hist.c + create mode 100644 lib/zstd/compress/hist.h + create mode 100644 lib/zstd/compress/huf_compress.c + create mode 100644 lib/zstd/compress/zstd_compress.c + create mode 100644 lib/zstd/compress/zstd_compress_internal.h + create mode 100644 lib/zstd/compress/zstd_compress_literals.c + create mode 100644 lib/zstd/compress/zstd_compress_literals.h + create mode 100644 lib/zstd/compress/zstd_compress_sequences.c + create mode 100644 lib/zstd/compress/zstd_compress_sequences.h + create mode 100644 lib/zstd/compress/zstd_compress_superblock.c + create mode 100644 lib/zstd/compress/zstd_compress_superblock.h + create mode 100644 lib/zstd/compress/zstd_cwksp.h + create mode 100644 lib/zstd/compress/zstd_double_fast.c + create mode 100644 lib/zstd/compress/zstd_double_fast.h + create mode 100644 lib/zstd/compress/zstd_fast.c + create mode 100644 lib/zstd/compress/zstd_fast.h + create mode 100644 lib/zstd/compress/zstd_lazy.c + create mode 100644 lib/zstd/compress/zstd_lazy.h + create mode 100644 lib/zstd/compress/zstd_ldm.c + create mode 100644 lib/zstd/compress/zstd_ldm.h + create mode 100644 lib/zstd/compress/zstd_ldm_geartab.h + create mode 100644 lib/zstd/compress/zstd_opt.c + create mode 100644 lib/zstd/compress/zstd_opt.h + delete mode 100644 lib/zstd/decompress.c + create mode 100644 lib/zstd/decompress/huf_decompress.c + create mode 100644 lib/zstd/decompress/zstd_ddict.c + create mode 100644 lib/zstd/decompress/zstd_ddict.h + create mode 100644 lib/zstd/decompress/zstd_decompress.c + create mode 100644 lib/zstd/decompress/zstd_decompress_block.c + create mode 100644 lib/zstd/decompress/zstd_decompress_block.h + create mode 100644 lib/zstd/decompress/zstd_decompress_internal.h + create mode 100644 lib/zstd/decompress_sources.h + delete mode 100644 lib/zstd/entropy_common.c + delete mode 100644 lib/zstd/error_private.h + delete mode 100644 lib/zstd/fse.h + delete mode 100644 lib/zstd/fse_compress.c + delete mode 100644 lib/zstd/fse_decompress.c + delete mode 100644 lib/zstd/huf.h + delete mode 100644 lib/zstd/huf_compress.c + delete mode 100644 lib/zstd/huf_decompress.c + delete mode 100644 lib/zstd/mem.h + delete mode 100644 lib/zstd/zstd_common.c + create mode 100644 lib/zstd/zstd_compress_module.c + create mode 100644 lib/zstd/zstd_decompress_module.c + delete mode 100644 lib/zstd/zstd_internal.h + delete mode 100644 lib/zstd/zstd_opt.h +Merging efi/next (720dff78de36 efi: Allow efi=runtime) +$ git merge -m Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git efi/next +Already up to date. 
+Merging akpm-current/current (cc0170e845bf ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL) +$ git merge --no-ff akpm-current/current +Auto-merging security/Kconfig +Auto-merging net/netfilter/ipvs/ip_vs_ctl.c +Auto-merging net/ipv4/tcp.c +Auto-merging mm/workingset.c +Auto-merging mm/vmscan.c +Auto-merging mm/userfaultfd.c +Auto-merging mm/swapfile.c +Auto-merging mm/swap.c +Auto-merging mm/shmem.c +Auto-merging mm/rmap.c +Auto-merging mm/readahead.c +Auto-merging mm/percpu.c +Auto-merging mm/page_owner.c +Auto-merging mm/page_alloc.c +Auto-merging mm/page-writeback.c +Auto-merging mm/oom_kill.c +Auto-merging mm/nommu.c +Auto-merging mm/migrate.c +Auto-merging mm/memremap.c +Auto-merging mm/mempolicy.c +CONFLICT (content): Merge conflict in mm/mempolicy.c +Auto-merging mm/memory.c +Auto-merging mm/memory-failure.c +Auto-merging mm/memcontrol.c +Auto-merging mm/memblock.c +Auto-merging mm/khugepaged.c +Auto-merging mm/kasan/report.c +Auto-merging mm/kasan/kasan.h +Auto-merging mm/internal.h +CONFLICT (content): Merge conflict in mm/internal.h +Auto-merging mm/hugetlb.c +Auto-merging mm/huge_memory.c +Auto-merging mm/gup.c +Auto-merging mm/filemap.c +CONFLICT (content): Merge conflict in mm/filemap.c +Auto-merging mm/debug.c +Auto-merging mm/compaction.c +Auto-merging mm/backing-dev.c +Auto-merging lib/vsprintf.c +Auto-merging lib/test_kasan.c +Auto-merging lib/bootconfig.c +CONFLICT (content): Merge conflict in lib/bootconfig.c +Auto-merging lib/Kconfig.debug +Auto-merging lib/Kconfig +Auto-merging kernel/workqueue.c +Auto-merging kernel/trace/ftrace.c +Auto-merging kernel/sched/topology.c +Auto-merging kernel/sched/sched.h +Auto-merging kernel/sched/core.c +CONFLICT (content): Merge conflict in kernel/sched/core.c +Auto-merging kernel/printk/printk.c +Auto-merging kernel/locking/lockdep.c +Auto-merging kernel/kthread.c +Auto-merging kernel/fork.c +Auto-merging kernel/exit.c +CONFLICT (content): Merge conflict in kernel/exit.c +Auto-merging kernel/dma/swiotlb.c +Auto-merging init/main.c +Auto-merging include/trace/events/writeback.h +Auto-merging include/linux/swap.h +Auto-merging include/linux/stacktrace.h +Auto-merging include/linux/spinlock.h +Auto-merging include/linux/sched/mm.h +CONFLICT (content): Merge conflict in include/linux/sched/mm.h +Auto-merging include/linux/sched.h +Auto-merging include/linux/rmap.h +Auto-merging include/linux/pagemap.h +Auto-merging include/linux/page_owner.h +Auto-merging include/linux/page-flags.h +Auto-merging include/linux/mm_types.h +Auto-merging include/linux/mm.h +Auto-merging include/linux/migrate.h +CONFLICT (content): Merge conflict in include/linux/migrate.h +Auto-merging include/linux/kernel.h +Auto-merging include/linux/kasan.h +Auto-merging include/linux/kallsyms.h +Auto-merging include/linux/highmem.h +Auto-merging include/linux/gfp.h +Auto-merging include/linux/fs.h +Auto-merging include/linux/compiler_types.h +Auto-merging include/linux/compiler_attributes.h +Auto-merging include/linux/compiler-gcc.h +Auto-merging include/linux/backing-dev.h +Auto-merging fs/ramfs/inode.c +Auto-merging fs/proc/vmcore.c +Auto-merging fs/proc/base.c +Auto-merging fs/ocfs2/dlm/dlmrecovery.c +Auto-merging fs/internal.h +Auto-merging fs/inode.c +Auto-merging fs/exec.c +Auto-merging fs/buffer.c +Auto-merging fs/binfmt_elf.c +Auto-merging drivers/xen/swiotlb-xen.c +Auto-merging drivers/virtio/Kconfig +Auto-merging drivers/of/of_reserved_mem.c +CONFLICT (content): Merge conflict in drivers/of/of_reserved_mem.c +Auto-merging drivers/macintosh/smu.c 
+Auto-merging drivers/gpu/drm/i915/i915_vma.c +Auto-merging drivers/block/zram/zram_drv.c +Auto-merging arch/x86/xen/mmu_pv.c +Auto-merging arch/x86/kernel/unwind_orc.c +Auto-merging arch/x86/Kconfig +Auto-merging arch/sh/boards/mach-se/7724/setup.c +Auto-merging arch/sh/boards/mach-ecovec24/setup.c +Auto-merging arch/s390/kernel/uv.c +Auto-merging arch/s390/kernel/setup.c +Auto-merging arch/powerpc/platforms/pseries/svm.c +Auto-merging arch/powerpc/platforms/pseries/setup.c +Auto-merging arch/powerpc/kernel/smp.c +Auto-merging arch/mips/loongson64/init.c +Auto-merging arch/arm64/mm/mmu.c +Auto-merging arch/arm64/Kconfig +Auto-merging arch/Kconfig +Auto-merging Makefile +Auto-merging MAINTAINERS +Auto-merging Documentation/translations/zh_CN/core-api/memory-hotplug.rst +Auto-merging Documentation/admin-guide/kernel-parameters.txt +Resolved 'drivers/of/of_reserved_mem.c' using previous resolution. +Resolved 'include/linux/migrate.h' using previous resolution. +Resolved 'include/linux/sched/mm.h' using previous resolution. +Resolved 'kernel/exit.c' using previous resolution. +Resolved 'kernel/sched/core.c' using previous resolution. +Recorded preimage for 'lib/bootconfig.c' +Resolved 'mm/filemap.c' using previous resolution. +Resolved 'mm/internal.h' using previous resolution. +Resolved 'mm/mempolicy.c' using previous resolution. +Automatic merge failed; fix conflicts and then commit the result. +$ git commit -v -a +Recorded resolution for 'lib/bootconfig.c'. +[master cc4940688364] Merge branch 'akpm-current/current' +$ git diff -M --stat --summary HEAD^.. + Documentation/admin-guide/blockdev/zram.rst | 8 + + Documentation/admin-guide/cgroup-v1/memory.rst | 11 +- + Documentation/admin-guide/kernel-parameters.txt | 14 +- + Documentation/admin-guide/mm/damon/index.rst | 1 + + Documentation/admin-guide/mm/damon/reclaim.rst | 235 +++++++ + Documentation/admin-guide/mm/damon/start.rst | 11 + + Documentation/admin-guide/mm/damon/usage.rst | 109 +++- + Documentation/admin-guide/mm/hugetlbpage.rst | 42 +- + Documentation/admin-guide/mm/index.rst | 2 + + Documentation/admin-guide/mm/memory-hotplug.rst | 143 ++++- + Documentation/{vm => admin-guide/mm}/swap_numa.rst | 0 + Documentation/{vm => admin-guide/mm}/zswap.rst | 0 + Documentation/core-api/memory-hotplug.rst | 3 - + Documentation/dev-tools/kcov.rst | 5 + + Documentation/dev-tools/kfence.rst | 23 +- + .../translations/zh_CN/core-api/memory-hotplug.rst | 4 - + Documentation/vm/damon/design.rst | 29 +- + Documentation/vm/damon/faq.rst | 5 +- + Documentation/vm/damon/index.rst | 1 - + Documentation/vm/index.rst | 26 +- + Documentation/vm/page_owner.rst | 23 +- + MAINTAINERS | 16 +- + Makefile | 15 + + arch/Kconfig | 28 + + arch/alpha/kernel/core_irongate.c | 2 +- + arch/alpha/kernel/traps.c | 4 +- + arch/arc/mm/init.c | 6 +- + arch/arm/mach-hisi/platmcpm.c | 2 +- + arch/arm/mach-rpc/ecard.c | 2 +- + arch/arm/mm/init.c | 2 +- + arch/arm64/Kconfig | 4 + + arch/arm64/mm/kasan_init.c | 16 + + arch/arm64/mm/mmu.c | 4 +- + arch/ia64/mm/contig.c | 2 +- + arch/ia64/mm/init.c | 2 +- + arch/m68k/mm/mcfmmu.c | 3 +- + arch/m68k/mm/motorola.c | 6 +- + arch/microblaze/mm/pgtable.c | 3 +- + arch/mips/loongson64/init.c | 4 +- + arch/mips/mm/init.c | 2 +- + arch/mips/sgi-ip27/ip27-memory.c | 3 +- + arch/mips/sgi-ip30/ip30-setup.c | 6 +- + arch/powerpc/Kconfig | 1 + + arch/powerpc/configs/skiroot_defconfig | 1 - + arch/powerpc/include/asm/machdep.h | 2 +- + arch/powerpc/include/asm/sections.h | 13 - + arch/powerpc/kernel/dt_cpu_ftrs.c | 4 +- + 
arch/powerpc/kernel/paca.c | 8 +- + arch/powerpc/kernel/setup-common.c | 2 +- + arch/powerpc/kernel/setup_64.c | 4 +- + arch/powerpc/kernel/smp.c | 2 +- + arch/powerpc/mm/book3s64/radix_tlb.c | 4 +- + arch/powerpc/mm/hugetlbpage.c | 9 +- + arch/powerpc/mm/pgtable_32.c | 7 +- + arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- + arch/powerpc/platforms/powernv/setup.c | 4 +- + arch/powerpc/platforms/pseries/setup.c | 2 +- + arch/powerpc/platforms/pseries/svm.c | 3 +- + arch/riscv/kernel/setup.c | 4 +- + arch/s390/include/asm/sections.h | 12 - + arch/s390/kernel/setup.c | 11 +- + arch/s390/kernel/smp.c | 4 +- + arch/s390/kernel/uv.c | 2 +- + arch/s390/mm/init.c | 3 - + arch/s390/mm/kasan_init.c | 2 +- + arch/sh/boards/mach-ap325rxa/setup.c | 2 +- + arch/sh/boards/mach-ecovec24/setup.c | 4 +- + arch/sh/boards/mach-kfr2r09/setup.c | 2 +- + arch/sh/boards/mach-migor/setup.c | 2 +- + arch/sh/boards/mach-se/7724/setup.c | 4 +- + arch/sparc/kernel/smp_64.c | 2 +- + arch/um/kernel/mem.c | 2 +- + arch/x86/Kconfig | 6 +- + arch/x86/kernel/aperture_64.c | 13 +- + arch/x86/kernel/setup.c | 4 +- + arch/x86/kernel/setup_percpu.c | 2 +- + arch/x86/kernel/unwind_orc.c | 2 +- + arch/x86/mm/init.c | 2 +- + arch/x86/mm/init_32.c | 45 +- + arch/x86/mm/kasan_init_64.c | 4 +- + arch/x86/mm/numa.c | 2 +- + arch/x86/mm/numa_emulation.c | 2 +- + arch/x86/xen/mmu_hvm.c | 37 +- + arch/x86/xen/mmu_pv.c | 6 +- + arch/x86/xen/p2m.c | 2 +- + arch/x86/xen/setup.c | 6 +- + drivers/base/Makefile | 2 +- + drivers/base/arch_numa.c | 92 ++- + drivers/base/node.c | 9 +- + drivers/block/zram/zram_drv.c | 66 +- + drivers/firmware/efi/memmap.c | 2 +- + drivers/gpu/drm/drm_dp_mst_topology.c | 6 +- + drivers/gpu/drm/drm_mm.c | 9 +- + drivers/gpu/drm/i915/i915_vma.c | 5 +- + drivers/gpu/drm/i915/intel_runtime_pm.c | 23 +- + drivers/macintosh/smu.c | 2 +- + drivers/mmc/core/mmc_test.c | 1 - + drivers/of/kexec.c | 4 +- + drivers/of/of_reserved_mem.c | 5 +- + drivers/rapidio/devices/rio_mport_cdev.c | 9 +- + drivers/s390/char/sclp_early.c | 2 +- + drivers/usb/early/xhci-dbc.c | 10 +- + drivers/virtio/Kconfig | 3 +- + drivers/virtio/virtio_mem.c | 301 ++++++--- + drivers/xen/swiotlb-xen.c | 2 +- + fs/binfmt_elf.c | 56 +- + fs/buffer.c | 56 +- + fs/coda/cnode.c | 13 +- + fs/coda/coda_linux.c | 39 +- + fs/coda/coda_linux.h | 6 +- + fs/coda/dir.c | 20 +- + fs/coda/file.c | 12 +- + fs/coda/psdev.c | 14 +- + fs/coda/upcall.c | 3 +- + fs/exec.c | 4 +- + fs/hfs/inode.c | 6 +- + fs/hfsplus/catalog.c | 16 +- + fs/hfsplus/dir.c | 4 +- + fs/hfsplus/hfsplus_raw.h | 12 +- + fs/hfsplus/inode.c | 12 +- + fs/hfsplus/xattr.c | 18 +- + fs/hugetlbfs/inode.c | 7 +- + fs/inode.c | 46 +- + fs/internal.h | 1 - + fs/ocfs2/alloc.c | 2 + + fs/ocfs2/dlm/dlmrecovery.c | 1 - + fs/ocfs2/inode.c | 4 +- + fs/ocfs2/journal.c | 26 +- + fs/ocfs2/journal.h | 3 +- + fs/ocfs2/namei.c | 49 +- + fs/ocfs2/namei.h | 2 + + fs/ocfs2/refcounttree.c | 15 +- + fs/ocfs2/super.c | 40 +- + fs/ocfs2/xattr.c | 12 +- + fs/ocfs2/xattr.h | 1 + + fs/posix_acl.c | 3 +- + fs/proc/base.c | 21 +- + fs/proc/fd.c | 23 +- + fs/proc/page.c | 40 +- + fs/proc/task_mmu.c | 28 +- + fs/proc/vmcore.c | 105 +++- + fs/ramfs/inode.c | 11 +- + fs/seq_file.c | 16 - + fs/sysv/super.c | 6 +- + include/asm-generic/sections.h | 89 ++- + include/kunit/test.h | 13 +- + include/linux/backing-dev.h | 1 - + include/linux/bottom_half.h | 1 + + include/linux/cma.h | 1 + + include/linux/compiler-gcc.h | 8 + + include/linux/compiler_attributes.h | 10 + + include/linux/compiler_types.h | 12 + + include/linux/container_of.h | 
40 ++ + include/linux/cpuset.h | 17 + + include/linux/crash_dump.h | 26 +- + include/linux/damon.h | 231 ++++++- + include/linux/fs.h | 1 + + include/linux/gfp.h | 8 +- + include/linux/highmem.h | 28 +- + include/linux/hugetlb.h | 36 +- + include/linux/io-mapping.h | 6 - + include/linux/kallsyms.h | 13 +- + include/linux/kasan.h | 8 + + include/linux/kernel.h | 36 +- + include/linux/kfence.h | 21 +- + include/linux/list.h | 4 +- + include/linux/llist.h | 4 +- + include/linux/memblock.h | 48 +- + include/linux/memory.h | 26 +- + include/linux/memory_hotplug.h | 3 - + include/linux/mempolicy.h | 10 +- + include/linux/memremap.h | 6 + + include/linux/migrate.h | 24 +- + include/linux/migrate_mode.h | 13 + + include/linux/mm.h | 57 +- + include/linux/mm_types.h | 2 - + include/linux/mmzone.h | 38 +- + include/linux/node.h | 4 +- + include/linux/page-flags.h | 25 +- + include/linux/page_owner.h | 12 +- + include/linux/pagemap.h | 50 ++ + include/linux/percpu.h | 6 +- + include/linux/plist.h | 5 +- + include/linux/rmap.h | 8 +- + include/linux/rwsem.h | 1 - + include/linux/sched.h | 8 + + include/linux/sched/mm.h | 21 + + include/linux/seq_file.h | 17 +- + include/linux/signal.h | 1 - + include/linux/slab.h | 135 ++-- + include/linux/slub_def.h | 13 +- + include/linux/smp.h | 1 - + include/linux/spinlock.h | 1 - + include/linux/stackdepot.h | 34 +- + include/linux/stacktrace.h | 1 + + include/linux/string_helpers.h | 1 + + include/linux/swap.h | 1 - + include/linux/vmalloc.h | 24 +- + include/media/media-entity.h | 3 +- + include/trace/events/mmap_lock.h | 48 +- + include/trace/events/vmscan.h | 38 ++ + include/trace/events/writeback.h | 7 - + init/initramfs.c | 2 +- + init/main.c | 44 +- + ipc/ipc_sysctl.c | 32 +- + kernel/cgroup/cpuset.c | 23 + + kernel/cpu.c | 2 +- + kernel/dma/swiotlb.c | 2 +- + kernel/exit.c | 2 +- + kernel/extable.c | 35 +- + kernel/fork.c | 60 +- + kernel/hung_task.c | 44 ++ + kernel/kcov.c | 36 +- + kernel/kexec_file.c | 5 + + kernel/kthread.c | 21 +- + kernel/locking/lockdep.c | 18 +- + kernel/printk/printk.c | 4 +- + kernel/resource.c | 54 +- + kernel/sched/core.c | 35 +- + kernel/sched/sched.h | 4 +- + kernel/sched/topology.c | 1 - + kernel/stacktrace.c | 30 + + kernel/sysctl.c | 8 +- + kernel/trace/ftrace.c | 2 +- + kernel/tsacct.c | 2 +- + kernel/workqueue.c | 2 +- + lib/Kconfig | 4 + + lib/Kconfig.debug | 8 +- + lib/Kconfig.kasan | 2 +- + lib/Kconfig.kfence | 26 +- + lib/bootconfig.c | 2 +- + lib/cpumask.c | 2 +- + lib/scatterlist.c | 11 +- + lib/stackdepot.c | 151 +++-- + lib/test_kasan.c | 29 +- + lib/test_kasan_module.c | 2 + + lib/vsprintf.c | 3 +- + mm/Kconfig | 18 +- + mm/backing-dev.c | 48 -- + mm/cma.c | 26 +- + mm/compaction.c | 10 +- + mm/damon/Kconfig | 24 +- + mm/damon/Makefile | 4 +- + mm/damon/core.c | 401 +++++++++++- + mm/damon/dbgfs-test.h | 54 ++ + mm/damon/dbgfs.c | 425 +++++++++++-- + mm/damon/paddr.c | 273 ++++++++ + mm/damon/prmtv-common.c | 133 ++++ + mm/damon/prmtv-common.h | 20 + + mm/damon/reclaim.c | 354 +++++++++++ + mm/damon/vaddr.c | 167 +++-- + mm/debug.c | 20 +- + mm/debug_vm_pgtable.c | 7 +- + mm/filemap.c | 36 +- + mm/gup.c | 5 +- + mm/huge_memory.c | 2 + + mm/hugetlb.c | 692 +++++++++++++++++++-- + mm/internal.h | 22 + + mm/kasan/common.c | 8 +- + mm/kasan/generic.c | 14 +- + mm/kasan/kasan.h | 2 +- + mm/kasan/report.c | 17 +- + mm/kasan/shadow.c | 5 + + mm/kfence/core.c | 200 ++++-- + mm/kfence/kfence.h | 2 + + mm/kfence/kfence_test.c | 14 +- + mm/khugepaged.c | 10 +- + mm/list_lru.c | 7 +- + mm/memblock.c | 35 +- + 
mm/memcontrol.c | 187 +++--- + mm/memory-failure.c | 139 +++-- + mm/memory.c | 171 +++-- + mm/memory_hotplug.c | 59 +- + mm/mempolicy.c | 149 +++-- + mm/memremap.c | 20 + + mm/migrate.c | 61 ++ + mm/mmap.c | 2 +- + mm/mprotect.c | 5 +- + mm/mremap.c | 86 ++- + mm/nommu.c | 6 - + mm/oom_kill.c | 9 +- + mm/page-writeback.c | 11 +- + mm/page_alloc.c | 115 ++-- + mm/page_ext.c | 2 +- + mm/page_isolation.c | 29 +- + mm/page_owner.c | 26 +- + mm/percpu.c | 8 +- + mm/readahead.c | 2 +- + mm/rmap.c | 22 +- + mm/shmem.c | 44 +- + mm/slab.c | 16 - + mm/slab_common.c | 8 - + mm/slub.c | 109 ++-- + mm/sparse-vmemmap.c | 2 +- + mm/sparse.c | 2 +- + mm/swap.c | 23 +- + mm/swapfile.c | 6 +- + mm/truncate.c | 19 +- + mm/userfaultfd.c | 8 +- + mm/vmalloc.c | 109 +++- + mm/vmscan.c | 161 ++++- + mm/vmstat.c | 76 ++- + mm/workingset.c | 10 + + mm/zsmalloc.c | 10 +- + net/ipv4/tcp.c | 1 - + net/ipv4/udp.c | 1 - + net/netfilter/ipvs/ip_vs_ctl.c | 1 - + net/openvswitch/meter.c | 1 - + net/sctp/protocol.c | 1 - + net/sysctl_net.c | 2 +- + scripts/checkpatch.pl | 36 +- + scripts/const_structs.checkpatch | 4 + + scripts/decodecode | 2 +- + scripts/spelling.txt | 16 + + security/Kconfig | 14 - + tools/testing/selftests/damon/debugfs_attrs.sh | 13 + + tools/testing/selftests/memory-hotplug/config | 1 - + tools/testing/selftests/proc/.gitignore | 1 + + tools/testing/selftests/proc/Makefile | 2 + + tools/testing/selftests/proc/proc-tid0.c | 81 +++ + tools/testing/selftests/vm/.gitignore | 1 + + tools/testing/selftests/vm/Makefile | 1 + + tools/testing/selftests/vm/hugepage-mremap.c | 160 +++++ + tools/testing/selftests/vm/ksm_tests.c | 154 ++++- + tools/testing/selftests/vm/madv_populate.c | 15 +- + tools/testing/selftests/vm/run_vmtests.sh | 11 + + tools/testing/selftests/vm/split_huge_page_test.c | 2 +- + tools/testing/selftests/vm/transhuge-stress.c | 2 +- + tools/testing/selftests/vm/userfaultfd.c | 157 +++-- + tools/vm/page-types.c | 38 +- + tools/vm/page_owner_sort.c | 94 ++- + 331 files changed, 7436 insertions(+), 2547 deletions(-) + create mode 100644 Documentation/admin-guide/mm/damon/reclaim.rst + rename Documentation/{vm => admin-guide/mm}/swap_numa.rst (100%) + rename Documentation/{vm => admin-guide/mm}/zswap.rst (100%) + create mode 100644 include/linux/container_of.h + create mode 100644 mm/damon/paddr.c + create mode 100644 mm/damon/prmtv-common.c + create mode 100644 mm/damon/prmtv-common.h + create mode 100644 mm/damon/reclaim.c + create mode 100644 tools/testing/selftests/proc/proc-tid0.c + create mode 100644 tools/testing/selftests/vm/hugepage-mremap.c +$ git am -3 ../mmotm/broken-out/mm-filemap-check-if-thp-has-hwpoisoned-subpage-for-pmd-page-fault-vs-folios.patch +Applying: mm-filemap-check-if-thp-has-hwpoisoned-subpage-for-pmd-page-fault-vs-folios +$ git reset HEAD^ +Unstaged changes after reset: +M include/linux/page-flags.h +$ git add -A . +$ git commit -v -a --amend +[master 8bcb8d3e8882] Merge branch 'akpm-current/current' + Date: Mon Oct 25 19:53:33 2021 +1100 +$ git am -3 ../patches/0001-kvmalloc-etc-moved-to-slab.h.patch +Applying: kvmalloc etc moved to slab.h +$ git reset HEAD^ +Unstaged changes after reset: +M drivers/hwmon/occ/p9_sbe.c +$ git add -A . +$ git commit -v -a --amend +[master 11eb436edcf1] Merge branch 'akpm-current/current' + Date: Mon Oct 25 19:53:33 2021 +1100 +$ git clone -s -l -n -q . 
../rebase-tmp
+$ cd ../rebase-tmp
+$ git checkout -b akpm remotes/origin/akpm/master
+Switched to a new branch 'akpm'
+$ git rebase --onto master remotes/origin/akpm/master-base
+Rebasing (1/6) Rebasing (2/6) Rebasing (3/6) Rebasing (4/6) Rebasing (5/6) Rebasing (6/6) Successfully rebased and updated refs/heads/akpm.
+$ cd ../next
+$ git fetch -f ../rebase-tmp akpm:akpm/master
+From ../rebase-tmp
+ + 5b3cbcec307a...39573caa1324 akpm -> akpm/master (forced update)
+$ rm -rf ../rebase-tmp
+Merging akpm/master (39573caa1324 lib/stackdepot: allow optional init and stack_table allocation by kvmalloc() - fixup3)
+$ git merge --no-ff akpm/master
+Merge made by the 'recursive' strategy.
+ arch/arm64/mm/kasan_init.c         |  2 +-
+ drivers/gpu/drm/drm_modeset_lock.c |  9 +++++++++
+ init/Kconfig                       |  2 ++
+ mm/kasan/hw_tags.c                 | 14 +++++++++++++-
+ mm/kasan/sw_tags.c                 |  2 +-
+ mm/memcontrol.c                    |  4 ----
+ mm/migrate.c                       |  6 ------
+ 7 files changed, 26 insertions(+), 13 deletions(-)
diff --git a/localversion-next b/localversion-next
new file mode 100644
index 0000000000000..9f35725a38829
--- /dev/null
+++ b/localversion-next
@@ -0,0 +1 @@
+-next-20211025

From e7d8df9815f07db6319a2f83ddaa4595c1bd961a Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Thu, 7 Oct 2021 07:35:50 +0300
Subject: [PATCH 0981/1202] notifier: Add blocking_notifier_call_chain_empty()

Add blocking_notifier_call_chain_empty() that returns true if the call
chain is empty and false otherwise.

Signed-off-by: Dmitry Osipenko
---
 include/linux/notifier.h |  2 ++
 kernel/notifier.c        | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 87069b8459af9..f0b1d600d9f6d 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -173,6 +173,8 @@ extern int blocking_notifier_call_chain_robust(struct blocking_notifier_head *nh
 extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh,
 		unsigned long val_up, unsigned long val_down, void *v);
 
+extern bool blocking_notifier_call_chain_empty(struct blocking_notifier_head *nh);
+
 #define NOTIFY_DONE		0x0000		/* Don't care */
 #define NOTIFY_OK		0x0001		/* Suits me */
 #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
diff --git a/kernel/notifier.c b/kernel/notifier.c
index b8251dc0bc0f1..1f7ba8988b908 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -322,6 +322,20 @@ int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
 }
 EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
 
+/**
+ * blocking_notifier_call_chain_empty - Check whether notifier chain is empty
+ * @nh: Pointer to head of the blocking notifier chain
+ *
+ * Checks whether notifier chain is empty.
+ *
+ * Returns true if the notifier chain is empty, false otherwise.
+ */
+bool blocking_notifier_call_chain_empty(struct blocking_notifier_head *nh)
+{
+	return !rcu_access_pointer(nh->head);
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_empty);
+
 /*
  * Raw notifier chain routines.  There is no protection;
  * the caller must provide it. Use at your own risk!

From 2426c168eb8c06bcb5cd86238d5a0742f2a248a1 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sun, 17 Oct 2021 20:30:53 +0300
Subject: [PATCH 0982/1202] notifier: Add atomic/blocking_notifier_has_unique_priority()

Add atomic/blocking_notifier_has_unique_priority() helpers which return
true if the given handler has a unique priority in the notifier chain.
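
A rough usage sketch (illustrative only; the my_* names below are not
part of this patch) of how the two new helpers are intended to be used
together:

    static BLOCKING_NOTIFIER_HEAD(my_chain);

    static int my_register(struct notifier_block *nb)
    {
            int ret;

            ret = blocking_notifier_chain_register(&my_chain, nb);
            if (ret)
                    return ret;

            /* priorities on my_chain are expected to be unique */
            WARN_ON(!blocking_notifier_has_unique_priority(&my_chain, nb));

            return 0;
    }

    static bool my_chain_unused(void)
    {
            /* true when no handler has been registered at all */
            return blocking_notifier_call_chain_empty(&my_chain);
    }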
Signed-off-by: Dmitry Osipenko --- include/linux/notifier.h | 5 +++ kernel/notifier.c | 74 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index f0b1d600d9f6d..bbdd395fa1036 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -175,6 +175,11 @@ extern int raw_notifier_call_chain_robust(struct raw_notifier_head *nh, extern bool blocking_notifier_call_chain_empty(struct blocking_notifier_head *nh); +extern bool atomic_notifier_has_unique_priority(struct atomic_notifier_head *nh, + struct notifier_block *nb); +extern bool blocking_notifier_has_unique_priority(struct blocking_notifier_head *nh, + struct notifier_block *nb); + #define NOTIFY_DONE 0x0000 /* Don't care */ #define NOTIFY_OK 0x0001 /* Suits me */ #define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ diff --git a/kernel/notifier.c b/kernel/notifier.c index 1f7ba8988b908..cf0e1c4bd364a 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -203,6 +203,40 @@ int atomic_notifier_call_chain(struct atomic_notifier_head *nh, EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); NOKPROBE_SYMBOL(atomic_notifier_call_chain); +/** + * atomic_notifier_has_unique_priority - Checks whether notifier's priority is unique + * @nh: Pointer to head of the atomic notifier chain + * @n: Entry in notifier chain to check + * + * Checks whether there is another notifier in the chain with the same priority. + * Must be called in process context. + * + * Returns true if priority is unique, false otherwise. + */ +bool atomic_notifier_has_unique_priority(struct atomic_notifier_head *nh, + struct notifier_block *n) +{ + struct notifier_block **nl = &nh->head; + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&nh->lock, flags); + + while ((*nl) != NULL && (*nl)->priority >= n->priority) { + if ((*nl)->priority == n->priority && (*nl) != n) { + ret = false; + break; + } + + nl = &((*nl)->next); + } + + spin_unlock_irqrestore(&nh->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(atomic_notifier_has_unique_priority); + /* * Blocking notifier chain routines. All access to the chain is * synchronized by an rwsem. @@ -336,6 +370,46 @@ bool blocking_notifier_call_chain_empty(struct blocking_notifier_head *nh) } EXPORT_SYMBOL_GPL(blocking_notifier_call_chain_empty); +/** + * blocking_notifier_has_unique_priority - Checks whether notifier's priority is unique + * @nh: Pointer to head of the blocking notifier chain + * @n: Entry in notifier chain to check + * + * Checks whether there is another notifier in the chain with the same priority. + * Must be called in process context. + * + * Returns true if priority is unique, false otherwise. + */ +bool blocking_notifier_has_unique_priority(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + struct notifier_block **nl = &nh->head; + bool ret = true; + + /* + * This code gets used during boot-up, when task switching is + * not yet working and interrupts must remain disabled. At + * such times we must not call down_write(). + */ + if (system_state != SYSTEM_BOOTING) + down_write(&nh->rwsem); + + while ((*nl) != NULL && (*nl)->priority >= n->priority) { + if ((*nl)->priority == n->priority && (*nl) != n) { + ret = false; + break; + } + + nl = &((*nl)->next); + } + + if (system_state != SYSTEM_BOOTING) + up_write(&nh->rwsem); + + return ret; +} +EXPORT_SYMBOL_GPL(blocking_notifier_has_unique_priority); + /* * Raw notifier chain routines. 
There is no protection;
 * the caller must provide it. Use at your own risk!

From dbff910acb21b059ad570bbe211e73ed1645c462 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sun, 17 Oct 2021 16:23:45 +0300
Subject: [PATCH 0983/1202] reboot: Correct typo in a comment

Correct s/implemenations/implementations/ in <linux/reboot.h>.

Signed-off-by: Dmitry Osipenko
---
 include/linux/reboot.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index af907a3d68d1d..7c288013a3caa 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -63,7 +63,7 @@ struct pt_regs;
 extern void machine_crash_shutdown(struct pt_regs *);
 
 /*
- * Architecture independent implemenations of sys_reboot commands.
+ * Architecture independent implementations of sys_reboot commands.
  */
 
 extern void kernel_restart_prepare(char *cmd);

From ca49c2ec49e2c4c34f58682912499978739d80dd Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sun, 17 Oct 2021 20:37:24 +0300
Subject: [PATCH 0984/1202] reboot: Warn if restart handler has duplicated priority

It doesn't make sense to register restart handlers with the same
priority; normally it's a direct sign of a problem. Add a sanity check
which ensures that there are no two restart handlers registered with
the same priority.

Signed-off-by: Dmitry Osipenko
---
 kernel/reboot.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/kernel/reboot.c b/kernel/reboot.c
index efb40d095d1ef..d39e599c3c996 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -182,7 +182,20 @@ static ATOMIC_NOTIFIER_HEAD(restart_handler_list);
  */
 int register_restart_handler(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_register(&restart_handler_list, nb);
+	int ret;
+
+	ret = atomic_notifier_chain_register(&restart_handler_list, nb);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handler must have unique priority. Otherwise invocation order is
+	 * determined by the registration order, which is presumed to be
+	 * unreliable.
+	 */
+	WARN_ON(!atomic_notifier_has_unique_priority(&restart_handler_list, nb));
+
+	return 0;
 }
 EXPORT_SYMBOL(register_restart_handler);

From a93911a51075554f2ea7cb01f53930b488e37693 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Mon, 18 Oct 2021 16:06:42 +0300
Subject: [PATCH 0985/1202] reboot: Warn if unregister_restart_handler() fails

Emit a warning if unregister_restart_handler() fails, since it should
never fail. This will ease further API development by catching dumb
mistakes.

Signed-off-by: Dmitry Osipenko
---
 kernel/reboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/reboot.c b/kernel/reboot.c
index d39e599c3c996..291d44082f420 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -210,7 +210,7 @@ EXPORT_SYMBOL(register_restart_handler);
  */
 int unregister_restart_handler(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_unregister(&restart_handler_list, nb);
+	return WARN_ON(atomic_notifier_chain_unregister(&restart_handler_list, nb));
 }
 EXPORT_SYMBOL(unregister_restart_handler);

From 497f58b910c49a1b1e8ad7080f3baff3f81c01ec Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 19 Oct 2021 00:28:15 +0300
Subject: [PATCH 0986/1202] reboot: Remove extern annotation from function prototypes

There is no need to annotate function prototypes with 'extern'; it
makes the code less readable. Remove the unnecessary annotations from
<linux/reboot.h>.
Signed-off-by: Dmitry Osipenko --- include/linux/reboot.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 7c288013a3caa..b7fa25726323e 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -40,36 +40,36 @@ extern int reboot_cpu; extern int reboot_force; -extern int register_reboot_notifier(struct notifier_block *); -extern int unregister_reboot_notifier(struct notifier_block *); +int register_reboot_notifier(struct notifier_block *); +int unregister_reboot_notifier(struct notifier_block *); -extern int devm_register_reboot_notifier(struct device *, struct notifier_block *); +int devm_register_reboot_notifier(struct device *, struct notifier_block *); -extern int register_restart_handler(struct notifier_block *); -extern int unregister_restart_handler(struct notifier_block *); -extern void do_kernel_restart(char *cmd); +int register_restart_handler(struct notifier_block *); +int unregister_restart_handler(struct notifier_block *); +void do_kernel_restart(char *cmd); /* * Architecture-specific implementations of sys_reboot commands. */ -extern void migrate_to_reboot_cpu(void); -extern void machine_restart(char *cmd); -extern void machine_halt(void); -extern void machine_power_off(void); +void migrate_to_reboot_cpu(void); +void machine_restart(char *cmd); +void machine_halt(void); +void machine_power_off(void); -extern void machine_shutdown(void); +void machine_shutdown(void); struct pt_regs; -extern void machine_crash_shutdown(struct pt_regs *); +void machine_crash_shutdown(struct pt_regs *); /* * Architecture independent implementations of sys_reboot commands. */ -extern void kernel_restart_prepare(char *cmd); -extern void kernel_restart(char *cmd); -extern void kernel_halt(void); -extern void kernel_power_off(void); +void kernel_restart_prepare(char *cmd); +void kernel_restart(char *cmd); +void kernel_halt(void); +void kernel_power_off(void); extern int C_A_D; /* for sysctl */ void ctrl_alt_del(void); @@ -77,15 +77,15 @@ void ctrl_alt_del(void); #define POWEROFF_CMD_PATH_LEN 256 extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; -extern void orderly_poweroff(bool force); -extern void orderly_reboot(void); +void orderly_poweroff(bool force); +void orderly_reboot(void); void hw_protection_shutdown(const char *reason, int ms_until_forced); /* * Emergency restart, callable from an interrupt handler. */ -extern void emergency_restart(void); +void emergency_restart(void); #include #endif /* _LINUX_REBOOT_H */ From ab107474b441c646717310be86f5ee4dee48b927 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Oct 2021 05:43:50 +0300 Subject: [PATCH 0987/1202] kernel: Add combined power-off+restart handler call chain API SoC platforms often have multiple options for performing of system's power-off and restart operations. Meanwhile today's kernel is limited to a single option. Add combined power-off+restart handler call chain API, which is inspired by the restart API. The new API provides both power-off and restart functionality, it's designed with reliability, simplicity and extensibility in mind. The old pm_power_off method will be kept around till all users are converted to the new API. Current restart API will be replaced with the new unified API since new API is its superset. The restart functionality of the power-handler API is built upon the existing restart-notifier APIs. 
In order to ease conversion to the new API, convenient helpers are added for the common use-cases. They will reduce amount of boilerplate code and remove global variables. These helpers preserve old behaviour where only one power-off handler was executed. Users of the new API should explicitly opt-in to allow power-off chaining by enabling the corresponding flag of power_handler structure. Signed-off-by: Dmitry Osipenko --- include/linux/reboot.h | 170 ++++++++++- kernel/power/hibernate.c | 2 +- kernel/reboot.c | 594 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 758 insertions(+), 8 deletions(-) diff --git a/include/linux/reboot.h b/include/linux/reboot.h index b7fa25726323e..9fa4ae7b87580 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -8,10 +8,16 @@ struct device; -#define SYS_DOWN 0x0001 /* Notify of system down */ -#define SYS_RESTART SYS_DOWN -#define SYS_HALT 0x0002 /* Notify of system halt */ -#define SYS_POWER_OFF 0x0003 /* Notify of system power off */ +enum reboot_prepare_mode { + SYS_DOWN = 1, /* Notify of system down */ + SYS_RESTART = SYS_DOWN, + SYS_HALT, /* Notify of system halt */ + SYS_POWER_OFF, /* Notify of system power off */ +}; + +#define RESTART_PRIO_RESERVED 0 +#define RESTART_PRIO_DEFAULT 128 +#define RESTART_PRIO_HIGH 192 enum reboot_mode { REBOOT_UNDEFINED = -1, @@ -49,6 +55,161 @@ int register_restart_handler(struct notifier_block *); int unregister_restart_handler(struct notifier_block *); void do_kernel_restart(char *cmd); +/* + * Unified poweroff + restart API. + */ + +#define POWEROFF_PRIO_RESERVED 0 +#define POWEROFF_PRIO_PLATFORM 1 +#define POWEROFF_PRIO_DEFAULT 128 +#define POWEROFF_PRIO_HIGH 192 +#define POWEROFF_PRIO_FIRMWARE 224 + +enum poweroff_mode { + POWEROFF_NORMAL = 0, + POWEROFF_PREPARE, +}; + +struct power_off_data { + void *cb_data; +}; + +struct power_off_prep_data { + void *cb_data; +}; + +struct restart_data { + void *cb_data; + const char *cmd; + enum reboot_mode mode; +}; + +struct reboot_prep_data { + void *cb_data; + const char *cmd; + enum reboot_prepare_mode mode; +}; + +struct power_handler_private_data { + struct notifier_block reboot_prep_nb; + struct notifier_block power_off_nb; + struct notifier_block restart_nb; + void (*trivial_power_off_cb)(void); + void (*simple_power_off_cb)(void *data); + void *simple_power_off_cb_data; + bool registered; +}; + +/** + * struct power_handler - Power-off + restart handlers + * + * Describes power-off and restart handlers which are invoked by kernel + * to power off or restart this machine. Struct power_handler can be static. + * Members of this structure must not be altered while handler is registered. + * Fill the structure members and pass it to register_power_handler(). + */ +struct power_handler { + /** + * @cb_data: + * + * User data included in callback's argument. + */ + void *cb_data; + + /** + * @power_off_cb: + * + * Callback that should turn off machine. Inactive if NULL. + */ + void (*power_off_cb)(struct power_off_data *data); + + /** + * @power_off_prepare_cb: + * + * Power-off preparation callback. All power-off preparation callbacks + * are invoked before @restart_cb. Inactive if NULL. + */ + void (*power_off_prepare_cb)(struct power_off_prep_data *data); + + /** + * @power_off_priority: + * + * Power-off callback priority, must be unique. Zero value is reassigned + * to default priority. Inactive if @power_off_cb is NULL. 
+ */ + int power_off_priority; + + /** + * @power_off_chaining_allowed: + * + * False if callbacks execution should stop when @power_off_cb fails + * to power off machine. True if further lower priority power-off + * callback should be executed. + */ + bool power_off_chaining_allowed; + + /** + * @restart_cb: + * + * Callback that should reboot machine. Inactive if NULL. + */ + void (*restart_cb)(struct restart_data *data); + + /** + * @restart_priority: + * + * Restart callback priority, must be unique. Zero value is reassigned + * to default priority. Inactive if @restart_cb is NULL. + */ + int restart_priority; + + /** + * @reboot_prepare_cb: + * + * Reboot preparation callback. All reboot preparation callbacks are + * invoked before @restart_cb. Inactive if NULL. + */ + void (*reboot_prepare_cb)(struct reboot_prep_data *data); + + /** + * @priv: + * + * Internal data. Shouldn't be touched. + */ + const struct power_handler_private_data priv; +}; + +int register_power_handler(struct power_handler *handler); +void unregister_power_handler(struct power_handler *handler); + +struct power_handler * +register_simple_power_off_handler(void (*callback)(void *data), void *data); + +void unregister_simple_power_off_handler(struct power_handler *handler); + +int devm_register_power_handler(struct device *dev, + struct power_handler *handler); + +int devm_register_simple_power_off_handler(struct device *dev, + void (*callback)(void *data), + void *data); + +int devm_register_trivial_power_off_handler(struct device *dev, + void (*callback)(void)); + +int devm_register_simple_restart_handler(struct device *dev, + void (*callback)(struct restart_data *data), + void *data); + +int devm_register_prioritized_restart_handler(struct device *dev, + int priority, + void (*callback)(struct restart_data *data), + void *data); + +int register_platform_power_off(void (*power_off)(void)); + +void do_kernel_power_off(void); + /* * Architecture-specific implementations of sys_reboot commands. */ @@ -70,6 +231,7 @@ void kernel_restart_prepare(char *cmd); void kernel_restart(char *cmd); void kernel_halt(void); void kernel_power_off(void); +bool kernel_can_power_off(void); extern int C_A_D; /* for sysctl */ void ctrl_alt_del(void); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 559acef3fddb8..13ad98352fde9 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -664,7 +664,7 @@ static void power_down(void) hibernation_platform_enter(); fallthrough; case HIBERNATION_SHUTDOWN: - if (pm_power_off) + if (kernel_can_power_off()) kernel_power_off(); break; } diff --git a/kernel/reboot.c b/kernel/reboot.c index 291d44082f420..3dadbfa2ed0a6 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -294,6 +294,585 @@ void kernel_halt(void) } EXPORT_SYMBOL_GPL(kernel_halt); +/* + * Notifier list for kernel code which wants to be called + * to power off the system. + */ +static BLOCKING_NOTIFIER_HEAD(power_off_handler_list); + +static void dummy_pm_power_off(void) +{ + /* temporary stub until pm_power_off() is gone */ +} + +static struct notifier_block *pm_power_off_nb; + +/** + * register_power_off_handler - Register function to be called to power off + * the system + * @nb: Info about handler function to be called + * @nb->priority: Handler priority. Handlers should follow the + * following guidelines for setting priorities. 
+ *		0:	Power-off handler of last resort,
+ *			with limited power-off capabilities
+ *		128:	Default power-off handler; use if no other
+ *			power-off handler is expected to be available,
+ *			and/or if power-off functionality is
+ *			sufficient to power off the entire system
+ *		255:	Highest priority power-off handler, will
+ *			preempt all other power-off handlers
+ *
+ * Registers a function with code to be called to power off the
+ * system.
+ *
+ * Registered functions will be called as last step of the power-off
+ * sequence.
+ *
+ * Registered functions are expected to power off the system immediately.
+ * If more than one function is registered, the power-off handler priority
+ * selects which function will be called first.
+ *
+ * Power-off handlers are expected to be registered from non-architecture
+ * code, typically from drivers. A typical use case would be a system
+ * where power-off functionality is provided through a PMIC. Multiple
+ * power-off handlers may exist; for example, one power-off handler might
+ * turn off the entire system, while another only turns off part of the
+ * system. In such cases, the power-off handler which only disables part
+ * of the hardware is expected to register with low priority to ensure
+ * that it only runs if no other means to power off the system is
+ * available.
+ *
+ * Currently always returns zero, as blocking_notifier_chain_register()
+ * always returns zero.
+ */
+static int register_power_off_handler(struct notifier_block *nb)
+{
+	int ret;
+
+	ret = blocking_notifier_chain_register(&power_off_handler_list, nb);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handler must have unique priority. Otherwise invocation order is
+	 * determined by the registration order, which is presumed to be
+	 * unreliable.
+	 */
+	WARN_ON(!blocking_notifier_has_unique_priority(&power_off_handler_list, nb));
+
+	/*
+	 * Some drivers check whether pm_power_off was already installed by
+	 * early code. Install a dummy callback to preserve the old behaviour
+	 * for those drivers during the period of transition to the new API.
+	 */
+	if (!pm_power_off) {
+		pm_power_off = dummy_pm_power_off;
+		pm_power_off_nb = nb;
+	}
+
+	return 0;
+}
+
+static void unregister_power_off_handler(struct notifier_block *nb)
+{
+	if (nb == pm_power_off_nb) {
+		/*
+		 * Check whether somebody replaced pm_power_off behind
+		 * our back.
+		 */
+		if (!WARN_ON(pm_power_off != dummy_pm_power_off))
+			pm_power_off = NULL;
+
+		pm_power_off_nb = NULL;
+	}
+
+	WARN_ON(blocking_notifier_chain_unregister(&power_off_handler_list, nb));
+}
+
+static void devm_unregister_power_off_handler(void *data)
+{
+	struct notifier_block *nb = data;
+
+	unregister_power_off_handler(nb);
+}
+
+static int devm_register_power_off_handler(struct device *dev,
+					   struct notifier_block *nb)
+{
+	int err;
+
+	err = register_power_off_handler(nb);
+	if (err)
+		return err;
+
+	return devm_add_action_or_reset(dev, devm_unregister_power_off_handler,
+					nb);
+}
+
+static int power_handler_power_off(struct notifier_block *nb,
+				   unsigned long mode, void *unused)
+{
+	struct power_off_prep_data prep_data = {};
+	struct power_handler_private_data *priv;
+	struct power_off_data data = {};
+	struct power_handler *h;
+	int ret = NOTIFY_DONE;
+
+	priv = container_of(nb, struct power_handler_private_data, power_off_nb);
+	h = container_of(priv, struct power_handler, priv);
+	prep_data.cb_data = h->cb_data;
+	data.cb_data = h->cb_data;
+
+	switch (mode) {
+	case POWEROFF_NORMAL:
+		if (h->power_off_cb)
+			h->power_off_cb(&data);
+
+		if (priv->simple_power_off_cb)
+			priv->simple_power_off_cb(priv->simple_power_off_cb_data);
+
+		if (priv->trivial_power_off_cb)
+			priv->trivial_power_off_cb();
+
+		if (!h->power_off_chaining_allowed)
+			ret = NOTIFY_STOP;
+
+		break;
+
+	case POWEROFF_PREPARE:
+		if (h->power_off_prepare_cb)
+			h->power_off_prepare_cb(&prep_data);
+
+		break;
+
+	default:
+		unreachable();
+	}
+
+	return ret;
+}
+
+static int power_handler_restart(struct notifier_block *nb,
+				 unsigned long mode, void *cmd)
+{
+	struct power_handler_private_data *priv;
+	struct restart_data data = {};
+	struct power_handler *h;
+
+	priv = container_of(nb, struct power_handler_private_data, restart_nb);
+	h = container_of(priv, struct power_handler, priv);
+
+	data.cb_data = h->cb_data;
+	data.mode = mode;
+	data.cmd = cmd;
+
+	h->restart_cb(&data);
+
+	return NOTIFY_DONE;
+}
+
+static int power_handler_restart_prep(struct notifier_block *nb,
+				      unsigned long mode, void *cmd)
+{
+	struct power_handler_private_data *priv;
+	struct reboot_prep_data data = {};
+	struct power_handler *h;
+
+	priv = container_of(nb, struct power_handler_private_data, reboot_prep_nb);
+	h = container_of(priv, struct power_handler, priv);
+
+	data.cb_data = h->cb_data;
+	data.mode = mode;
+	data.cmd = cmd;
+
+	h->reboot_prepare_cb(&data);
+
+	return NOTIFY_DONE;
+}
+
+static struct power_handler_private_data *
+power_handler_private_data(struct power_handler *handler)
+{
+	return (struct power_handler_private_data *)&handler->priv;
+}
+
+/**
+ * register_power_handler - Register power handler
+ * @handler: Power handler descriptor
+ *
+ * Registers a power handler that will be called as last step of the
+ * power-off and restart sequences.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int register_power_handler(struct power_handler *handler)
+{
+	struct power_handler_private_data *priv = power_handler_private_data(handler);
+	int err, priority;
+
+	/* sanity-check whether handler is registered twice */
+	if (WARN_ON(priv->registered))
+		return -EBUSY;
+
+	if (handler->power_off_cb || handler->power_off_prepare_cb) {
+		if (handler->power_off_priority == POWEROFF_PRIO_RESERVED)
+			priority = POWEROFF_PRIO_DEFAULT;
+		else
+			priority = handler->power_off_priority;
+
+		priv->power_off_nb.notifier_call = power_handler_power_off;
+		priv->power_off_nb.priority = priority;
+
+		err = register_power_off_handler(&priv->power_off_nb);
+		if (err)
+			goto reset_power_handler;
+	}
+
+	if (handler->restart_cb) {
+		if (handler->restart_priority == RESTART_PRIO_RESERVED)
+			priority = RESTART_PRIO_DEFAULT;
+		else
+			priority = handler->restart_priority;
+
+		priv->restart_nb.notifier_call = power_handler_restart;
+		priv->restart_nb.priority = priority;
+
+		err = register_restart_handler(&priv->restart_nb);
+		if (err)
+			goto unreg_power_off_handler;
+	}
+
+	if (handler->reboot_prepare_cb) {
+		priv->reboot_prep_nb.notifier_call = power_handler_restart_prep;
+		priv->reboot_prep_nb.priority = 0;
+
+		err = register_reboot_notifier(&priv->reboot_prep_nb);
+		if (err)
+			goto unreg_restart_handler;
+	}
+
+	priv->registered = true;
+
+	return 0;
+
+unreg_restart_handler:
+	if (handler->restart_cb)
+		unregister_restart_handler(&priv->restart_nb);
+
+unreg_power_off_handler:
+	if (handler->power_off_cb || handler->power_off_prepare_cb)
+		unregister_power_off_handler(&priv->power_off_nb);
+
+reset_power_handler:
+	memset(priv, 0, sizeof(*priv));
+
+	return err;
+}
+EXPORT_SYMBOL(register_power_handler);
+
+/**
+ * unregister_power_handler - Unregister power handler
+ * @handler: Power handler descriptor
+ *
+ * Unregisters a previously registered power handler. Does nothing if
+ * handler is NULL.
+ */
+void unregister_power_handler(struct power_handler *handler)
+{
+	struct power_handler_private_data *priv;
+
+	if (!handler)
+		return;
+
+	priv = power_handler_private_data(handler);
+
+	/* sanity-check whether handler is unregistered twice */
+	if (WARN_ON(!priv->registered))
+		return;
+
+	if (handler->reboot_prepare_cb)
+		unregister_reboot_notifier(&priv->reboot_prep_nb);
+
+	if (handler->restart_cb)
+		unregister_restart_handler(&priv->restart_nb);
+
+	if (handler->power_off_cb || handler->power_off_prepare_cb)
+		unregister_power_off_handler(&priv->power_off_nb);
+
+	memset(priv, 0, sizeof(*priv));
+}
+EXPORT_SYMBOL(unregister_power_handler);
+
+static void devm_unregister_power_handler(void *data)
+{
+	struct power_handler *handler = data;
+
+	unregister_power_handler(handler);
+}
+
+/**
+ * devm_register_power_handler - Register power handler
+ * @dev: Device that registers handler
+ * @handler: Power handler descriptor
+ *
+ * Resource-managed variant of register_power_handler().
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int devm_register_power_handler(struct device *dev,
+				struct power_handler *handler)
+{
+	int err;
+
+	err = register_power_handler(handler);
+	if (err)
+		return err;
+
+	return devm_add_action_or_reset(dev, devm_unregister_power_handler,
+					handler);
+}
+EXPORT_SYMBOL(devm_register_power_handler);
+
+/**
+ * register_simple_power_off_handler - Register simple power-off callback
+ * @callback: Callback function
+ * @data: Callback's argument
+ *
+ * Registers a power-off callback with default priority; it will be called
+ * as last step of the power-off sequence.
+ *
+ * Returns power_handler pointer on success, or ERR_PTR on failure.
+ */
+struct power_handler *
+register_simple_power_off_handler(void (*callback)(void *data), void *data)
+{
+	struct power_handler_private_data *priv;
+	struct power_handler *handler;
+	int err;
+
+	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+	if (!handler)
+		return ERR_PTR(-ENOMEM);
+
+	priv = power_handler_private_data(handler);
+
+	priv->power_off_nb.notifier_call = power_handler_power_off;
+	priv->power_off_nb.priority = POWEROFF_PRIO_DEFAULT;
+	priv->simple_power_off_cb_data = data;
+	priv->simple_power_off_cb = callback;
+
+	err = register_power_off_handler(&priv->power_off_nb);
+	if (err) {
+		kfree(handler);
+		return ERR_PTR(err);
+	}
+
+	return handler;
+}
+EXPORT_SYMBOL(register_simple_power_off_handler);
+
+/**
+ * unregister_simple_power_off_handler - Unregister simple power-off handler
+ * @handler: Power handler descriptor
+ *
+ * Unregisters a power handler that was registered by
+ * register_simple_power_off_handler(). Does nothing if handler is
+ * error or NULL.
+ */
+void unregister_simple_power_off_handler(struct power_handler *handler)
+{
+	struct power_handler_private_data *priv;
+
+	if (!IS_ERR_OR_NULL(handler)) {
+		priv = power_handler_private_data(handler);
+		unregister_power_off_handler(&priv->power_off_nb);
+		kfree(handler);
+	}
+}
+EXPORT_SYMBOL(unregister_simple_power_off_handler);
+
+/**
+ * devm_register_simple_power_off_handler - Register simple power-off callback
+ * @dev: Device that registers callback
+ * @callback: Callback function
+ * @data: Callback's argument
+ *
+ * Registers a resource-managed power-off callback with default priority;
+ * it will be called as last step of the power-off sequence. Further
+ * lower priority callbacks won't be executed if this @callback fails.
+ *
+ * Returns zero on success, or error code on failure.
+ */
+int devm_register_simple_power_off_handler(struct device *dev,
+					   void (*callback)(void *data),
+					   void *data)
+{
+	struct power_handler_private_data *priv;
+	struct power_handler *handler;
+
+	handler = devm_kzalloc(dev, sizeof(*handler), GFP_KERNEL);
+	if (!handler)
+		return -ENOMEM;
+
+	priv = power_handler_private_data(handler);
+
+	priv->power_off_nb.notifier_call = power_handler_power_off;
+	priv->power_off_nb.priority = POWEROFF_PRIO_DEFAULT;
+	priv->simple_power_off_cb_data = data;
+	priv->simple_power_off_cb = callback;
+
+	return devm_register_power_off_handler(dev, &priv->power_off_nb);
+}
+EXPORT_SYMBOL(devm_register_simple_power_off_handler);
+
+/**
+ * devm_register_trivial_power_off_handler - Register trivial power-off callback
+ * @dev: Device that registers callback
+ * @callback: Callback function
+ *
+ * Same as devm_register_simple_power_off_handler(), but the callback
+ * doesn't take an argument. Further lower priority callbacks won't be
+ * executed if this @callback fails.
+ *
+ * Returns zero on success, or error code on failure.
+ */ +int devm_register_trivial_power_off_handler(struct device *dev, + void (*callback)(void)) +{ + struct power_handler_private_data *priv; + struct power_handler *handler; + + handler = devm_kzalloc(dev, sizeof(*handler), GFP_KERNEL); + if (!handler) + return -ENOMEM; + + priv = power_handler_private_data(handler); + + priv->power_off_nb.notifier_call = power_handler_power_off; + priv->power_off_nb.priority = POWEROFF_PRIO_DEFAULT; + priv->trivial_power_off_cb = callback; + + return devm_register_power_off_handler(dev, &priv->power_off_nb); +} +EXPORT_SYMBOL(devm_register_trivial_power_off_handler); + +/** + * devm_register_simple_restart_handler - Register simple restart callback + * @dev: Device that registers callback + * @callback: Callback function + * @data: Callback's argument + * + * Registers resource-managed restart callback with default priority, + * it will be called as last step of the restart sequence. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_simple_restart_handler(struct device *dev, + void (*callback)(struct restart_data *data), + void *data) +{ + return devm_register_prioritized_restart_handler(dev, + RESTART_PRIO_DEFAULT, + callback, data); +} +EXPORT_SYMBOL(devm_register_simple_restart_handler); + +/** + * devm_register_prioritized_restart_handler - Register prioritized restart callback + * @dev: Device that registers callback + * @priority: Callback's priority + * @callback: Callback function + * @data: Callback's argument + * + * Registers resource-managed restart callback with a given priority, + * it will be called as last step of the restart sequence. + * + * Returns zero on success, or error code on failure. + */ +int devm_register_prioritized_restart_handler(struct device *dev, + int priority, + void (*callback)(struct restart_data *data), + void *data) +{ + struct power_handler *handler; + + handler = devm_kzalloc(dev, sizeof(*handler), GFP_KERNEL); + if (!handler) + return -ENOMEM; + + handler->restart_priority = priority; + handler->restart_cb = callback; + handler->cb_data = data; + + return devm_register_power_handler(dev, handler); +} +EXPORT_SYMBOL(devm_register_prioritized_restart_handler); + +static struct power_handler platform_power_off_handler = { + .priv = { + .power_off_nb = { + .notifier_call = power_handler_power_off, + .priority = POWEROFF_PRIO_PLATFORM, + }, + }, +}; + +/** + * register_platform_power_off - Register platform-level power-off callback + * @power_off: Power-off callback + * + * Registers power-off callback that will be called as last step + * of the power-off sequence. This callback is expected to be invoked + * for the last resort. Further lower priority callbacks won't be + * executed if @power_off fails. Only one platform power-off callback + * is allowed to be registered. + * + * Returns zero on success, or error code on failure. + */ +int register_platform_power_off(void (*power_off)(void)) +{ + struct power_handler_private_data *priv; + + /* this function is allowed to be called only once */ + if (WARN_ON(platform_power_off_handler.priv.trivial_power_off_cb)) + return -EBUSY; + + priv = power_handler_private_data(&platform_power_off_handler); + priv->trivial_power_off_cb = power_off; + + return register_power_off_handler(&priv->power_off_nb); +} + +/** + * do_kernel_power_off - Execute kernel power-off handler call chain + * + * Calls functions registered with register_power_off_handler. + * + * Expected to be called as last step of the power-off sequence. 
+ * + * Powers off the system immediately if a power-off handler function has + * been registered. Otherwise does nothing. + */ +void do_kernel_power_off(void) +{ + if (pm_power_off) + pm_power_off(); + + blocking_notifier_call_chain(&power_off_handler_list, POWEROFF_NORMAL, + NULL); +} + +static void do_kernel_power_off_prepare(void) +{ + if (pm_power_off_prepare) + pm_power_off_prepare(); + + blocking_notifier_call_chain(&power_off_handler_list, POWEROFF_PREPARE, + NULL); +} + /** * kernel_power_off - power_off the system * @@ -302,8 +881,7 @@ EXPORT_SYMBOL_GPL(kernel_halt); void kernel_power_off(void) { kernel_shutdown_prepare(SYSTEM_POWER_OFF); - if (pm_power_off_prepare) - pm_power_off_prepare(); + do_kernel_power_off_prepare(); migrate_to_reboot_cpu(); syscore_shutdown(); pr_emerg("Power down\n"); @@ -312,6 +890,16 @@ void kernel_power_off(void) } EXPORT_SYMBOL_GPL(kernel_power_off); +bool kernel_can_power_off(void) +{ + if (!pm_power_off && + blocking_notifier_call_chain_empty(&power_off_handler_list)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(kernel_can_power_off); + DEFINE_MUTEX(system_transition_mutex); /* @@ -353,7 +941,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, /* Instead of trying to make the power_off code look like * halt when pm_power_off is not set do it the easy way. */ - if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) + if (cmd == LINUX_REBOOT_CMD_POWER_OFF && !kernel_can_power_off()) cmd = LINUX_REBOOT_CMD_HALT; mutex_lock(&system_transition_mutex); From 1830c9036bd5abe706e53c1093913d7bf8c99886 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 16:57:27 +0300 Subject: [PATCH 0988/1202] xen/x86: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/x86/xen/enlighten_pv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4f63117f09bb2..4a0b9b7baf020 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -1087,8 +1088,7 @@ static void xen_machine_halt(void) static void xen_machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); xen_reboot(SHUTDOWN_poweroff); } From 4a122026790f29a8bddc14bdb6487cd5f4f5a889 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:09:37 +0300 Subject: [PATCH 0989/1202] ARM: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. 
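
For reference, the driver-side counterpart that do_kernel_power_off()
serves looks roughly like this (a hedged sketch; the foo_* names are
made up and not part of this series):

    #include <linux/reboot.h>

    struct foo_pmic {
            struct device *dev;
    };

    /* invoked from machine_power_off() via do_kernel_power_off() */
    static void foo_pmic_power_off(void *data)
    {
            struct foo_pmic *pmic = data;

            /* ... tell the PMIC to cut system power ... */
    }

    static int foo_pmic_probe(struct device *dev, struct foo_pmic *pmic)
    {
            /* registered with default power-off priority */
            return devm_register_simple_power_off_handler(dev,
                                                          foo_pmic_power_off,
                                                          pmic);
    }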
Signed-off-by: Dmitry Osipenko --- arch/arm/kernel/reboot.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3044fcb8d0736..2cb943422554e 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -116,9 +116,7 @@ void machine_power_off(void) { local_irq_disable(); smp_send_stop(); - - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } /* From 28c7d12ca2078b6a074df4e4dff1ff7439ceb8d7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:10:07 +0300 Subject: [PATCH 0990/1202] arm64: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/arm64/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index aacf2f5559a8b..f8db031afa7d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -110,8 +110,7 @@ void machine_power_off(void) { local_irq_disable(); smp_send_stop(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } /* From 901e137a3f246a2f65da1c3816fabc11022b4c48 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:10:54 +0300 Subject: [PATCH 0991/1202] csky: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/csky/kernel/power.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/csky/kernel/power.c b/arch/csky/kernel/power.c index 923ee4e381b81..86ee202906f8d 100644 --- a/arch/csky/kernel/power.c +++ b/arch/csky/kernel/power.c @@ -9,16 +9,14 @@ EXPORT_SYMBOL(pm_power_off); void machine_power_off(void) { local_irq_disable(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); asm volatile ("bkpt"); } void machine_halt(void) { local_irq_disable(); - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); asm volatile ("bkpt"); } From 07b5cb5aa6c9ab012e40c5d8626f92e4aa97f39a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:11:17 +0300 Subject: [PATCH 0992/1202] ia64: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. 
Signed-off-by: Dmitry Osipenko --- arch/ia64/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 834df24a88f12..cee4d7db2143b 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -599,8 +600,7 @@ machine_halt (void) void machine_power_off (void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); machine_halt(); } From d9c052eb8caaecc97f3b4cf71014d3aaf526eacf Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:11:25 +0300 Subject: [PATCH 0993/1202] mips: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/mips/kernel/reset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c index 6288780b779e7..e7ce07b3e79b9 100644 --- a/arch/mips/kernel/reset.c +++ b/arch/mips/kernel/reset.c @@ -114,8 +114,7 @@ void machine_halt(void) void machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); #ifdef CONFIG_SMP preempt_disable(); From 6129584a27994e5397251c2e24206fedc012e23e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:11:37 +0300 Subject: [PATCH 0994/1202] nds32: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/nds32/kernel/process.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index 49fab9e39cbff..0936dcd7db1b6 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -54,8 +54,7 @@ EXPORT_SYMBOL(machine_halt); void machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } EXPORT_SYMBOL(machine_power_off); From 8918521499c3d5303696a6bdb7c0e5faa84a2739 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:11:51 +0300 Subject: [PATCH 0995/1202] parisc: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/parisc/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea3d83b6fb62d..928201b1f58f9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -114,8 +115,7 @@ void machine_power_off(void) pdc_chassis_send_status(PDC_CHASSIS_DIRECT_SHUTDOWN); /* ipmi_poweroff may have been installed. */ - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); /* It seems we have no way to power the system off via * software. The user has to press the button himself. 
*/ From 949c20435cb5999d8e618d66cf3182efa6ab741b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:12:13 +0300 Subject: [PATCH 0996/1202] powerpc: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/powerpc/kernel/setup-common.c | 4 +--- arch/powerpc/xmon/xmon.c | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 6b1338db87795..d883efaf543e3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -161,9 +161,7 @@ void machine_restart(char *cmd) void machine_power_off(void) { machine_shutdown(); - if (pm_power_off) - pm_power_off(); - + do_kernel_power_off(); smp_send_stop(); machine_hang(); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index dd8241c009e53..9d835807d645b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1243,8 +1243,7 @@ static void bootcmds(void) } else if (cmd == 'h') { ppc_md.halt(); } else if (cmd == 'p') { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } } From 1548b92353d46b7838229787b3025259cdb07e56 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:12:23 +0300 Subject: [PATCH 0997/1202] riscv: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/riscv/kernel/reset.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index 9c842c41684ac..9122885722265 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -23,16 +23,12 @@ void machine_restart(char *cmd) void machine_halt(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } void machine_power_off(void) { - if (pm_power_off != NULL) - pm_power_off(); - else - default_power_off(); + do_kernel_power_off(); + default_power_off(); } From 6d67add6a595b634cdd1ee521081911fa30bbdb9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:12:46 +0300 Subject: [PATCH 0998/1202] sh: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. 
Signed-off-by: Dmitry Osipenko --- arch/sh/kernel/reboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index 5c33f036418be..e8eeedc9b1822 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -46,8 +46,7 @@ static void native_machine_shutdown(void) static void native_machine_power_off(void) { - if (pm_power_off) - pm_power_off(); + do_kernel_power_off(); } static void native_machine_halt(void) From 9ea321cee5a6b7cf6730497412e3057c928d7dd8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 21:13:25 +0300 Subject: [PATCH 0999/1202] x86: Use do_kernel_power_off() Kernel now supports chained power-off handlers. Use do_kernel_power_off() that invokes chained power-off handlers. It also invokes legacy pm_power_off() for now, which will be removed once all drivers will be converted to the new power-off API. Signed-off-by: Dmitry Osipenko --- arch/x86/kernel/reboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0a40df66a40de..cd7d9416d81a4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -747,10 +747,10 @@ static void native_machine_halt(void) static void native_machine_power_off(void) { - if (pm_power_off) { + if (kernel_can_power_off()) { if (!reboot_force) machine_shutdown(); - pm_power_off(); + do_kernel_power_off(); } /* A fallback in case there is no PM info available */ tboot_shutdown(TB_SHUTDOWN_HALT); From 04a2536815ae6ecafd858624221240533edc0ff5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 08:32:23 +0300 Subject: [PATCH 1000/1202] m68k: Switch to new power-handler API Kernel now supports chained power-off handlers. Use register_power_off_handler() that registers power-off handlers and do_kernel_power_off() that invokes chained power-off handlers. Legacy pm_power_off() will be removed once all drivers will be converted to the new power-off API. Normally arch code should adopt only the do_kernel_power_off() at first, but m68k is a special case because it uses pm_power_off() "inside out", i.e. it assigns machine_power_off() to pm_power_off, while it's machine_power_off() that should invoke the pm_power_off(), and thus, we can't convert platforms to the new API separately. There are only two platforms changed here, so it's not a big deal. 
Signed-off-by: Dmitry Osipenko --- arch/m68k/emu/natfeat.c | 3 ++- arch/m68k/include/asm/machdep.h | 1 - arch/m68k/kernel/process.c | 5 ++--- arch/m68k/kernel/setup_mm.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/mac/config.c | 4 +++- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c index 71b78ecee75c6..b19dc00026d99 100644 --- a/arch/m68k/emu/natfeat.c +++ b/arch/m68k/emu/natfeat.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -90,5 +91,5 @@ void __init nf_init(void) pr_info("NatFeats found (%s, %lu.%lu)\n", buf, version >> 16, version & 0xffff); - mach_power_off = nf_poweroff; + register_platform_power_off(nf_poweroff); } diff --git a/arch/m68k/include/asm/machdep.h b/arch/m68k/include/asm/machdep.h index 8fd80ef1b77e1..8d8c3ee2069f0 100644 --- a/arch/m68k/include/asm/machdep.h +++ b/arch/m68k/include/asm/machdep.h @@ -24,7 +24,6 @@ extern int (*mach_get_rtc_pll)(struct rtc_pll_info *); extern int (*mach_set_rtc_pll)(struct rtc_pll_info *); extern void (*mach_reset)( void ); extern void (*mach_halt)( void ); -extern void (*mach_power_off)( void ); extern unsigned long (*mach_hd_init) (unsigned long, unsigned long); extern void (*mach_hd_setup)(char *, int *); extern void (*mach_heartbeat) (int); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index a6030dbaa0891..e160a7c57bd3c 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -67,12 +67,11 @@ void machine_halt(void) void machine_power_off(void) { - if (mach_power_off) - mach_power_off(); + do_kernel_power_off(); for (;;); } -void (*pm_power_off)(void) = machine_power_off; +void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); void show_regs(struct pt_regs * regs) diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 4b51bfd38e5f2..50f4f120a4ff3 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -98,7 +98,6 @@ EXPORT_SYMBOL(mach_get_rtc_pll); EXPORT_SYMBOL(mach_set_rtc_pll); void (*mach_reset)( void ); void (*mach_halt)( void ); -void (*mach_power_off)( void ); #ifdef CONFIG_HEARTBEAT void (*mach_heartbeat) (int); EXPORT_SYMBOL(mach_heartbeat); diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 5e4104f07a443..00bf822582337 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -55,7 +55,6 @@ int (*mach_hwclk) (int, struct rtc_time*); /* machine dependent reboot functions */ void (*mach_reset)(void); void (*mach_halt)(void); -void (*mach_power_off)(void); #ifdef CONFIG_M68000 #if defined(CONFIG_M68328) diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 5d16f9b47aa90..727320dedf08f 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -139,7 +140,6 @@ void __init config_mac(void) mach_hwclk = mac_hwclk; mach_reset = mac_reset; mach_halt = mac_poweroff; - mach_power_off = mac_poweroff; #if IS_ENABLED(CONFIG_INPUT_M68K_BEEP) mach_beep = mac_mksound; #endif @@ -159,6 +159,8 @@ void __init config_mac(void) if (macintosh_config->ident == MAC_MODEL_IICI) mach_l2_flush = via_l2_flush; + + register_platform_power_off(mac_poweroff); } From adb208d6bca43806c36743e9c3e31ae2860e0a66 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 17 Oct 2021 16:29:24 +0300 Subject: [PATCH 1001/1202] memory: emif: Use kernel_can_power_off() Replace legacy pm_power_off with kernel_can_power_off() helper 
that is aware about chained power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/memory/emif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 762d0c0f0716f..cab10d5274a09 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -630,7 +630,7 @@ static irqreturn_t emif_threaded_isr(int irq, void *dev_id) dev_emerg(emif->dev, "SDRAM temperature exceeds operating limit.. Needs shut down!!!\n"); /* If we have Power OFF ability, use it, else try restarting */ - if (pm_power_off) { + if (kernel_can_power_off()) { kernel_power_off(); } else { WARN(1, "FIXME: NO pm_power_off!!! trying restart\n"); From 42a26b715eb6ab6368fec71515365d11f69fc92a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 14:25:08 +0300 Subject: [PATCH 1002/1202] ACPI: power: Switch to power-handler API Switch to power-handler API that replaces legacy pm_power_off callbacks. Signed-off-by: Dmitry Osipenko --- drivers/acpi/sleep.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 3023224515abe..41b3ea867f8f1 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -47,19 +47,11 @@ static void acpi_sleep_tts_switch(u32 acpi_state) } } -static int tts_notify_reboot(struct notifier_block *this, - unsigned long code, void *x) +static void tts_reboot_prepare(struct reboot_prep_data *data) { acpi_sleep_tts_switch(ACPI_STATE_S5); - return NOTIFY_DONE; } -static struct notifier_block tts_notifier = { - .notifier_call = tts_notify_reboot, - .next = NULL, - .priority = 0, -}; - static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP @@ -1016,7 +1008,7 @@ static void acpi_sleep_hibernate_setup(void) static inline void acpi_sleep_hibernate_setup(void) {} #endif /* !CONFIG_HIBERNATION */ -static void acpi_power_off_prepare(void) +static void acpi_power_off_prepare(struct power_off_prep_data *data) { /* Prepare to power off the system */ acpi_sleep_prepare(ACPI_STATE_S5); @@ -1024,7 +1016,7 @@ static void acpi_power_off_prepare(void) acpi_os_wait_events_complete(); } -static void acpi_power_off(void) +static void acpi_power_off(struct power_off_data *data) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ pr_debug("%s called\n", __func__); @@ -1032,6 +1024,11 @@ static void acpi_power_off(void) acpi_enter_sleep_state(ACPI_STATE_S5); } +static struct power_handler acpi_power_handler = { + .power_off_priority = POWEROFF_PRIO_FIRMWARE, + .reboot_prepare_cb = tts_reboot_prepare, +}; + int __init acpi_sleep_init(void) { char supported[ACPI_S_STATE_COUNT * 3 + 1]; @@ -1048,8 +1045,8 @@ int __init acpi_sleep_init(void) if (acpi_sleep_state_supported(ACPI_STATE_S5)) { sleep_states[ACPI_STATE_S5] = 1; - pm_power_off_prepare = acpi_power_off_prepare; - pm_power_off = acpi_power_off; + acpi_power_handler.power_off_cb = acpi_power_off; + acpi_power_handler.power_off_prepare_cb = acpi_power_off_prepare; } else { acpi_no_s5 = true; } @@ -1065,6 +1062,6 @@ int __init acpi_sleep_init(void) * Register the tts_notifier to reboot notifier list so that the _TTS * object can also be evaluated when the system enters S5. 
*/ - register_reboot_notifier(&tts_notifier); + register_power_handler(&acpi_power_handler); return 0; } From 3ae17be2b2649fdb51eb2488a9d4731874e9963e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 20 Oct 2021 17:35:13 +0300 Subject: [PATCH 1003/1202] regulator: pfuze100: Use devm_register_power_handler() Use devm_register_power_handler() that replaces global pm_power_off_prepare variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/regulator/pfuze100-regulator.c | 39 ++++++++++---------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index d60d7d1b7fa25..73d5baf9d3eab 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,7 @@ struct pfuze_chip { struct pfuze_regulator regulator_descs[PFUZE100_MAX_REGULATOR]; struct regulator_dev *regulators[PFUZE100_MAX_REGULATOR]; struct pfuze_regulator *pfuze_regulators; + struct power_handler power_handler; }; static const int pfuze100_swbst[] = { @@ -569,10 +571,10 @@ static inline struct device_node *match_of_node(int index) return pfuze_matches[index].of_node; } -static struct pfuze_chip *syspm_pfuze_chip; - -static void pfuze_power_off_prepare(void) +static void pfuze_power_off_prepare(struct power_off_prep_data *data) { + struct pfuze_chip *syspm_pfuze_chip = data->cb_data; + dev_info(syspm_pfuze_chip->dev, "Configure standby mode for power off"); /* Switch from default mode: APS/APS to APS/Off */ @@ -611,24 +613,24 @@ static void pfuze_power_off_prepare(void) static int pfuze_power_off_prepare_init(struct pfuze_chip *pfuze_chip) { + int err; + if (pfuze_chip->chip_id != PFUZE100) { dev_warn(pfuze_chip->dev, "Requested pm_power_off_prepare handler for not supported chip\n"); return -ENODEV; } - if (pm_power_off_prepare) { - dev_warn(pfuze_chip->dev, "pm_power_off_prepare is already registered.\n"); - return -EBUSY; - } + pfuze_chip->power_handler.power_off_prepare_cb = pfuze_power_off_prepare; + pfuze_chip->power_handler.cb_data = pfuze_chip; - if (syspm_pfuze_chip) { - dev_warn(pfuze_chip->dev, "syspm_pfuze_chip is already set.\n"); - return -EBUSY; + err = devm_register_power_handler(pfuze_chip->dev, + &pfuze_chip->power_handler); + if (err) { + dev_err(pfuze_chip->dev, + "failed to register power handler: %d\n", err); + return err; } - syspm_pfuze_chip = pfuze_chip; - pm_power_off_prepare = pfuze_power_off_prepare; - return 0; } @@ -837,23 +839,12 @@ static int pfuze100_regulator_probe(struct i2c_client *client, return 0; } -static int pfuze100_regulator_remove(struct i2c_client *client) -{ - if (syspm_pfuze_chip) { - syspm_pfuze_chip = NULL; - pm_power_off_prepare = NULL; - } - - return 0; -} - static struct i2c_driver pfuze_driver = { .driver = { .name = "pfuze100-regulator", .of_match_table = pfuze_dt_ids, }, .probe = pfuze100_regulator_probe, - .remove = pfuze100_regulator_remove, }; module_i2c_driver(pfuze_driver); From ca4ea14f58bdc73f0823b3251e68e80de316003b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 20 Oct 2021 17:37:58 +0300 Subject: [PATCH 1004/1202] reboot: Remove pm_power_off_prepare() All pm_power_off_prepare() users were converted to power-handler API. Remove the obsolete callback. 
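For reference, a minimal sketch of the pattern every converted driver now follows (driver names here are illustrative placeholders; the real conversions are in the preceding patches):

	static void foo_power_off_prepare(struct power_off_prep_data *data)
	{
		struct foo_chip *chip = data->cb_data;

		/* put the hardware into its power-off-ready state */
		foo_enter_standby(chip);
	}

	static int foo_probe(struct device *dev, struct foo_chip *chip)
	{
		chip->power_handler.power_off_prepare_cb = foo_power_off_prepare;
		chip->power_handler.cb_data = chip;

		/* the devm helper unregisters on driver removal */
		return devm_register_power_handler(dev, &chip->power_handler);
	}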
Signed-off-by: Dmitry Osipenko --- include/linux/pm.h | 1 - kernel/reboot.c | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d8209c09686c..d9bf1426f81e6 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -20,7 +20,6 @@ * Callbacks for platform drivers to implement. */ extern void (*pm_power_off)(void); -extern void (*pm_power_off_prepare)(void); struct device; /* we have a circular dep with device.h */ #ifdef CONFIG_VT_CONSOLE_SLEEP diff --git a/kernel/reboot.c b/kernel/reboot.c index 3dadbfa2ed0a6..3ac7d2a6a0116 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -48,13 +48,6 @@ int reboot_cpu; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; -/* - * If set, this is used for preparing the system to power off. - */ - -void (*pm_power_off_prepare)(void); -EXPORT_SYMBOL_GPL(pm_power_off_prepare); - /** * emergency_restart - reboot the system * @@ -866,9 +859,6 @@ void do_kernel_power_off(void) static void do_kernel_power_off_prepare(void) { - if (pm_power_off_prepare) - pm_power_off_prepare(); - blocking_notifier_call_chain(&power_off_handler_list, POWEROFF_PREPARE, NULL); } From 67007dec6249871b2e013e3a4a642a1382e0a213 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Oct 2021 06:53:44 +0300 Subject: [PATCH 1005/1202] soc/tegra: pmc: Utilize power-handler API to power off Nexus 7 properly Nexus 7 Android tablet can be turned off using a special bootloader command which is conveyed to bootloader by putting magic value into specific scratch register and then rebooting normally. This power-off method should be invoked if USB cable is connected. Bootloader then will display battery status and power off the device. This behaviour is borrowed from downstream kernel and matches user expectations, otherwise it looks like device got hung during power-off and it may wake up on USB disconnect. Switch PMC driver to power-handler API, which provides drivers with combined power-off+restart call chains functionality, replacing the restart-only call chain API. 
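The chaining semantics this relies on can be sketched as follows (illustrative names; the real handler is in the diff below). A handler registered with power_off_chaining_allowed may simply return without powering the machine off, in which case the next handler in the chain is tried:

	static void foo_power_off(struct power_off_data *data)
	{
		/* fall through to the next handler if this method doesn't apply */
		if (!foo_method_applicable())
			return;

		foo_do_power_off();
	}

	static struct power_handler foo_power_handler = {
		.power_off_cb = foo_power_off,
		.power_off_priority = POWEROFF_PRIO_FIRMWARE,
		.power_off_chaining_allowed = true,
	};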
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 54 +++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 575d6d5b42941..a01330099e1ac 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,7 @@ #define PMC_USB_DEBOUNCE_DEL 0xec #define PMC_USB_AO 0xf0 +#define PMC_SCRATCH37 0x130 #define PMC_SCRATCH41 0x140 #define PMC_WAKE2_MASK 0x160 @@ -1064,10 +1066,8 @@ int tegra_pmc_cpu_remove_clamping(unsigned int cpuid) return tegra_powergate_remove_clamping(id); } -static int tegra_pmc_restart_notify(struct notifier_block *this, - unsigned long action, void *data) +static void tegra_pmc_restart(const char *cmd) { - const char *cmd = data; u32 value; value = tegra_pmc_scratch_readl(pmc, pmc->soc->regs->scratch0); @@ -1090,13 +1090,33 @@ static int tegra_pmc_restart_notify(struct notifier_block *this, value = tegra_pmc_readl(pmc, PMC_CNTRL); value |= PMC_CNTRL_MAIN_RST; tegra_pmc_writel(pmc, value, PMC_CNTRL); +} - return NOTIFY_DONE; +static void tegra_pmc_restart_handler(struct restart_data *data) +{ + tegra_pmc_restart(data->cmd); } -static struct notifier_block tegra_pmc_restart_handler = { - .notifier_call = tegra_pmc_restart_notify, - .priority = 128, +static void tegra_pmc_power_off_handler(struct power_off_data *data) +{ + /* + * Reboot Nexus 7 into special bootloader mode if USB cable is + * connected in order to display battery status and power off. + */ + if (of_machine_is_compatible("asus,grouper") && + power_supply_is_system_supplied()) { + const u32 go_to_charger_mode = 0xa5a55a5a; + + tegra_pmc_writel(pmc, go_to_charger_mode, PMC_SCRATCH37); + tegra_pmc_restart(NULL); + } +} + +static struct power_handler tegra_pmc_power_handler = { + .restart_cb = tegra_pmc_restart_handler, + .power_off_cb = tegra_pmc_power_off_handler, + .power_off_priority = POWEROFF_PRIO_FIRMWARE, + .power_off_chaining_allowed = true, }; static int powergate_show(struct seq_file *s, void *data) @@ -2859,6 +2879,13 @@ static int tegra_pmc_probe(struct platform_device *pdev) pmc->clk = NULL; } + err = devm_register_power_handler(&pdev->dev, &tegra_pmc_power_handler); + if (err) { + dev_err(&pdev->dev, "unable to register power handler, %d\n", + err); + return err; + } + /* * PCLK clock rate can't be retrieved using CLK API because it * causes lockup if CPU enters LP2 idle state from some other @@ -2890,20 +2917,13 @@ static int tegra_pmc_probe(struct platform_device *pdev) goto cleanup_sysfs; } - err = register_restart_handler(&tegra_pmc_restart_handler); - if (err) { - dev_err(&pdev->dev, "unable to register restart handler, %d\n", - err); - goto cleanup_debugfs; - } - err = tegra_pmc_pinctrl_init(pmc); if (err) - goto cleanup_restart_handler; + goto cleanup_debugfs; err = tegra_pmc_regmap_init(pmc); if (err < 0) - goto cleanup_restart_handler; + goto cleanup_debugfs; err = tegra_powergate_init(pmc, pdev->dev.of_node); if (err < 0) @@ -2926,8 +2946,6 @@ static int tegra_pmc_probe(struct platform_device *pdev) cleanup_powergates: tegra_powergate_remove_all(pdev->dev.of_node); -cleanup_restart_handler: - unregister_restart_handler(&tegra_pmc_restart_handler); cleanup_debugfs: debugfs_remove(pmc->debugfs); cleanup_sysfs: From 474db1bcfbb96f71edf8ab27e43d727e66ad9add Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:24:52 +0300 Subject: [PATCH 1006/1202] mfd: ntxec: Use 
devm_register_power_handler() Use devm_register_power_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. It also provides restart-handler support, i.e. all in one API. Signed-off-by: Dmitry Osipenko --- drivers/mfd/ntxec.c | 50 ++++++++++----------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/drivers/mfd/ntxec.c b/drivers/mfd/ntxec.c index b711e73eedcb0..fd6410cbe153d 100644 --- a/drivers/mfd/ntxec.c +++ b/drivers/mfd/ntxec.c @@ -32,12 +32,11 @@ #define NTXEC_POWERKEEP_VALUE 0x0800 #define NTXEC_RESET_VALUE 0xff00 -static struct i2c_client *poweroff_restart_client; - -static void ntxec_poweroff(void) +static void ntxec_poweroff(struct power_off_data *data) { int res; u8 buf[3] = { NTXEC_REG_POWEROFF }; + struct i2c_client *poweroff_restart_client = data->cb_data; struct i2c_msg msgs[] = { { .addr = poweroff_restart_client->addr, @@ -62,8 +61,7 @@ static void ntxec_poweroff(void) msleep(5000); } -static int ntxec_restart(struct notifier_block *nb, - unsigned long action, void *data) +static void ntxec_restart(struct restart_data *data) { int res; u8 buf[3] = { NTXEC_REG_RESET }; @@ -72,6 +70,7 @@ static int ntxec_restart(struct notifier_block *nb, * it causes an I2C error. (The reset handler in the downstream driver * does send the full two-byte value, but doesn't check the result). */ + struct i2c_client *poweroff_restart_client = data->cb_data; struct i2c_msg msgs[] = { { .addr = poweroff_restart_client->addr, @@ -87,13 +86,11 @@ static int ntxec_restart(struct notifier_block *nb, if (res < 0) dev_warn(&poweroff_restart_client->dev, "Failed to restart (err = %d)\n", res); - - return NOTIFY_DONE; } -static struct notifier_block ntxec_restart_handler = { - .notifier_call = ntxec_restart, - .priority = 128, +static struct power_handler ntxec_power_handler = { + .restart_cb = ntxec_restart, + .power_off_cb = ntxec_poweroff, }; static int regmap_ignore_write(void *context, @@ -208,25 +205,12 @@ static int ntxec_probe(struct i2c_client *client) if (res < 0) return res; - if (poweroff_restart_client) - /* - * Another instance of the driver already took - * poweroff/restart duties. - */ - dev_err(ec->dev, "poweroff_restart_client already assigned\n"); - else - poweroff_restart_client = client; - - if (pm_power_off) - /* Another driver already registered a poweroff handler. 
*/ - dev_err(ec->dev, "pm_power_off already assigned\n"); - else - pm_power_off = ntxec_poweroff; - - res = register_restart_handler(&ntxec_restart_handler); + ntxec_power_handler.cb_data = client; + + res = devm_register_power_handler(ec->dev, &ntxec_power_handler); if (res) dev_err(ec->dev, - "Failed to register restart handler: %d\n", res); + "Failed to register power handler: %d\n", res); } i2c_set_clientdata(client, ec); @@ -239,17 +223,6 @@ static int ntxec_probe(struct i2c_client *client) return res; } -static int ntxec_remove(struct i2c_client *client) -{ - if (client == poweroff_restart_client) { - poweroff_restart_client = NULL; - pm_power_off = NULL; - unregister_restart_handler(&ntxec_restart_handler); - } - - return 0; -} - static const struct of_device_id of_ntxec_match_table[] = { { .compatible = "netronix,ntxec", }, {} @@ -262,7 +235,6 @@ static struct i2c_driver ntxec_driver = { .of_match_table = of_ntxec_match_table, }, .probe_new = ntxec_probe, - .remove = ntxec_remove, }; module_i2c_driver(ntxec_driver); From d346f4ed8f490526cc7a0fdef3699da2bdc9788c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:28:32 +0300 Subject: [PATCH 1007/1202] mfd: rn5t618: Use devm_register_power_handler() Use devm_register_power_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. It also provides restart-handler support, i.e. all in one API. Signed-off-by: Dmitry Osipenko --- drivers/mfd/rn5t618.c | 56 ++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index 384acb4594272..12d7b2339bbe7 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -84,9 +84,6 @@ static const struct regmap_irq_chip rc5t619_irq_chip = { .mask_invert = true, }; -static struct i2c_client *rn5t618_pm_power_off; -static struct notifier_block rn5t618_restart_handler; - static int rn5t618_irq_init(struct rn5t618 *rn5t618) { const struct regmap_irq_chip *irq_chip = NULL; @@ -115,7 +112,9 @@ static int rn5t618_irq_init(struct rn5t618 *rn5t618) return ret; } -static void rn5t618_trigger_poweroff_sequence(bool repower) +static void +rn5t618_trigger_poweroff_sequence(struct i2c_client *rn5t618_pm_power_off, + bool repower) { int ret; @@ -151,25 +150,31 @@ static void rn5t618_trigger_poweroff_sequence(bool repower) dev_alert(&rn5t618_pm_power_off->dev, "Failed to shutdown (err = %d)\n", ret); } -static void rn5t618_power_off(void) +static void rn5t618_power_off(struct power_off_data *data) { - rn5t618_trigger_poweroff_sequence(false); + struct i2c_client *client = data->cb_data; + + rn5t618_trigger_poweroff_sequence(client, false); } -static int rn5t618_restart(struct notifier_block *this, - unsigned long mode, void *cmd) +static void rn5t618_restart(struct restart_data *data) { - rn5t618_trigger_poweroff_sequence(true); + struct i2c_client *client = data->cb_data; + + rn5t618_trigger_poweroff_sequence(client, true); /* * Re-power factor detection on PMIC side is not instant. 1ms * proved to be enough time until reset takes effect. 
*/ mdelay(1); - - return NOTIFY_DONE; } +static struct power_handler rn5t618_power_handler = { + .restart_cb = rn5t618_restart, + .restart_priority = RESTART_PRIO_HIGH, +}; + static const struct of_device_id rn5t618_of_match[] = { { .compatible = "ricoh,rn5t567", .data = (void *)RN5T567 }, { .compatible = "ricoh,rn5t618", .data = (void *)RN5T618 }, @@ -221,38 +226,20 @@ static int rn5t618_i2c_probe(struct i2c_client *i2c) return ret; } - rn5t618_pm_power_off = i2c; - if (of_device_is_system_power_controller(i2c->dev.of_node)) { - if (!pm_power_off) - pm_power_off = rn5t618_power_off; - else - dev_warn(&i2c->dev, "Poweroff callback already assigned\n"); - } + if (of_device_is_system_power_controller(i2c->dev.of_node)) + rn5t618_power_handler.power_off_cb = rn5t618_power_off; - rn5t618_restart_handler.notifier_call = rn5t618_restart; - rn5t618_restart_handler.priority = 192; + rn5t618_power_handler.cb_data = i2c; - ret = register_restart_handler(&rn5t618_restart_handler); + ret = devm_register_power_handler(&i2c->dev, &rn5t618_power_handler); if (ret) { - dev_err(&i2c->dev, "cannot register restart handler, %d\n", ret); + dev_err(&i2c->dev, "failed to register power handler: %d\n", ret); return ret; } return rn5t618_irq_init(priv); } -static int rn5t618_i2c_remove(struct i2c_client *i2c) -{ - if (i2c == rn5t618_pm_power_off) { - rn5t618_pm_power_off = NULL; - pm_power_off = NULL; - } - - unregister_restart_handler(&rn5t618_restart_handler); - - return 0; -} - static int __maybe_unused rn5t618_i2c_suspend(struct device *dev) { struct rn5t618 *priv = dev_get_drvdata(dev); @@ -284,7 +271,6 @@ static struct i2c_driver rn5t618_i2c_driver = { .pm = &rn5t618_i2c_dev_pm_ops, }, .probe_new = rn5t618_i2c_probe, - .remove = rn5t618_i2c_remove, }; module_i2c_driver(rn5t618_i2c_driver); From 8cf3cd12a6cf41e4cc9b44c216a383395f44abda Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 20:53:19 +0300 Subject: [PATCH 1008/1202] mfd: acer-a500: Use devm_register_power_handler() Use devm_register_power_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. It also provides restart-handler support, i.e. all in one API. 
Signed-off-by: Dmitry Osipenko --- drivers/mfd/acer-ec-a500.c | 52 ++++++++++++++------------------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/mfd/acer-ec-a500.c b/drivers/mfd/acer-ec-a500.c index 80c2fdd14fc49..fc864abc00494 100644 --- a/drivers/mfd/acer-ec-a500.c +++ b/drivers/mfd/acer-ec-a500.c @@ -31,8 +31,6 @@ enum { REG_COLD_REBOOT = 0x55, }; -static struct i2c_client *a500_ec_client_pm_off; - static int a500_ec_read(void *context, const void *reg_buf, size_t reg_size, void *val_buf, size_t val_sizel) { @@ -104,32 +102,35 @@ static const struct regmap_bus a500_ec_regmap_bus = { .max_raw_read = 2, }; -static void a500_ec_poweroff(void) +static void a500_ec_power_off_handler(struct power_off_data *data) { - i2c_smbus_write_word_data(a500_ec_client_pm_off, - REG_SHUTDOWN, CMD_SHUTDOWN); + struct i2c_client *client = data->cb_data; + + i2c_smbus_write_word_data(client, REG_SHUTDOWN, CMD_SHUTDOWN); mdelay(A500_EC_POWER_CMD_TIMEOUT); } -static int a500_ec_restart_notify(struct notifier_block *this, - unsigned long reboot_mode, void *data) +static void a500_ec_restart_handler(struct restart_data *data) { - if (reboot_mode == REBOOT_WARM) - i2c_smbus_write_word_data(a500_ec_client_pm_off, + struct i2c_client *client = data->cb_data; + + if (data->mode == REBOOT_WARM) + i2c_smbus_write_word_data(client, REG_WARM_REBOOT, CMD_WARM_REBOOT); else - i2c_smbus_write_word_data(a500_ec_client_pm_off, + i2c_smbus_write_word_data(client, REG_COLD_REBOOT, CMD_COLD_REBOOT); mdelay(A500_EC_POWER_CMD_TIMEOUT); - - return NOTIFY_DONE; } -static struct notifier_block a500_ec_restart_handler = { - .notifier_call = a500_ec_restart_notify, - .priority = 200, +static struct power_handler a500_ec_power_handler = { + .restart_cb = a500_ec_restart_handler, + .restart_priority = RESTART_PRIO_HIGH, + + .power_off_cb = a500_ec_power_off_handler, + .power_off_priority = POWEROFF_PRIO_HIGH, }; static const struct mfd_cell a500_ec_cells[] = { @@ -156,26 +157,12 @@ static int a500_ec_probe(struct i2c_client *client) } if (of_device_is_system_power_controller(client->dev.of_node)) { - a500_ec_client_pm_off = client; + a500_ec_power_handler.cb_data = client; - err = register_restart_handler(&a500_ec_restart_handler); + err = devm_register_power_handler(&client->dev, + &a500_ec_power_handler); if (err) return err; - - if (!pm_power_off) - pm_power_off = a500_ec_poweroff; - } - - return 0; -} - -static int a500_ec_remove(struct i2c_client *client) -{ - if (of_device_is_system_power_controller(client->dev.of_node)) { - if (pm_power_off == a500_ec_poweroff) - pm_power_off = NULL; - - unregister_restart_handler(&a500_ec_restart_handler); } return 0; @@ -193,7 +180,6 @@ static struct i2c_driver a500_ec_driver = { .of_match_table = a500_ec_match, }, .probe_new = a500_ec_probe, - .remove = a500_ec_remove, }; module_i2c_driver(a500_ec_driver); From b8a571d57f16087bc7220bdaa60abc83d3349a73 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:01:06 +0300 Subject: [PATCH 1009/1202] mfd: ene-kb3930: Use devm_register_power_handler() Use devm_register_power_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. It also provides restart-handler support, i.e. all in one API. 
Signed-off-by: Dmitry Osipenko --- drivers/mfd/ene-kb3930.c | 45 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/drivers/mfd/ene-kb3930.c b/drivers/mfd/ene-kb3930.c index 1b73318d1f1fb..6a3c5f48e5e18 100644 --- a/drivers/mfd/ene-kb3930.c +++ b/drivers/mfd/ene-kb3930.c @@ -31,10 +31,9 @@ struct kb3930 { struct i2c_client *client; struct regmap *ram_regmap; struct gpio_descs *off_gpios; + struct power_handler power_handler; }; -static struct kb3930 *kb3930_power_off; - #define EC_GPIO_WAVE 0 #define EC_GPIO_OFF_MODE 1 @@ -60,21 +59,19 @@ static void kb3930_off(struct kb3930 *ddata, int off_mode) } } -static int kb3930_restart(struct notifier_block *this, - unsigned long mode, void *cmd) +static void kb3930_restart(struct restart_data *data) { - kb3930_off(kb3930_power_off, EC_OFF_MODE_REBOOT); - return NOTIFY_DONE; + struct kb3930 *ddata = data->cb_data; + + kb3930_off(ddata, EC_OFF_MODE_REBOOT); } -static void kb3930_pm_power_off(void) +static void kb3930_power_off(struct power_off_data *data) { - kb3930_off(kb3930_power_off, EC_OFF_MODE_POWER); -} + struct kb3930 *ddata = data->cb_data; -static struct notifier_block kb3930_restart_nb = { - .notifier_call = kb3930_restart, -}; + kb3930_off(ddata, EC_OFF_MODE_POWER); +} static const struct mfd_cell ariel_ec_cells[] = { { .name = "dell-wyse-ariel-led", }, @@ -131,7 +128,6 @@ static int kb3930_probe(struct i2c_client *client) if (!ddata) return -ENOMEM; - kb3930_power_off = ddata; ddata->client = client; i2c_set_clientdata(client, ddata); @@ -169,24 +165,14 @@ static int kb3930_probe(struct i2c_client *client) } if (ddata->off_gpios) { - register_restart_handler(&kb3930_restart_nb); - if (!pm_power_off) - pm_power_off = kb3930_pm_power_off; - } + ddata->power_handler.cb_data = ddata; + ddata->power_handler.restart_cb = kb3930_restart; + ddata->power_handler.power_off_cb = kb3930_power_off; - return 0; -} - -static int kb3930_remove(struct i2c_client *client) -{ - struct kb3930 *ddata = i2c_get_clientdata(client); - - if (ddata->off_gpios) { - if (pm_power_off == kb3930_pm_power_off) - pm_power_off = NULL; - unregister_restart_handler(&kb3930_restart_nb); + ret = devm_register_power_handler(dev, &ddata->power_handler); + if (ret) + return ret; } - kb3930_power_off = NULL; return 0; } @@ -199,7 +185,6 @@ MODULE_DEVICE_TABLE(of, kb3930_dt_ids); static struct i2c_driver kb3930_driver = { .probe_new = kb3930_probe, - .remove = kb3930_remove, .driver = { .name = "ene-kb3930", .of_match_table = kb3930_dt_ids, From 318fcb2ad34c0c3768b40ab97ec06dbd6733f2c9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 20:56:07 +0300 Subject: [PATCH 1010/1202] mfd: axp20x: Use register_simple_power_off_handler() Use register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. 
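The non-devm simple helper returns a handle that is passed back for unregistration on driver removal; the resulting pattern is roughly (illustrative names; the callback receives the opaque data pointer given at registration):

	static void foo_power_off(void *data)
	{
		struct foo_dev *foo = data;

		foo_write_power_off_register(foo);
	}

	/* probe: */
	foo->power_handler = register_simple_power_off_handler(foo_power_off, foo);
	if (IS_ERR(foo->power_handler))
		return PTR_ERR(foo->power_handler);

	/* remove: */
	unregister_simple_power_off_handler(foo->power_handler);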
Signed-off-by: Dmitry Osipenko --- drivers/mfd/axp20x.c | 22 +++++++++++----------- include/linux/mfd/axp20x.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index 8161a5dc68e84..db31fdb169e42 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -823,9 +824,10 @@ static const struct mfd_cell axp813_cells[] = { }, }; -static struct axp20x_dev *axp20x_pm_power_off; -static void axp20x_power_off(void) +static void axp20x_power_off(void *data) { + struct axp20x_dev *axp20x_pm_power_off = data; + if (axp20x_pm_power_off->variant == AXP288_ID) return; @@ -1000,10 +1002,12 @@ int axp20x_device_probe(struct axp20x_dev *axp20x) return ret; } - if (!pm_power_off) { - axp20x_pm_power_off = axp20x; - pm_power_off = axp20x_power_off; - } + axp20x->power_handler = + register_simple_power_off_handler(axp20x_power_off, axp20x); + + if (IS_ERR(axp20x->power_handler)) + dev_err(axp20x->dev, "failed to register power-off handler: %pe", + axp20x->power_handler); dev_info(axp20x->dev, "AXP20X driver loaded\n"); @@ -1013,11 +1017,7 @@ EXPORT_SYMBOL(axp20x_device_probe); void axp20x_device_remove(struct axp20x_dev *axp20x) { - if (axp20x == axp20x_pm_power_off) { - axp20x_pm_power_off = NULL; - pm_power_off = NULL; - } - + unregister_simple_power_off_handler(axp20x->power_handler); mfd_remove_devices(axp20x->dev); regmap_del_irq_chip(axp20x->irq, axp20x->regmap_irqc); } diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 9ab0e2fca7eac..49319a0ac3698 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -643,6 +643,7 @@ struct axp20x_dev { const struct mfd_cell *cells; const struct regmap_config *regmap_cfg; const struct regmap_irq_chip *regmap_irq_chip; + struct power_handler *power_handler; }; /* generic helper function for reading 9-16 bit wide regs */ From 15434731b94132987a594316cd095011e86b0975 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:27:06 +0300 Subject: [PATCH 1011/1202] mfd: retu: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/retu-mfd.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/mfd/retu-mfd.c b/drivers/mfd/retu-mfd.c index c748fd29a2204..d18f05c1f0950 100644 --- a/drivers/mfd/retu-mfd.c +++ b/drivers/mfd/retu-mfd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -81,9 +82,6 @@ static struct regmap_irq_chip retu_irq_chip = { .ack_base = RETU_REG_IDR, }; -/* Retu device registered for the power off. 
*/ -static struct retu_dev *retu_pm_power_off; - static const struct resource tahvo_usb_res[] = { { .name = "tahvo-usb", @@ -165,12 +163,12 @@ int retu_write(struct retu_dev *rdev, u8 reg, u16 data) } EXPORT_SYMBOL_GPL(retu_write); -static void retu_power_off(void) +static void retu_power_off(void *data) { - struct retu_dev *rdev = retu_pm_power_off; + struct retu_dev *rdev = data; int reg; - mutex_lock(&retu_pm_power_off->mutex); + mutex_lock(&rdev->mutex); /* Ignore power button state */ regmap_read(rdev->regmap, RETU_REG_CC1, ®); @@ -183,7 +181,7 @@ static void retu_power_off(void) for (;;) cpu_relax(); - mutex_unlock(&retu_pm_power_off->mutex); + mutex_unlock(&rdev->mutex); } static int retu_regmap_read(void *context, const void *reg, size_t reg_size, @@ -261,6 +259,17 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) (ret & RETU_REG_ASICR_VILMA) ? rdat->companion_name : "", (ret >> 4) & 0x7, ret & 0xf); + if (i2c->addr == 1) { + ret = devm_register_simple_power_off_handler(&i2c->dev, + retu_power_off, + rdev); + if (ret) { + dev_err(rdev->dev, + "could not register power-off handler: %d\n", ret); + return ret; + } + } + /* Mask all interrupts. */ ret = retu_write(rdev, rdat->irq_chip->mask_base, 0xffff); if (ret < 0) @@ -279,10 +288,6 @@ static int retu_probe(struct i2c_client *i2c, const struct i2c_device_id *id) return ret; } - if (i2c->addr == 1 && !pm_power_off) { - retu_pm_power_off = rdev; - pm_power_off = retu_power_off; - } return 0; } @@ -291,10 +296,6 @@ static int retu_remove(struct i2c_client *i2c) { struct retu_dev *rdev = i2c_get_clientdata(i2c); - if (retu_pm_power_off == rdev) { - pm_power_off = NULL; - retu_pm_power_off = NULL; - } mfd_remove_devices(rdev->dev); regmap_del_irq_chip(i2c->irq, rdev->irq_data); From 88ea66cc165ae67201a7f7da6b9a34f9aeb745a9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:27:59 +0300 Subject: [PATCH 1012/1202] mfd: rk808: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/rk808.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c index b181fe4013303..3bf3694690539 100644 --- a/drivers/mfd/rk808.c +++ b/drivers/mfd/rk808.c @@ -18,6 +18,7 @@ #include #include #include +#include #include struct rk808_reg_data { @@ -526,12 +527,11 @@ static const struct regmap_irq_chip rk818_irq_chip = { .init_ack_masked = true, }; -static struct i2c_client *rk808_i2c_client; - -static void rk808_pm_power_off(void) +static void rk808_pm_power_off(void *data) { int ret; unsigned int reg, bit; + struct i2c_client *rk808_i2c_client = data; struct rk808 *rk808 = i2c_get_clientdata(rk808_i2c_client); switch (rk808->variant) { @@ -725,8 +725,14 @@ static int rk808_probe(struct i2c_client *client, } if (of_property_read_bool(np, "rockchip,system-power-controller")) { - rk808_i2c_client = client; - pm_power_off = rk808_pm_power_off; + ret = devm_register_simple_power_off_handler(&client->dev, + rk808_pm_power_off, + client); + if (ret) { + dev_err(&client->dev, + "failed to register power-off handler %d\n", ret); + goto err_irq; + } } return 0; @@ -742,13 +748,6 @@ static int rk808_remove(struct i2c_client *client) regmap_del_irq_chip(client->irq, rk808->irq_data); - /** - * pm_power_off may points to a function from another module. 
- * Check if the pointer is set by us and only then overwrite it. - */ - if (pm_power_off == rk808_pm_power_off) - pm_power_off = NULL; - return 0; } From f84ff0fb1041ceb8b6f6e39dc28a6b415f70e891 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:26:36 +0300 Subject: [PATCH 1013/1202] mfd: palmas: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/palmas.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index f5b3fa973b132..c7d4d48d2fda2 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -420,12 +421,12 @@ static void palmas_dt_to_pdata(struct i2c_client *i2c, "ti,system-power-controller"); } -static struct palmas *palmas_dev; -static void palmas_power_off(void) +static void palmas_power_off(void *data) { unsigned int addr; int ret, slave; u8 powerhold_mask; + struct palmas *palmas_dev = data; struct device_node *np = palmas_dev->dev->of_node; if (of_property_read_bool(np, "ti,palmas-override-powerhold")) { @@ -680,12 +681,16 @@ static int palmas_i2c_probe(struct i2c_client *i2c, */ if (node) { ret = devm_of_platform_populate(&i2c->dev); - if (ret < 0) { + if (ret < 0) + goto err_irq; + } + + if (pdata->pm_off) { + ret = devm_register_simple_power_off_handler(&i2c->dev, + palmas_power_off, + palmas); + if (ret) goto err_irq; - } else if (pdata->pm_off && !pm_power_off) { - palmas_dev = palmas; - pm_power_off = palmas_power_off; - } } return ret; @@ -712,11 +717,6 @@ static int palmas_i2c_remove(struct i2c_client *i2c) i2c_unregister_device(palmas->i2c_clients[i]); } - if (palmas == palmas_dev) { - pm_power_off = NULL; - palmas_dev = NULL; - } - return 0; } From 3e205a8aa281cd89988d6ea4e1f7dfc670cc437c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:23:42 +0300 Subject: [PATCH 1014/1202] mfd: max8907: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. 
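The devm variant of the simple helper returns an error code instead of a handle and needs no counterpart in the remove path; roughly (illustrative):

	err = devm_register_simple_power_off_handler(&i2c->dev,
						     foo_power_off, foo);
	if (err)
		dev_err(&i2c->dev,
			"failed to register power-off handler: %d\n", err);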
Signed-off-by: Dmitry Osipenko --- drivers/mfd/max8907.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mfd/max8907.c b/drivers/mfd/max8907.c index 41f566e6a0965..58699510311bb 100644 --- a/drivers/mfd/max8907.c +++ b/drivers/mfd/max8907.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -174,9 +175,10 @@ static const struct regmap_irq_chip max8907_rtc_irq_chip = { .num_irqs = ARRAY_SIZE(max8907_rtc_irqs), }; -static struct max8907 *max8907_pm_off; -static void max8907_power_off(void) +static void max8907_power_off(void *data) { + struct max8907 *max8907_pm_off = data; + regmap_update_bits(max8907_pm_off->regmap_gen, MAX8907_REG_RESET_CNFG, MAX8907_MASK_POWER_OFF, MAX8907_MASK_POWER_OFF); } @@ -214,6 +216,17 @@ static int max8907_i2c_probe(struct i2c_client *i2c, goto err_regmap_gen; } + if (pm_off) { + ret = devm_register_simple_power_off_handler(&i2c->dev, + max8907_power_off, + max8907); + if (ret) { + dev_err(&i2c->dev, + "failed to register power-off handler: %d\n", ret); + return ret; + } + } + max8907->i2c_rtc = i2c_new_dummy_device(i2c->adapter, MAX8907_RTC_I2C_ADDR); if (IS_ERR(max8907->i2c_rtc)) { ret = PTR_ERR(max8907->i2c_rtc); @@ -260,11 +273,6 @@ static int max8907_i2c_probe(struct i2c_client *i2c, goto err_add_devices; } - if (pm_off && !pm_power_off) { - max8907_pm_off = max8907; - pm_power_off = max8907_power_off; - } - return 0; err_add_devices: From f0e2d0913013341d6a78eee22c78dc86a94f0ee2 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:28:48 +0300 Subject: [PATCH 1015/1202] mfd: tps6586x: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/tps6586x.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c index c9303d3d66027..9033ed936d1ee 100644 --- a/drivers/mfd/tps6586x.c +++ b/drivers/mfd/tps6586x.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -461,9 +462,10 @@ static const struct regmap_config tps6586x_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static struct device *tps6586x_dev; -static void tps6586x_power_off(void) +static void tps6586x_power_off(void *data) { + struct device *tps6586x_dev = data; + if (tps6586x_clr_bits(tps6586x_dev, TPS6586X_SUPPLYENE, EXITSLREQ_BIT)) return; @@ -540,6 +542,16 @@ static int tps6586x_i2c_probe(struct i2c_client *client, return ret; } + if (pdata->pm_off) { + ret = devm_register_simple_power_off_handler(&client->dev, + tps6586x_power_off, + &client->dev); + if (ret) { + dev_err(&client->dev, + "failed to register power-off handler: %d\n", ret); + return ret; + } + } if (client->irq) { ret = tps6586x_irq_init(tps6586x, client->irq, @@ -564,11 +576,6 @@ static int tps6586x_i2c_probe(struct i2c_client *client, goto err_add_devs; } - if (pdata->pm_off && !pm_power_off) { - tps6586x_dev = &client->dev; - pm_power_off = tps6586x_power_off; - } - return 0; err_add_devs: From 4429eafc407bf07750537d5e7ffbaecd458d8129 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:29:06 +0300 Subject: [PATCH 1016/1202] mfd: tps65910: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. 
Signed-off-by: Dmitry Osipenko --- drivers/mfd/tps65910.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index 6e105cca27d47..8fab30dc84e5b 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -429,9 +430,9 @@ struct tps65910_board *tps65910_parse_dt(struct i2c_client *client, } #endif -static struct i2c_client *tps65910_i2c_client; -static void tps65910_power_off(void) +static void tps65910_power_off(void *data) { + struct i2c_client *tps65910_i2c_client = data; struct tps65910 *tps65910; tps65910 = dev_get_drvdata(&tps65910_i2c_client->dev); @@ -503,9 +504,15 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, tps65910_ck32k_init(tps65910, pmic_plat_data); tps65910_sleepinit(tps65910, pmic_plat_data); - if (pmic_plat_data->pm_off && !pm_power_off) { - tps65910_i2c_client = i2c; - pm_power_off = tps65910_power_off; + if (pmic_plat_data->pm_off) { + ret = devm_register_simple_power_off_handler(&i2c->dev, + tps65910_power_off, + i2c); + if (ret) { + dev_err(&i2c->dev, + "failed to register power-off handler: %d\n", ret); + return ret; + } } ret = devm_mfd_add_devices(tps65910->dev, -1, From 477daa908f3ad2503bc721efb14ecaa781746cb5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 7 Oct 2021 05:58:57 +0300 Subject: [PATCH 1017/1202] mfd: max77620: Use devm_register_simple_power_off_handler() Use devm_register_simple_power_off_handler(), which replaces the global pm_power_off variable and allows registering multiple power-off handlers. The Nexus 7 Android tablet can be powered off either through the MAX77663 PMIC or via a special bootloader command. The bootloader option should be tried first: it has a higher priority than the PMIC handler, which uses the default priority.
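The intended ordering can be sketched like this (the priority constants come from the new power-handler API; the bootloader method is registered by the earlier soc/tegra PMC patch):

	/* Tegra PMC (bootloader method): tried first, may fall through */
	.power_off_priority = POWEROFF_PRIO_FIRMWARE,
	.power_off_chaining_allowed = true,

	/* MAX77663 PMIC: default priority via the simple helper */
	devm_register_simple_power_off_handler(chip->dev,
					       max77620_pm_power_off, chip);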
Signed-off-by: Dmitry Osipenko --- drivers/mfd/max77620.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index fec2096474ad1..29487ccc191aa 100644 --- a/drivers/mfd/max77620.c +++ b/drivers/mfd/max77620.c @@ -31,11 +31,10 @@ #include #include #include +#include #include #include -static struct max77620_chip *max77620_scratch; - static const struct resource gpio_resources[] = { DEFINE_RES_IRQ(MAX77620_IRQ_TOP_GPIO), }; @@ -483,13 +482,13 @@ static int max77620_read_es_version(struct max77620_chip *chip) return ret; } -static void max77620_pm_power_off(void) +static void max77620_pm_power_off(void *data) { - struct max77620_chip *chip = max77620_scratch; + struct max77620_chip *chip = data; regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG1, - MAX77620_ONOFFCNFG1_SFT_RST, - MAX77620_ONOFFCNFG1_SFT_RST); + MAX77620_ONOFFCNFG1_SFT_RST, + MAX77620_ONOFFCNFG1_SFT_RST); } static int max77620_probe(struct i2c_client *client, @@ -566,9 +565,13 @@ static int max77620_probe(struct i2c_client *client, } pm_off = of_device_is_system_power_controller(client->dev.of_node); - if (pm_off && !pm_power_off) { - max77620_scratch = chip; - pm_power_off = max77620_pm_power_off; + if (pm_off) { + ret = devm_register_simple_power_off_handler(chip->dev, + max77620_pm_power_off, + chip); + if (ret < 0) + dev_err(chip->dev, + "Failed to register power-off handler: %d\n", ret); } return 0; From fd3d7ca3af2202f37b8e464a80561e0c65391dd5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 20:57:50 +0300 Subject: [PATCH 1018/1202] mfd: dm355evm_msp: Use devm_register_trivial_power_off_handler() Use devm_register_trivial_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/dm355evm_msp.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c index 54fb6cbd2aa06..5ee830f655896 100644 --- a/drivers/mfd/dm355evm_msp.c +++ b/drivers/mfd/dm355evm_msp.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -375,11 +376,10 @@ static void dm355evm_power_off(void) dm355evm_command(MSP_COMMAND_POWEROFF); } -static int dm355evm_msp_remove(struct i2c_client *client) +static void dm355evm_msp_remove(void *data) { - pm_power_off = NULL; + /* FIXME remove children ... */ msp430 = NULL; - return 0; } static int @@ -392,6 +392,11 @@ dm355evm_msp_probe(struct i2c_client *client, const struct i2c_device_id *id) return -EBUSY; msp430 = client; + status = devm_add_action_or_reset(&client->dev, dm355evm_msp_remove, + NULL); + if (status < 0) + goto fail; + /* display revision status; doubles as sanity check */ status = dm355evm_msp_read(DM355EVM_MSP_FIRMREV); if (status < 0) @@ -416,13 +421,15 @@ dm355evm_msp_probe(struct i2c_client *client, const struct i2c_device_id *id) goto fail; /* PM hookup */ - pm_power_off = dm355evm_power_off; + status = devm_register_trivial_power_off_handler(&client->dev, + dm355evm_power_off); + if (status) + dev_err(&client->dev, "failed to register power-off handler: %d", + status); return 0; fail: - /* FIXME remove children ... 
*/ - dm355evm_msp_remove(client); return status; } @@ -436,7 +443,6 @@ static struct i2c_driver dm355evm_msp_driver = { .driver.name = "dm355evm_msp", .id_table = dm355evm_msp_ids, .probe = dm355evm_msp_probe, - .remove = dm355evm_msp_remove, }; static int __init dm355evm_msp_init(void) From 5f61e191ce1a26bba203c65131de8781cf7a2efe Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:29:26 +0300 Subject: [PATCH 1019/1202] mfd: twl4030: Use devm_register_trivial_power_off_handler() Use devm_register_trivial_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/twl4030-power.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c index 6b36932263ba7..72df4735d6289 100644 --- a/drivers/mfd/twl4030-power.c +++ b/drivers/mfd/twl4030-power.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -923,7 +924,7 @@ static int twl4030_power_probe(struct platform_device *pdev) } /* Board has to be wired properly to use this feature */ - if (twl4030_power_use_poweroff(pdata, node) && !pm_power_off) { + if (twl4030_power_use_poweroff(pdata, node)) { /* Default for SEQ_OFFSYNC is set, lets ensure this */ err = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val, TWL4030_PM_MASTER_CFG_P123_TRANSITION); @@ -939,7 +940,12 @@ static int twl4030_power_probe(struct platform_device *pdev) } } - pm_power_off = twl4030_power_off; + err = devm_register_trivial_power_off_handler(&pdev->dev, + twl4030_power_off); + if (err) { + dev_err(&pdev->dev, "Failed to register power-off handler\n"); + goto relock; + } } relock: From a7ef0e0098899a837cb10c8c31c722ec350f823c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 20:47:48 +0300 Subject: [PATCH 1020/1202] mfd: ab8500: Use devm_register_trivial_power_off_handler() Use devm_register_trivial_power_off_handler() that replaces global pm_power_off variable and allows to register multiple power-off handlers. Signed-off-by: Dmitry Osipenko --- drivers/mfd/ab8500-sysctrl.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c index eeeb62415f538..20e933c34983d 100644 --- a/drivers/mfd/ab8500-sysctrl.c +++ b/drivers/mfd/ab8500-sysctrl.c @@ -134,20 +134,8 @@ static int ab8500_sysctrl_probe(struct platform_device *pdev) { sysctrl_dev = &pdev->dev; - if (!pm_power_off) - pm_power_off = ab8500_power_off; - - return 0; -} - -static int ab8500_sysctrl_remove(struct platform_device *pdev) -{ - sysctrl_dev = NULL; - - if (pm_power_off == ab8500_power_off) - pm_power_off = NULL; - - return 0; + return devm_register_trivial_power_off_handler(sysctrl_dev, + ab8500_power_off); } static const struct of_device_id ab8500_sysctrl_match[] = { @@ -161,7 +149,6 @@ static struct platform_driver ab8500_sysctrl_driver = { .of_match_table = ab8500_sysctrl_match, }, .probe = ab8500_sysctrl_probe, - .remove = ab8500_sysctrl_remove, }; static int __init ab8500_sysctrl_init(void) From fbafc88ec164ae0bfa78c2ef004da4116636348d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 23:55:35 +0300 Subject: [PATCH 1021/1202] reset: ath79: Use devm_register_simple_restart_handler() Use devm_register_simple_restart_handler() helper that reduces boilerplate code. 
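The helper collapses the notifier_block boilerplate into a single call; the resulting pattern is roughly (illustrative names; cb_data is handed back through struct restart_data):

	static void foo_restart(struct restart_data *data)
	{
		struct foo_reset *priv = data->cb_data;

		writel(FOO_FULL_CHIP_RESET, priv->base);
	}

	err = devm_register_simple_restart_handler(&pdev->dev,
						   foo_restart, priv);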
Signed-off-by: Dmitry Osipenko --- drivers/reset/reset-ath79.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c index e48d8fcb31333..17af3b8cda3fa 100644 --- a/drivers/reset/reset-ath79.c +++ b/drivers/reset/reset-ath79.c @@ -72,15 +72,11 @@ static const struct reset_control_ops ath79_reset_ops = { .status = ath79_reset_status, }; -static int ath79_reset_restart_handler(struct notifier_block *nb, - unsigned long action, void *data) +static void ath79_reset_restart_handler(struct restart_data *data) { - struct ath79_reset *ath79_reset = - container_of(nb, struct ath79_reset, restart_nb); + struct ath79_reset *ath79_reset = data->cb_data; ath79_reset_assert(&ath79_reset->rcdev, FULL_CHIP_RESET); - - return NOTIFY_DONE; } static int ath79_reset_probe(struct platform_device *pdev) @@ -112,10 +108,9 @@ static int ath79_reset_probe(struct platform_device *pdev) if (err) return err; - ath79_reset->restart_nb.notifier_call = ath79_reset_restart_handler; - ath79_reset->restart_nb.priority = 128; - - err = register_restart_handler(&ath79_reset->restart_nb); + err = devm_register_simple_restart_handler(&pdev->dev, + ath79_reset_restart_handler, + ath79_reset); if (err) dev_warn(&pdev->dev, "Failed to register restart handler\n"); From 5ea569a4bb7c10cd928a34bb875cc629229f7a11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 23:56:04 +0300 Subject: [PATCH 1022/1202] reset: intel-gw: Use devm_register_simple_restart_handler() Use devm_register_simple_restart_handler() helper that reduces boilerplate code. Signed-off-by: Dmitry Osipenko --- drivers/reset/reset-intel-gw.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/reset/reset-intel-gw.c b/drivers/reset/reset-intel-gw.c index effc177db80af..d4075ca005a76 100644 --- a/drivers/reset/reset-intel-gw.c +++ b/drivers/reset/reset-intel-gw.c @@ -154,15 +154,11 @@ static int intel_reset_xlate(struct reset_controller_dev *rcdev, return id; } -static int intel_reset_restart_handler(struct notifier_block *nb, - unsigned long action, void *data) +static void intel_reset_restart_handler(struct restart_data *data) { - struct intel_reset_data *reset_data; + struct intel_reset_data *reset_data = data->cb_data; - reset_data = container_of(nb, struct intel_reset_data, restart_nb); intel_assert_device(&reset_data->rcdev, reset_data->reboot_id); - - return NOTIFY_DONE; } static int intel_reset_probe(struct platform_device *pdev) @@ -216,9 +212,8 @@ static int intel_reset_probe(struct platform_device *pdev) if (data->soc_data->legacy) data->reboot_id |= FIELD_PREP(STAT_BIT_OFFSET_MASK, rb_id[2]); - data->restart_nb.notifier_call = intel_reset_restart_handler; - data->restart_nb.priority = 128; - register_restart_handler(&data->restart_nb); + devm_register_simple_restart_handler(dev, intel_reset_restart_handler, + data); return 0; } From f66f7d3f0a2ff548547ed3a0f68565a41a4948a1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 23:56:45 +0300 Subject: [PATCH 1023/1202] reset: lpc18xx: Use devm_register_prioritized_restart_handler() Use devm_register_prioritized_restart_handler() helper that reduces boilerplate code. 
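The prioritized variant is the same call with an explicit priority taking the place of the old notifier priority of 192; roughly (illustrative):

	err = devm_register_prioritized_restart_handler(&pdev->dev,
							RESTART_PRIO_HIGH,
							foo_restart, priv);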
Signed-off-by: Dmitry Osipenko --- drivers/reset/reset-lpc18xx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/reset/reset-lpc18xx.c b/drivers/reset/reset-lpc18xx.c index 35d8dd4cccfc8..45189b2ec9e1e 100644 --- a/drivers/reset/reset-lpc18xx.c +++ b/drivers/reset/reset-lpc18xx.c @@ -41,18 +41,14 @@ struct lpc18xx_rgu_data { #define to_rgu_data(p) container_of(p, struct lpc18xx_rgu_data, rcdev) -static int lpc18xx_rgu_restart(struct notifier_block *nb, unsigned long mode, - void *cmd) +static void lpc18xx_rgu_restart(struct restart_data *data) { - struct lpc18xx_rgu_data *rc = container_of(nb, struct lpc18xx_rgu_data, - restart_nb); + struct lpc18xx_rgu_data *rc = data->cb_data; writel(BIT(LPC18XX_RGU_CORE_RST), rc->base + LPC18XX_RGU_CTRL0); mdelay(2000); pr_emerg("%s: unable to restart system\n", __func__); - - return NOTIFY_DONE; } /* @@ -198,9 +194,9 @@ static int lpc18xx_rgu_probe(struct platform_device *pdev) goto dis_clks; } - rc->restart_nb.priority = 192, - rc->restart_nb.notifier_call = lpc18xx_rgu_restart, - ret = register_restart_handler(&rc->restart_nb); + ret = devm_register_prioritized_restart_handler(&pdev->dev, + RESTART_PRIO_HIGH, + lpc18xx_rgu_restart, rc); if (ret) dev_warn(&pdev->dev, "failed to register restart handler\n"); From 8f301ba2d1e853a1c515f6504620da6d90f5b77d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 23:57:44 +0300 Subject: [PATCH 1024/1202] reset: npcm: Use devm_register_prioritized_restart_handler() Use devm_register_prioritized_restart_handler() helper that reduces boilerplate code. Signed-off-by: Dmitry Osipenko --- drivers/reset/reset-npcm.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/reset/reset-npcm.c b/drivers/reset/reset-npcm.c index 2ea4d3136e155..08fedb3b17c88 100644 --- a/drivers/reset/reset-npcm.c +++ b/drivers/reset/reset-npcm.c @@ -62,18 +62,14 @@ struct npcm_rc_data { #define to_rc_data(p) container_of(p, struct npcm_rc_data, rcdev) -static int npcm_rc_restart(struct notifier_block *nb, unsigned long mode, - void *cmd) +static void npcm_rc_restart(struct restart_data *data) { - struct npcm_rc_data *rc = container_of(nb, struct npcm_rc_data, - restart_nb); + struct npcm_rc_data *rc = data->cb_data; writel(NPCM_SWRST << rc->sw_reset_number, rc->base + NPCM_SWRSTR); mdelay(1000); pr_emerg("%s: unable to restart system\n", __func__); - - return NOTIFY_DONE; } static int npcm_rc_setclear_reset(struct reset_controller_dev *rcdev, @@ -269,9 +265,9 @@ static int npcm_rc_probe(struct platform_device *pdev) if (!of_property_read_u32(pdev->dev.of_node, "nuvoton,sw-reset-number", &rc->sw_reset_number)) { if (rc->sw_reset_number && rc->sw_reset_number < 5) { - rc->restart_nb.priority = 192, - rc->restart_nb.notifier_call = npcm_rc_restart, - ret = register_restart_handler(&rc->restart_nb); + ret = devm_register_prioritized_restart_handler(&pdev->dev, + RESTART_PRIO_HIGH, + npcm_rc_restart, rc); if (ret) dev_warn(&pdev->dev, "failed to register restart handler\n"); } From b6d2ad65f02e28ba92af94d2a189ce80f87818be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 22 Apr 2020 22:20:42 +0300 Subject: [PATCH 1025/1202] HACK: ARM: seccomp: Enforce whitelisting of clock_gettime64 After updating to Ubuntu 20.04, I found that all chromium-based software got broken. This happens because Ubuntu 20.04 updated GLIBC to 2.31, which now uses clock_gettime64 by default, but chromium doesn't whitelist this syscall for seccomp. 
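The proper fix belongs in userspace: seccomp filters have to allowlist the new time64 syscalls. A sketch of that userspace fix using libseccomp (shown only for context, not part of this patch):

	#include <seccomp.h>

	scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_TRAP);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(clock_gettime64), 0);
	seccomp_rule_add(ctx, SCMP_ACT_ALLOW, SCMP_SYS(clock_nanosleep_time64), 0);
	seccomp_load(ctx);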
clock_gettime64 is a VDSO function on ARM, and thus there is no good reason to fail software that is not ready for newer GLIBC versions. Falkon and Akregator are now working on Ubuntu 20.04 without the need to disable seccomp per application, which isn't always possible (as in the case of Akregator). Signed-off-by: Dmitry Osipenko --- arch/arm/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 43b963ea4a0e2..f41b86070aaca 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -860,7 +860,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) /* Do seccomp after ptrace; syscall may have changed. */ #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER - if (secure_computing() == -1) + scno = syscall_get_nr(current, regs); + + if (scno != __NR_clock_gettime64 && secure_computing() == -1) return -1; #else /* XXX: remove this once OABI gets fixed */ From eedfea2d75d852b87403b4f19c53dee9f6c0790b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 6 Jun 2020 15:25:55 +0300 Subject: [PATCH 1026/1202] HACK: ARM: seccomp: Enforce whitelisting of clock_nanosleep_time64 The Chromium browser sometimes crashes because it doesn't whitelist the clock_nanosleep_time64 syscall. This is a problem created by GLIBC; it won't be fixed until all userspace software that uses seccomp updates its syscall filtering rules. Signed-off-by: Dmitry Osipenko --- arch/arm/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index f41b86070aaca..6394483f7a941 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -862,7 +862,9 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER scno = syscall_get_nr(current, regs); - if (scno != __NR_clock_gettime64 && secure_computing() == -1) + if (scno != __NR_clock_gettime64 && + scno != __NR_clock_nanosleep_time64 && + secure_computing() == -1) return -1; #else /* XXX: remove this once OABI gets fixed */ From e1f474d997d7716b53fc9c57de048e6b349512a1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 22 Oct 2021 01:19:27 +0300 Subject: [PATCH 1027/1202] soc/tegra: Enable runtime PM during OPP state-syncing The GENPD core can now set up a domain's performance state properly while the device is RPM-suspended. Runtime PM of a device must be enabled during setup because GENPD checks whether the device is suspended, and this check doesn't work while RPM is disabled. Instead of replicating the boilerplate RPM-enable code around the OPP helper for each driver, let's make the OPP helper take care of enabling it. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/common.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index cd33e99249c3a..d930a2b4faccf 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ static int tegra_core_dev_init_opp_state(struct device *dev) { unsigned long rate; struct clk *clk; + bool rpm_enabled; int err; clk = devm_clk_get(dev, NULL); @@ -57,8 +59,22 @@ static int tegra_core_dev_init_opp_state(struct device *dev) return -EINVAL; } + /* + * Runtime PM of the device must be enabled in order to set up + * GENPD's performance properly because GENPD core checks whether + * device is suspended and this check doesn't work while RPM is + * disabled.
+ */ + rpm_enabled = pm_runtime_enabled(dev); + if (!rpm_enabled) + pm_runtime_enable(dev); + /* first dummy rate-setting initializes voltage vote */ err = dev_pm_opp_set_rate(dev, rate); + + if (!rpm_enabled) + pm_runtime_disable(dev); + if (err) { dev_err(dev, "failed to initialize OPP clock: %d\n", err); return err; From c800edbcc5301356dca7074b922e270402b50c86 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:56:24 +0300 Subject: [PATCH 1028/1202] soc/tegra: Add devm_tegra_core_dev_init_opp_table_common() Only a couple of drivers need to see the -ENODEV error code, while the majority of drivers need to explicitly initialize the performance state. Add a new common helper that sets up the OPP table for these drivers. Signed-off-by: Dmitry Osipenko --- include/soc/tegra/common.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h index af41ad80ec216..8ec1ac07fc85c 100644 --- a/include/soc/tegra/common.h +++ b/include/soc/tegra/common.h @@ -39,4 +39,19 @@ devm_tegra_core_dev_init_opp_table(struct device *dev, } #endif +static inline int +devm_tegra_core_dev_init_opp_table_common(struct device *dev) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + opp_params.init_state = true; + + err = devm_tegra_core_dev_init_opp_table(dev, &opp_params); + if (err != -ENODEV) + return err; + + return 0; +} + #endif /* __SOC_TEGRA_COMMON_H__ */ From ed466218f3532f89cf418f1c4af032b05fc8d5b5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 1 Jul 2021 00:45:14 +0300 Subject: [PATCH 1029/1202] soc/tegra: Don't print error message when OPPs not available Previously we assumed that devm_tegra_core_dev_init_opp_table() would be used only by drivers whose devices always have an OPP table, but this is not true anymore. For example, Tegra30 now has an OPP table for PWM while Tegra20 does not, and both use the same driver. Hence let's not print the error message about a missing OPP table in the common helper; it can be printed elsewhere. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c index d930a2b4faccf..bcc93e6f92051 100644 --- a/drivers/soc/tegra/common.c +++ b/drivers/soc/tegra/common.c @@ -127,9 +127,7 @@ int devm_tegra_core_dev_init_opp_table(struct device *dev, */ err = devm_pm_opp_of_add_table(dev); if (err) { - if (err == -ENODEV) - dev_err_once(dev, "OPP table not found, please update device-tree\n"); - else + if (err != -ENODEV) dev_err(dev, "failed to add OPP table: %d\n", err); return err; From 84829db2f9ee4d92dc8c30116b023adde9151407 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 25 Dec 2020 16:34:51 +0300 Subject: [PATCH 1030/1202] dt-bindings: clock: tegra-car: Document new clock sub-nodes Document sub-nodes which describe Tegra SoC clocks that require a higher voltage of the core power domain in order to operate properly at higher clock rates. Each node contains a phandle to an OPP table and a power domain. The root PLLs and system clocks don't have any specific device dedicated to them; the clock controller is in charge of managing power for them.
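A board's DTS would then describe such a clock roughly as follows (phandle names are placeholders; the binding example in the diff below shows the canonical form):

	sclk {
		compatible = "nvidia,tegra30-sclk";
		operating-points-v2 = <&sclk_opp_table>;
		clocks = <&tegra_car TEGRA30_CLK_SCLK>;
		power-domains = <&core_domain>;
	};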
Reviewed-by: Rob Herring Signed-off-by: Dmitry Osipenko --- .../bindings/clock/nvidia,tegra20-car.yaml | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml index 459d2a5253935..f832abb7f11ae 100644 --- a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml +++ b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.yaml @@ -42,6 +42,36 @@ properties: "#reset-cells": const: 1 +patternProperties: + "^(sclk)|(pll-[cem])$": + type: object + properties: + compatible: + enum: + - nvidia,tegra20-sclk + - nvidia,tegra30-sclk + - nvidia,tegra30-pllc + - nvidia,tegra30-plle + - nvidia,tegra30-pllm + + operating-points-v2: true + + clocks: + items: + - description: node's clock + + power-domains: + maxItems: 1 + description: phandle to the core SoC power domain + + required: + - compatible + - operating-points-v2 + - clocks + - power-domains + + additionalProperties: false + required: - compatible - reg @@ -59,6 +89,13 @@ examples: reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + sclk { + compatible = "nvidia,tegra20-sclk"; + operating-points-v2 = <&opp_table>; + clocks = <&tegra_car TEGRA20_CLK_SCLK>; + power-domains = <&domain>; + }; }; usb-controller@c5004000 { From 44a756e3dbba59a80b2a6507f3a43ba48cc8c12d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 22 Nov 2020 22:36:01 +0300 Subject: [PATCH 1031/1202] clk: tegra: Support runtime PM and power domain The Clock-and-Reset controller resides in a core power domain on NVIDIA Tegra SoCs. In order to support voltage scaling of the core power domain, we hook up DVFS-capable clocks to the core GENPD for managing the GENPD's performance state based on the clock changes. Some clocks don't have any specific physical hardware unit that backs them, like the root PLLs and the system clock, and they have their own voltage requirements. This patch adds a new clk-device driver that backs these clocks and provides runtime PM functionality for them. A virtual clk-device is created for each such DVFS-capable clock at the clock's registration time by the new tegra_clk_register() helper. The driver changes the clock device's GENPD performance state based on clk-rate notifications. As a result we have this sequence of events: 1. Clock driver creates a virtual device for the selected clocks, enables runtime PM for the created device and registers the clock. 2. Clk-device driver starts to listen to clock rate changes. 3. Something changes clk rate or enables/disables clk. 4. CCF core propagates the change through the clk tree. 5. Clk-device driver gets clock rate-change notification or GENPD core handles prepare/unprepare of the clock. 6. Clk-device driver changes GENPD performance state on clock rate change. 7. GENPD driver changes the voltage regulator state. 8. The regulator state is committed to hardware via I2C. We rely on the fact that DVFS is not needed for Tegra I2C and that the Tegra I2C driver already keeps the clock always-prepared. Hence the I2C subsystem stays independent of the clk power management and there are no deadlock spots in the sequence. Currently all clocks are registered very early during kernel boot when the device driver core isn't available yet. The clk-device can't be created at that time. This patch splits the registration of the clocks into two phases: 1. Register all essential clocks which don't use RPM and are needed during early boot. 2.
Register the rest of the clocks at a later boot time. This patch adds power management support for Tegra20 and Tegra30 clocks. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/clk/tegra/Makefile | 1 + drivers/clk/tegra/clk-device.c | 199 ++++++++++++++++++++++++++++++++ drivers/clk/tegra/clk-pll.c | 2 +- drivers/clk/tegra/clk-super.c | 2 +- drivers/clk/tegra/clk-tegra20.c | 77 +++++++++--- drivers/clk/tegra/clk-tegra30.c | 116 ++++++++++++++----- drivers/clk/tegra/clk.c | 75 +++++++++++- drivers/clk/tegra/clk.h | 2 + 8 files changed, 420 insertions(+), 54 deletions(-) create mode 100644 drivers/clk/tegra/clk-device.c diff --git a/drivers/clk/tegra/Makefile b/drivers/clk/tegra/Makefile index 7b1816856eb59..a0715cdfc1a4b 100644 --- a/drivers/clk/tegra/Makefile +++ b/drivers/clk/tegra/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += clk.o obj-y += clk-audio-sync.o +obj-y += clk-device.o obj-y += clk-dfll.o obj-y += clk-divider.o obj-y += clk-periph.o diff --git a/drivers/clk/tegra/clk-device.c b/drivers/clk/tegra/clk-device.c new file mode 100644 index 0000000000000..c58beaf8afbcd --- /dev/null +++ b/drivers/clk/tegra/clk-device.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "clk.h" + +/* + * This driver manages the performance state of the core power domain for + * the independent PLLs and system clocks. We create a virtual clock device + * for each such clock, see tegra_clk_dev_register(). + */ + +struct tegra_clk_device { + struct notifier_block clk_nb; + struct device *dev; + struct clk_hw *hw; + struct mutex lock; +}; + +static int tegra_clock_set_pd_state(struct tegra_clk_device *clk_dev, + unsigned long rate) +{ + struct device *dev = clk_dev->dev; + struct dev_pm_opp *opp; + unsigned int pstate; + + opp = dev_pm_opp_find_freq_ceil(dev, &rate); + if (opp == ERR_PTR(-ERANGE)) { + /* + * Some clocks may be unused by a particular board and they + * may have an uninitialized clock rate that is overly high. In + * this case the clock is expected to be disabled, but we still + * need to set up the performance state of the power domain and + * not error out clk initialization. A typical example is + * a PCIe clock on Android tablets.
+ */ + dev_dbg(dev, "failed to find ceil OPP for %luHz\n", rate); + opp = dev_pm_opp_find_freq_floor(dev, &rate); + } + + if (IS_ERR(opp)) { + dev_err(dev, "failed to find OPP for %luHz: %pe\n", rate, opp); + return PTR_ERR(opp); + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + return dev_pm_genpd_set_performance_state(dev, pstate); +} + +static int tegra_clock_change_notify(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct clk_notifier_data *cnd = data; + struct tegra_clk_device *clk_dev; + int err = 0; + + clk_dev = container_of(nb, struct tegra_clk_device, clk_nb); + + mutex_lock(&clk_dev->lock); + switch (msg) { + case PRE_RATE_CHANGE: + if (cnd->new_rate > cnd->old_rate) + err = tegra_clock_set_pd_state(clk_dev, cnd->new_rate); + break; + + case ABORT_RATE_CHANGE: + err = tegra_clock_set_pd_state(clk_dev, cnd->old_rate); + break; + + case POST_RATE_CHANGE: + if (cnd->new_rate < cnd->old_rate) + err = tegra_clock_set_pd_state(clk_dev, cnd->new_rate); + break; + + default: + break; + } + mutex_unlock(&clk_dev->lock); + + return notifier_from_errno(err); +} + +static int tegra_clock_sync_pd_state(struct tegra_clk_device *clk_dev) +{ + unsigned long rate; + int ret; + + mutex_lock(&clk_dev->lock); + + rate = clk_hw_get_rate(clk_dev->hw); + ret = tegra_clock_set_pd_state(clk_dev, rate); + + mutex_unlock(&clk_dev->lock); + + return ret; +} + +static int tegra_clock_probe(struct platform_device *pdev) +{ + struct tegra_core_opp_params opp_params = {}; + struct tegra_clk_device *clk_dev; + struct device *dev = &pdev->dev; + struct clk *clk; + int err; + + if (!dev->pm_domain) + return -EINVAL; + + clk_dev = devm_kzalloc(dev, sizeof(*clk_dev), GFP_KERNEL); + if (!clk_dev) + return -ENOMEM; + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + clk_dev->dev = dev; + clk_dev->hw = __clk_get_hw(clk); + clk_dev->clk_nb.notifier_call = tegra_clock_change_notify; + mutex_init(&clk_dev->lock); + + platform_set_drvdata(pdev, clk_dev); + + /* + * Runtime PM was already enabled for this device by the parent clk + * driver and power domain state should be synced under clk_dev lock, + * hence we don't use the common OPP helper that initializes OPP + * state. For some clocks the common OPP helper may fail to find the + * ceil rate; that is handled by this driver. + */ + err = devm_tegra_core_dev_init_opp_table(dev, &opp_params); + if (err) + return err; + + err = clk_notifier_register(clk, &clk_dev->clk_nb); + if (err) { + dev_err(dev, "failed to register clk notifier: %d\n", err); + return err; + } + + /* + * The driver is attaching to a potentially active/resumed clock, hence + * we need to sync the power domain performance state in accordance + * with the clock rate if the clock is resumed. + */ + err = tegra_clock_sync_pd_state(clk_dev); + if (err) + goto unreg_clk; + + return 0; + +unreg_clk: + clk_notifier_unregister(clk, &clk_dev->clk_nb); + + return err; +} + +/* + * The Tegra GENPD driver enables clocks during the NOIRQ phase. That can't + * be done for clocks served by this driver because runtime PM is unavailable + * in the NOIRQ phase. We will keep the clocks resumed during suspend to + * mitigate this problem. In practice this makes no difference from a power + * management perspective since the voltage is kept at a nominal level during + * suspend anyway.
+ */ +static const struct dev_pm_ops tegra_clock_pm = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_resume_and_get, pm_runtime_put) +}; + +static const struct of_device_id tegra_clock_match[] = { + { .compatible = "nvidia,tegra20-sclk" }, + { .compatible = "nvidia,tegra30-sclk" }, + { .compatible = "nvidia,tegra30-pllc" }, + { .compatible = "nvidia,tegra30-plle" }, + { .compatible = "nvidia,tegra30-pllm" }, + { } +}; + +static struct platform_driver tegra_clock_driver = { + .driver = { + .name = "tegra-clock", + .of_match_table = tegra_clock_match, + .pm = &tegra_clock_pm, + .suppress_bind_attrs = true, + }, + .probe = tegra_clock_probe, +}; +builtin_platform_driver(tegra_clock_driver); diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index eaa079c177c33..100b5d9b7e26e 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1914,7 +1914,7 @@ static struct clk *_tegra_clk_register_pll(struct tegra_clk_pll *pll, /* Data in .init is copied by clk_register(), so stack variable OK */ pll->hw.init = &init; - return clk_register(NULL, &pll->hw); + return tegra_clk_dev_register(&pll->hw); } struct clk *tegra_clk_register_pll(const char *name, const char *parent_name, diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c index 6099c6e9acd45..a98a420398fa1 100644 --- a/drivers/clk/tegra/clk-super.c +++ b/drivers/clk/tegra/clk-super.c @@ -226,7 +226,7 @@ struct clk *tegra_clk_register_super_mux(const char *name, /* Data in .init is copied by clk_register(), so stack variable OK */ super->hw.init = &init; - clk = clk_register(NULL, &super->hw); + clk = tegra_clk_dev_register(&super->hw); if (IS_ERR(clk)) kfree(super); diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 3664593a5ba4e..be3c33441cfc4 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -6,8 +6,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -414,7 +417,7 @@ static struct tegra_clk_pll_params pll_e_params = { .fixed_rate = 100000000, }; -static struct tegra_devclk devclks[] __initdata = { +static struct tegra_devclk devclks[] = { { .con_id = "pll_c", .dt_id = TEGRA20_CLK_PLL_C }, { .con_id = "pll_c_out1", .dt_id = TEGRA20_CLK_PLL_C_OUT1 }, { .con_id = "pll_p", .dt_id = TEGRA20_CLK_PLL_P }, @@ -710,13 +713,6 @@ static void tegra20_super_clk_init(void) NULL); clks[TEGRA20_CLK_CCLK] = clk; - /* SCLK */ - clk = tegra_clk_register_super_mux("sclk", sclk_parents, - ARRAY_SIZE(sclk_parents), - CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, - clk_base + SCLK_BURST_POLICY, 0, 4, 0, 0, NULL); - clks[TEGRA20_CLK_SCLK] = clk; - /* twd */ clk = clk_register_fixed_factor(NULL, "twd", "cclk", 0, 1, 4); clks[TEGRA20_CLK_TWD] = clk; @@ -1014,7 +1010,7 @@ static struct tegra_cpu_car_ops tegra20_cpu_car_ops = { #endif }; -static struct tegra_clk_init_table init_table[] __initdata = { +static struct tegra_clk_init_table init_table[] = { { TEGRA20_CLK_PLL_P, TEGRA20_CLK_CLK_MAX, 216000000, 1 }, { TEGRA20_CLK_PLL_P_OUT1, TEGRA20_CLK_CLK_MAX, 28800000, 1 }, { TEGRA20_CLK_PLL_P_OUT2, TEGRA20_CLK_CLK_MAX, 48000000, 1 }, @@ -1052,11 +1048,6 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA20_CLK_CLK_MAX, TEGRA20_CLK_CLK_MAX, 0, 0 }, }; -static void __init tegra20_clock_apply_init_table(void) -{ - tegra_init_from_table(init_table, clks, TEGRA20_CLK_CLK_MAX); -} - /* * Some clocks may be used by different drivers depending on the board * configuration. 
List those here to register them twice in the clock lookup @@ -1076,6 +1067,8 @@ static const struct of_device_id pmc_match[] __initconst = { { }, }; +static bool tegra20_car_initialized; + static struct clk *tegra20_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data) { @@ -1083,6 +1076,16 @@ static struct clk *tegra20_clk_src_onecell_get(struct of_phandle_args *clkspec, struct clk_hw *hw; struct clk *clk; + /* + * Timer clocks are needed early, the rest of the clocks shouldn't be + * available to device drivers until clock tree is fully initialized. + */ + if (clkspec->args[0] != TEGRA20_CLK_RTC && + clkspec->args[0] != TEGRA20_CLK_TWD && + clkspec->args[0] != TEGRA20_CLK_TIMER && + !tegra20_car_initialized) + return ERR_PTR(-EPROBE_DEFER); + clk = of_clk_src_onecell_get(clkspec, data); if (IS_ERR(clk)) return clk; @@ -1149,10 +1152,48 @@ static void __init tegra20_clock_init(struct device_node *np) tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA20_CLK_CLK_MAX); tegra_add_of_provider(np, tegra20_clk_src_onecell_get); - tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); - - tegra_clk_apply_init_table = tegra20_clock_apply_init_table; tegra_cpu_car_ops = &tegra20_cpu_car_ops; } -CLK_OF_DECLARE(tegra20, "nvidia,tegra20-car", tegra20_clock_init); +CLK_OF_DECLARE_DRIVER(tegra20, "nvidia,tegra20-car", tegra20_clock_init); + +/* + * Clocks that use runtime PM can't be created at the tegra20_clock_init + * time because drivers' base isn't initialized yet, and thus platform + * devices can't be created for the clocks. Hence we need to split the + * registration of the clocks into two phases. The first phase registers + * essential clocks which don't require RPM and are actually used during + * early boot. The second phase registers clocks which use RPM and this + * is done when device drivers' core API is ready. 
+ */ +static int tegra20_car_probe(struct platform_device *pdev) +{ + struct clk *clk; + + clk = tegra_clk_register_super_mux("sclk", sclk_parents, + ARRAY_SIZE(sclk_parents), + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, + clk_base + SCLK_BURST_POLICY, 0, 4, 0, 0, NULL); + clks[TEGRA20_CLK_SCLK] = clk; + + tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); + tegra_init_from_table(init_table, clks, TEGRA20_CLK_CLK_MAX); + tegra20_car_initialized = true; + + return 0; +} + +static const struct of_device_id tegra20_car_match[] = { + { .compatible = "nvidia,tegra20-car" }, + { } +}; + +static struct platform_driver tegra20_car_driver = { + .driver = { + .name = "tegra20-car", + .of_match_table = tegra20_car_match, + .suppress_bind_attrs = true, + }, + .probe = tegra20_car_probe, +}; +builtin_platform_driver(tegra20_car_driver); diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 64121bc66d85a..04b4961238209 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -7,8 +7,11 @@ #include #include #include +#include #include #include +#include +#include #include #include @@ -532,7 +535,7 @@ static unsigned long tegra30_input_freq[] = { [12] = 26000000, }; -static struct tegra_devclk devclks[] __initdata = { +static struct tegra_devclk devclks[] = { { .con_id = "pll_c", .dt_id = TEGRA30_CLK_PLL_C }, { .con_id = "pll_c_out1", .dt_id = TEGRA30_CLK_PLL_C_OUT1 }, { .con_id = "pll_p", .dt_id = TEGRA30_CLK_PLL_P }, @@ -812,11 +815,6 @@ static void __init tegra30_pll_init(void) { struct clk *clk; - /* PLLC */ - clk = tegra_clk_register_pll("pll_c", "pll_ref", clk_base, pmc_base, 0, - &pll_c_params, NULL); - clks[TEGRA30_CLK_PLL_C] = clk; - /* PLLC_OUT1 */ clk = tegra_clk_register_divider("pll_c_out1_div", "pll_c", clk_base + PLLC_OUT, 0, TEGRA_DIVIDER_ROUND_UP, @@ -826,11 +824,6 @@ static void __init tegra30_pll_init(void) 0, NULL); clks[TEGRA30_CLK_PLL_C_OUT1] = clk; - /* PLLM */ - clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, pmc_base, - CLK_SET_RATE_GATE, &pll_m_params, NULL); - clks[TEGRA30_CLK_PLL_M] = clk; - /* PLLM_OUT1 */ clk = tegra_clk_register_divider("pll_m_out1_div", "pll_m", clk_base + PLLM_OUT, 0, TEGRA_DIVIDER_ROUND_UP, @@ -880,9 +873,6 @@ static void __init tegra30_pll_init(void) ARRAY_SIZE(pll_e_parents), CLK_SET_RATE_NO_REPARENT, clk_base + PLLE_AUX, 2, 1, 0, NULL); - clk = tegra_clk_register_plle("pll_e", "pll_e_mux", clk_base, pmc_base, - CLK_GET_RATE_NOCACHE, &pll_e_params, NULL); - clks[TEGRA30_CLK_PLL_E] = clk; } static const char *cclk_g_parents[] = { "clk_m", "pll_c", "clk_32k", "pll_m", @@ -971,14 +961,6 @@ static void __init tegra30_super_clk_init(void) NULL); clks[TEGRA30_CLK_CCLK_LP] = clk; - /* SCLK */ - clk = tegra_clk_register_super_mux("sclk", sclk_parents, - ARRAY_SIZE(sclk_parents), - CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, - clk_base + SCLK_BURST_POLICY, - 0, 4, 0, 0, NULL); - clks[TEGRA30_CLK_SCLK] = clk; - /* twd */ clk = clk_register_fixed_factor(NULL, "twd", "cclk_g", CLK_SET_RATE_PARENT, 1, 2); @@ -1214,7 +1196,7 @@ static struct tegra_cpu_car_ops tegra30_cpu_car_ops = { #endif }; -static struct tegra_clk_init_table init_table[] __initdata = { +static struct tegra_clk_init_table init_table[] = { { TEGRA30_CLK_UARTA, TEGRA30_CLK_PLL_P, 408000000, 0 }, { TEGRA30_CLK_UARTB, TEGRA30_CLK_PLL_P, 408000000, 0 }, { TEGRA30_CLK_UARTC, TEGRA30_CLK_PLL_P, 408000000, 0 }, @@ -1259,11 +1241,6 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 
0 }, }; -static void __init tegra30_clock_apply_init_table(void) -{ - tegra_init_from_table(init_table, clks, TEGRA30_CLK_CLK_MAX); -} - /* * Some clocks may be used by different drivers depending on the board * configuration. List those here to register them twice in the clock lookup @@ -1294,12 +1271,24 @@ static struct tegra_audio_clk_info tegra30_audio_plls[] = { { "pll_a", &pll_a_params, tegra_clk_pll_a, "pll_p_out1" }, }; +static bool tegra30_car_initialized; + static struct clk *tegra30_clk_src_onecell_get(struct of_phandle_args *clkspec, void *data) { struct clk_hw *hw; struct clk *clk; + /* + * Timer clocks are needed early, the rest of the clocks shouldn't be + * available to device drivers until clock tree is fully initialized. + */ + if (clkspec->args[0] != TEGRA30_CLK_RTC && + clkspec->args[0] != TEGRA30_CLK_TWD && + clkspec->args[0] != TEGRA30_CLK_TIMER && + !tegra30_car_initialized) + return ERR_PTR(-EPROBE_DEFER); + clk = of_clk_src_onecell_get(clkspec, data); if (IS_ERR(clk)) return clk; @@ -1357,10 +1346,75 @@ static void __init tegra30_clock_init(struct device_node *np) tegra_init_dup_clks(tegra_clk_duplicates, clks, TEGRA30_CLK_CLK_MAX); tegra_add_of_provider(np, tegra30_clk_src_onecell_get); + + tegra_cpu_car_ops = &tegra30_cpu_car_ops; +} +CLK_OF_DECLARE_DRIVER(tegra30, "nvidia,tegra30-car", tegra30_clock_init); + +/* + * Clocks that use runtime PM can't be created at the tegra30_clock_init + * time because drivers' base isn't initialized yet, and thus platform + * devices can't be created for the clocks. Hence we need to split the + * registration of the clocks into two phases. The first phase registers + * essential clocks which don't require RPM and are actually used during + * early boot. The second phase registers clocks which use RPM and this + * is done when device drivers' core API is ready. + */ +static int tegra30_car_probe(struct platform_device *pdev) +{ + struct clk *clk; + + /* PLLC */ + clk = tegra_clk_register_pll("pll_c", "pll_ref", clk_base, pmc_base, 0, + &pll_c_params, NULL); + clks[TEGRA30_CLK_PLL_C] = clk; + + /* PLLE */ + clk = tegra_clk_register_plle("pll_e", "pll_e_mux", clk_base, pmc_base, + CLK_GET_RATE_NOCACHE, &pll_e_params, NULL); + clks[TEGRA30_CLK_PLL_E] = clk; + + /* PLLM */ + clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, pmc_base, + CLK_SET_RATE_GATE, &pll_m_params, NULL); + clks[TEGRA30_CLK_PLL_M] = clk; + + /* SCLK */ + clk = tegra_clk_register_super_mux("sclk", sclk_parents, + ARRAY_SIZE(sclk_parents), + CLK_SET_RATE_PARENT | CLK_IS_CRITICAL, + clk_base + SCLK_BURST_POLICY, + 0, 4, 0, 0, NULL); + clks[TEGRA30_CLK_SCLK] = clk; + tegra_register_devclks(devclks, ARRAY_SIZE(devclks)); + tegra_init_from_table(init_table, clks, TEGRA30_CLK_CLK_MAX); + tegra30_car_initialized = true; - tegra_clk_apply_init_table = tegra30_clock_apply_init_table; + return 0; +} - tegra_cpu_car_ops = &tegra30_cpu_car_ops; +static const struct of_device_id tegra30_car_match[] = { + { .compatible = "nvidia,tegra30-car" }, + { } +}; + +static struct platform_driver tegra30_car_driver = { + .driver = { + .name = "tegra30-car", + .of_match_table = tegra30_car_match, + .suppress_bind_attrs = true, + }, + .probe = tegra30_car_probe, +}; + +/* + * The clock driver must be registered before the memory controller driver, + * which doesn't support deferred probing as of today and is registered + * at arch init-level.
+ */ +static int tegra30_car_init(void) +{ + return platform_driver_register(&tegra30_car_driver); } -CLK_OF_DECLARE(tegra30, "nvidia,tegra30-car", tegra30_clock_init); +postcore_initcall(tegra30_car_init); diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c index f6cdce441cf7a..26bda45813c0d 100644 --- a/drivers/clk/tegra/clk.c +++ b/drivers/clk/tegra/clk.c @@ -9,14 +9,19 @@ #include #include #include +#include #include +#include +#include #include +#include #include #include "clk.h" /* Global data of Tegra CPU CAR ops */ +static struct device_node *tegra_car_np; static struct tegra_cpu_car_ops dummy_car_ops; struct tegra_cpu_car_ops *tegra_cpu_car_ops = &dummy_car_ops; @@ -261,8 +266,8 @@ void __init tegra_init_dup_clks(struct tegra_clk_duplicate *dup_list, } } -void __init tegra_init_from_table(struct tegra_clk_init_table *tbl, - struct clk *clks[], int clk_max) +void tegra_init_from_table(struct tegra_clk_init_table *tbl, + struct clk *clks[], int clk_max) { struct clk *clk; @@ -320,6 +325,8 @@ void __init tegra_add_of_provider(struct device_node *np, { int i; + tegra_car_np = np; + for (i = 0; i < clk_num; i++) { if (IS_ERR(clks[i])) { pr_err @@ -348,7 +355,7 @@ void __init tegra_init_special_resets(unsigned int num, special_reset_deassert = deassert; } -void __init tegra_register_devclks(struct tegra_devclk *dev_clks, int num) +void tegra_register_devclks(struct tegra_devclk *dev_clks, int num) { int i; @@ -372,6 +379,68 @@ struct clk ** __init tegra_lookup_dt_id(int clk_id, return NULL; } +static struct device_node *tegra_clk_get_of_node(struct clk_hw *hw) +{ + struct device_node *np; + char *node_name; + + node_name = kstrdup(hw->init->name, GFP_KERNEL); + if (!node_name) + return NULL; + + strreplace(node_name, '_', '-'); + + for_each_child_of_node(tegra_car_np, np) { + if (!strcmp(np->name, node_name)) + break; + } + + kfree(node_name); + + return np; +} + +struct clk *tegra_clk_dev_register(struct clk_hw *hw) +{ + struct platform_device *pdev, *parent; + const char *dev_name = NULL; + struct device *dev = NULL; + struct device_node *np; + + np = tegra_clk_get_of_node(hw); + + if (!of_device_is_available(np)) + goto put_node; + + dev_name = kasprintf(GFP_KERNEL, "tegra_clk_%s", hw->init->name); + if (!dev_name) + goto put_node; + + parent = of_find_device_by_node(tegra_car_np); + if (parent) { + pdev = of_platform_device_create(np, dev_name, &parent->dev); + put_device(&parent->dev); + + if (!pdev) { + pr_err("%s: failed to create device for %pOF\n", + __func__, np); + goto free_name; + } + + dev = &pdev->dev; + pm_runtime_enable(dev); + } else { + WARN(1, "failed to find device for %pOF\n", tegra_car_np); + } + +free_name: + kfree(dev_name); +put_node: + of_node_put(np); + + return clk_register(dev, hw); +} + tegra_clk_apply_init_table_func tegra_clk_apply_init_table; static int __init tegra_clocks_apply_init_table(void) diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h index 0c3ba0ccce1ac..5d80d8b79b8e0 100644 --- a/drivers/clk/tegra/clk.h +++ b/drivers/clk/tegra/clk.h @@ -927,4 +927,6 @@ struct clk *tegra20_clk_register_emc(void __iomem *ioaddr, bool low_jitter); struct clk *tegra210_clk_register_emc(struct device_node *np, void __iomem *regs); +struct clk *tegra_clk_dev_register(struct clk_hw *hw); + #endif /* TEGRA_CLK_H */ From b198ea640e7738355ef06305b580924ddabfa569 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 04:52:14 +0300 Subject: [PATCH 1032/1202] dt-bindings: host1x: Document OPP and power domain properties Document new 
DVFS OPP table and power domain properties of the Host1x bus and devices sitting on the bus. Reviewed-by: Rob Herring Signed-off-by: Dmitry Osipenko --- .../display/tegra/nvidia,tegra20-host1x.txt | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 8a6d3e1ee306a..62861a8fb5c68 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -20,6 +20,18 @@ Required properties: - reset-names: Must include the following entries: - host1x +Optional properties: +- operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. + +For each opp entry in 'operating-points-v2' table of host1x and its modules: +- opp-supported-hw: One bitfield indicating: + On Tegra20: SoC process ID mask + On Tegra30+: SoC speedo ID mask + + A bitwise AND is performed against the value and if any bit + matches, the OPP gets enabled. + Each host1x client module having to perform DMA through the Memory Controller should have the interconnect endpoints set to the Memory Client and External Memory respectively. @@ -45,6 +57,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to MPE power domain. - vi: video input @@ -128,6 +142,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to VENC power domain. - epp: encoder pre-processor @@ -147,6 +163,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. - isp: image signal processor @@ -166,6 +184,7 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - power-domains: Phandle to VENC or core power domain. - gr2d: 2D graphics engine @@ -185,6 +204,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to HEG or core power domain. - gr3d: 3D graphics engine @@ -209,6 +230,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. 
+ - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandles to 3D or core power domain. - dc: display controller @@ -241,6 +264,8 @@ of the following host1x client modules: - interconnect-names: Must include name of the interconnect path for each interconnect entry. Consult TRM documentation for information about available memory clients, see MEMORY CONTROLLER section. + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to core power domain. - hdmi: High Definition Multimedia Interface @@ -267,6 +292,7 @@ of the following host1x client modules: - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection - nvidia,edid: supplies a binary EDID blob - nvidia,panel: phandle of a display panel + - operating-points-v2: See ../bindings/opp/opp.txt for details. - tvo: TV encoder output @@ -277,6 +303,10 @@ of the following host1x client modules: - clocks: Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. + Optional properties: + - operating-points-v2: See ../bindings/opp/opp.txt for details. + - power-domains: Phandle to core power domain. + - dsi: display serial interface Required properties: @@ -305,6 +335,7 @@ of the following host1x client modules: - nvidia,panel: phandle of a display panel - nvidia,ganged-mode: contains a phandle to a second DSI controller to gang up with in order to support up to 8 data lanes + - operating-points-v2: See ../bindings/opp/opp.txt for details. - sor: serial output resource @@ -408,6 +439,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_HOST1X>; resets = <&tegra_car 28>; reset-names = "host1x"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; #address-cells = <1>; #size-cells = <1>; @@ -421,6 +454,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; vi@54080000 { @@ -429,6 +464,7 @@ Example: interrupts = ; assigned-clocks = <&tegra_car TEGRA210_CLK_VI>; assigned-clock-parents = <&tegra_car TEGRA210_CLK_PLL_C4_OUT0>; + operating-points-v2 = <&dvfs_opp_table>; clocks = <&tegra_car TEGRA210_CLK_VI>; power-domains = <&pd_venc>; @@ -510,6 +546,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_EPP>; resets = <&tegra_car 19>; reset-names = "epp"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; isp { @@ -528,6 +566,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_GR2D>; resets = <&tegra_car 21>; reset-names = "2d"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; gr3d { @@ -536,6 +576,8 @@ Example: clocks = <&tegra_car TEGRA20_CLK_GR3D>; resets = <&tegra_car 24>; reset-names = "3d"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; dc@54200000 { @@ -547,6 +589,8 @@ Example: clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; interconnects = <&mc TEGRA20_MC_DISPLAY0A &emc>, <&mc TEGRA20_MC_DISPLAY0B &emc>, @@ -571,6 +615,8 @@ Example: clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; interconnects = <&mc TEGRA20_MC_DISPLAY0AB &emc>, <&mc TEGRA20_MC_DISPLAY0BB &emc>, @@ -596,6 +642,7 @@ Example: resets = <&tegra_car 51>; reset-names = "hdmi"; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; tvo { @@ -604,6 +651,7 @@ Example: interrupts 
= <0 76 0x04>; clocks = <&tegra_car TEGRA20_CLK_TVO>; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; dsi { @@ -615,6 +663,7 @@ Example: resets = <&tegra_car 48>; reset-names = "dsi"; status = "disabled"; + operating-points-v2 = <&dvfs_opp_table>; }; }; From 9457185df3fb639b15c0955f4997df7f284d421f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 21 Dec 2020 02:07:47 +0300 Subject: [PATCH 1033/1202] dt-bindings: host1x: Document Memory Client resets of Host1x, GR2D and GR3D The Memory Client should be blocked before the hardware reset is asserted in order to prevent memory corruption and hanging of the memory controller. Document the Memory Client resets of the Host1x, GR2D and GR3D hardware units. Reviewed-by: Rob Herring Signed-off-by: Dmitry Osipenko --- .../bindings/display/tegra/nvidia,tegra20-host1x.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 62861a8fb5c68..e61999ce54e96 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -19,6 +19,7 @@ Required properties: See ../reset/reset.txt for details. - reset-names: Must include the following entries: - host1x + - mc Optional properties: - operating-points-v2: See ../bindings/opp/opp.txt for details. @@ -198,6 +199,7 @@ of the following host1x client modules: See ../reset/reset.txt for details. - reset-names: Must include the following entries: - 2d + - mc Optional properties: - interconnects: Must contain entry for the GR2D memory clients. @@ -224,6 +226,8 @@ of the following host1x client modules: - reset-names: Must include the following entries: - 3d - 3d2 (Only required on SoCs with two 3D clocks) + - mc + - mc2 (Only required on SoCs with two 3D clocks) Optional properties: - interconnects: Must contain entry for the GR3D memory clients. From 6664c8fb40637f04dab840225cea3d1126e69a91 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 06:10:40 +0300 Subject: [PATCH 1034/1202] gpu: host1x: Add initial runtime PM and OPP support Add runtime PM and OPP support to the Host1x driver. For starters we will keep host1x always-on because dynamic power management requires a major refactoring of the driver code, since lots of code paths are missing RPM handling and we're going to remove some of these paths in the future.
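To illustrate the pattern this change applies to the hardware-touching paths (a simplified sketch based on the debugfs hunks below, not the literal driver code; assumes <linux/pm_runtime.h>):

	static int host1x_show_state(struct host1x *m)
	{
		int err;

		/* power the hardware up before touching its registers */
		err = pm_runtime_resume_and_get(m->dev);
		if (err < 0)
			return err;

		/* ... read and dump hardware state here ... */

		pm_runtime_put(m->dev);

		return 0;
	}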
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/debug.c | 15 +++ drivers/gpu/host1x/dev.c | 150 +++++++++++++++++++++++------ drivers/gpu/host1x/dev.h | 3 +- drivers/gpu/host1x/hw/channel_hw.c | 44 ++++----- drivers/gpu/host1x/intr.c | 3 - drivers/gpu/host1x/syncpt.c | 5 +- 6 files changed, 164 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 8a14880c61bbd..18d9c8d206e3f 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -7,6 +7,7 @@ */ #include +#include #include #include @@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) { struct host1x *m = dev_get_drvdata(ch->dev->parent); struct output *o = data; + int err; + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return err; mutex_lock(&ch->cdma.lock); mutex_lock(&debug_lock); @@ -64,6 +70,8 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) mutex_unlock(&debug_lock); mutex_unlock(&ch->cdma.lock); + pm_runtime_put(m->dev); + return 0; } @@ -71,9 +79,14 @@ static void show_syncpts(struct host1x *m, struct output *o) { struct list_head *pos; unsigned int i; + int err; host1x_debug_output(o, "---- syncpts ----\n"); + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return; + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); @@ -101,6 +114,8 @@ static void show_syncpts(struct host1x *m, struct output *o) base_val); } + pm_runtime_put(m->dev); + host1x_debug_output(o, "\n"); } diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 3d4cabdbc78db..c42ab78327e76 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -6,14 +6,18 @@ */ #include +#include #include #include #include #include #include #include +#include #include +#include + #define CREATE_TRACE_POINTS #include #undef CREATE_TRACE_POINTS @@ -208,6 +212,9 @@ static void host1x_setup_sid_table(struct host1x *host) const struct host1x_info *info = host->info; unsigned int i; + if (!info->has_hypervisor) + return; + for (i = 0; i < info->num_sid_entries; i++) { const struct host1x_sid_entry *entry = &info->sid_table[i]; @@ -365,6 +372,27 @@ static void host1x_iommu_exit(struct host1x *host) } } +static int host1x_get_resets(struct host1x *host) +{ + int err; + + host->resets[0].id = "mc"; + host->resets[1].id = "host1x"; + host->nresets = ARRAY_SIZE(host->resets); + + err = devm_reset_control_bulk_get_optional_exclusive_released( + host->dev, host->nresets, host->resets); + if (err) { + dev_err(host->dev, "failed to get reset: %d\n", err); + return err; + } + + if (WARN_ON(!host->resets[1].rstc)) + return -ENOENT; + + return 0; +} + static int host1x_probe(struct platform_device *pdev) { struct host1x *host; @@ -442,12 +470,9 @@ static int host1x_probe(struct platform_device *pdev) return err; } - host->rst = devm_reset_control_get(&pdev->dev, "host1x"); - if (IS_ERR(host->rst)) { - err = PTR_ERR(host->rst); - dev_err(&pdev->dev, "failed to get reset: %d\n", err); + err = host1x_get_resets(host); + if (err) return err; - } err = host1x_iommu_init(host); if (err < 0) { @@ -462,22 +487,10 @@ static int host1x_probe(struct platform_device *pdev) goto iommu_exit; } - err = clk_prepare_enable(host->clk); - if (err < 0) { - 
dev_err(&pdev->dev, "failed to enable clock\n"); - goto free_channels; - } - - err = reset_control_deassert(host->rst); - if (err < 0) { - dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto unprepare_disable; - } - err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto reset_assert; + goto free_channels; } err = host1x_intr_init(host, syncpt_irq); @@ -486,10 +499,18 @@ static int host1x_probe(struct platform_device *pdev) goto deinit_syncpt; } - host1x_debug_init(host); + pm_runtime_enable(&pdev->dev); + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + goto pm_disable; - if (host->info->has_hypervisor) - host1x_setup_sid_table(host); + /* the driver's code isn't ready yet for the dynamic RPM */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto pm_disable; + + host1x_debug_init(host); err = host1x_register(host); if (err < 0) @@ -505,13 +526,14 @@ static int host1x_probe(struct platform_device *pdev) host1x_unregister(host); deinit_debugfs: host1x_debug_deinit(host); + + pm_runtime_put_sync_suspend(&pdev->dev); +pm_disable: + pm_runtime_disable(&pdev->dev); + host1x_intr_deinit(host); deinit_syncpt: host1x_syncpt_deinit(host); -reset_assert: - reset_control_assert(host->rst); -unprepare_disable: - clk_disable_unprepare(host->clk); free_channels: host1x_channel_list_free(&host->channel_list); iommu_exit: @@ -526,20 +548,94 @@ static int host1x_remove(struct platform_device *pdev) host1x_unregister(host); host1x_debug_deinit(host); + + pm_runtime_force_suspend(&pdev->dev); + host1x_intr_deinit(host); host1x_syncpt_deinit(host); - reset_control_assert(host->rst); - clk_disable_unprepare(host->clk); host1x_iommu_exit(host); host1x_bo_cache_destroy(&host->cache); return 0; } +static int __maybe_unused host1x_runtime_suspend(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + host1x_intr_stop(host); + host1x_syncpt_save(host); + + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(host->clk); + reset_control_bulk_release(host->nresets, host->resets); + + return 0; + +resume_host1x: + host1x_setup_sid_table(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return err; +} + +static int __maybe_unused host1x_runtime_resume(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(host->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(host->nresets, host->resets); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + host1x_setup_sid_table(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return 0; + +disable_clk: + clk_disable_unprepare(host->clk); +release_reset: + reset_control_bulk_release(host->nresets, host->resets); + + return err; +} + +static const struct dev_pm_ops host1x_pm = { + SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume, + NULL) + /* TODO: add system suspend-resume once the driver is ready for that */ +}; + static struct platform_driver tegra_host1x_driver = { .driver = { .name = "tegra-host1x",
.of_match_table = host1x_of_match, + .pm = &host1x_pm, }, .probe = host1x_probe, .remove = host1x_remove, diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 5b7fdea5d1699..ca4b082f0cd49 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -118,7 +118,8 @@ struct host1x { struct host1x_syncpt_base *bases; struct device *dev; struct clk *clk; - struct reset_control *rst; + struct reset_control_bulk_data resets[2]; + unsigned int nresets; struct iommu_group *group; struct iommu_domain *domain; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 1999780a7203a..6b40e9af1e886 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -159,6 +159,27 @@ static void host1x_channel_set_streamid(struct host1x_channel *channel) #endif } +static void host1x_enable_gather_filter(struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + struct host1x *host = dev_get_drvdata(ch->dev->parent); + u32 val; + + if (!host->hv_regs) + return; + + val = host1x_hypervisor_readl( + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); + val |= BIT(ch->id % 32); + host1x_hypervisor_writel( + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); +#elif HOST1X_HW >= 4 + host1x_ch_writel(ch, + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), + HOST1X_CHANNEL_CHANNELCTRL); +#endif +} + static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; @@ -190,6 +211,7 @@ static int channel_submit(struct host1x_job *job) } host1x_channel_set_streamid(ch); + host1x_enable_gather_filter(ch); /* begin a CDMA submit */ err = host1x_cdma_begin(&ch->cdma, job); @@ -249,27 +271,6 @@ static int channel_submit(struct host1x_job *job) return err; } -static void enable_gather_filter(struct host1x *host, - struct host1x_channel *ch) -{ -#if HOST1X_HW >= 6 - u32 val; - - if (!host->hv_regs) - return; - - val = host1x_hypervisor_readl( - host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); - val |= BIT(ch->id % 32); - host1x_hypervisor_writel( - host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); -#elif HOST1X_HW >= 4 - host1x_ch_writel(ch, - HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), - HOST1X_CHANNEL_CHANNELCTRL); -#endif -} - static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, unsigned int index) { @@ -278,7 +279,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, #else ch->regs = dev->regs + index * 0x100; #endif - enable_gather_filter(dev, ch); return 0; } diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 45b6be927ec4d..965ba21818b15 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -297,14 +297,11 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync) "host1x_sp_%02u", id); } - host1x_intr_start(host); - return 0; } void host1x_intr_deinit(struct host1x *host) { - host1x_intr_stop(host); } void host1x_intr_start(struct host1x *host) diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index d198a10848c6b..e08e331e46aea 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -143,6 +143,8 @@ void host1x_syncpt_restore(struct host1x *host) for (i = 0; i < host1x_syncpt_nb_bases(host); i++) host1x_hw_syncpt_restore_wait_base(host, sp_base + i); + host1x_hw_syncpt_enable_protection(host); + wmb(); } @@ -366,9 +368,6 @@ int host1x_syncpt_init(struct host1x *host) host->syncpt = syncpt; host->bases = bases; - 
host1x_syncpt_restore(host); - host1x_hw_syncpt_enable_protection(host); - /* Allocate sync point to use for clearing waits for expired fences */ host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) From a08669d32d9dfe56c392ef1350e0eb2b1c66031c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 00:24:27 +0300 Subject: [PATCH 1035/1202] gpu: host1x: Add host1x_channel_stop() Add host1x_channel_stop(), which waits until the channel becomes idle and then stops the channel hardware. This is needed for supporting suspend/resume by host1x drivers since the hardware state is lost after power-gating, thus the channel needs to be stopped before a client enters suspend. Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/channel.c | 8 ++++++++ include/linux/host1x.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 4cd212bb570d5..2a9a3a8d5931a 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -75,6 +75,14 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host, return ch; } +void host1x_channel_stop(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); +} +EXPORT_SYMBOL(host1x_channel_stop); + static void release_channel(struct kref *kref) { struct host1x_channel *channel = diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 2ca53d7ed7ca4..e8dc5bc41f796 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -226,6 +226,7 @@ struct host1x_job; struct host1x_channel *host1x_channel_request(struct host1x_client *client); struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); +void host1x_channel_stop(struct host1x_channel *channel); void host1x_channel_put(struct host1x_channel *channel); int host1x_job_submit(struct host1x_job *job); From c4a7d4ab63261bb8318b232b6fc58de007e82a87 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:48:03 +0300 Subject: [PATCH 1036/1202] drm/tegra: dc: Support OPP and SoC core voltage scaling Add OPP and SoC core voltage scaling support to the display controller driver. This is required for enabling system-wide DVFS on pre-Tegra186 SoCs.
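The gist of the voltage-scaling logic added below, condensed for readability (error handling trimmed; the real code follows in the diff):

	/* pick the OPP covering the pixel clock and raise the core domain */
	opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate);
	if (opp == ERR_PTR(-ERANGE))
		/* above the characterized maximum, fall back to the floor */
		opp = dev_pm_opp_find_freq_floor(dc->dev, &rate);

	pstate = dev_pm_opp_get_required_pstate(opp, 0);
	dev_pm_opp_put(opp);

	dev_pm_genpd_set_performance_state(dc->dev, pstate);

The power domain is driven directly rather than through dev_pm_opp_set_rate() because the required voltage follows the pixel clock, which depends on the CRTC's internal divider and not on the display controller clock rate alone.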
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/dc.c | 79 ++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/tegra/dc.h | 2 + 2 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index a457ee954a495..eb70eee8992aa 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -11,9 +11,12 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -1834,6 +1837,52 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc, return 0; } +static void tegra_dc_update_voltage_state(struct tegra_dc *dc, + struct tegra_dc_state *state) +{ + unsigned long rate, pstate; + struct dev_pm_opp *opp; + int err; + + if (!dc->has_opp_table) + return; + + /* calculate actual pixel clock rate which depends on internal divider */ + rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2); + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate); + + /* + * Very high resolution modes may result in a clock rate that is + * above the characterized maximum. In this case it's okay to fall + * back to the characterized maximum. + */ + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dc->dev, &rate); + + if (IS_ERR(opp)) { + dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n", + rate, opp); + return; + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + /* + * The minimum core voltage depends on the pixel clock rate (which + * depends on the internal clock divider of the CRTC) and not on the + * rate of the display controller clock. This is why we're not using + * the dev_pm_opp_set_rate() API and instead controlling the power + * domain directly.
+ */ + err = dev_pm_genpd_set_performance_state(dc->dev, pstate); + if (err) + dev_err(dc->dev, "failed to set power domain state to %lu: %d\n", + pstate, err); +} + static void tegra_dc_set_clock_rate(struct tegra_dc *dc, struct tegra_dc_state *state) { @@ -1867,6 +1916,8 @@ static void tegra_dc_set_clock_rate(struct tegra_dc *dc, DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk), state->div); DRM_DEBUG_KMS("pclk: %lu\n", state->pclk); + + tegra_dc_update_voltage_state(dc, state); } static void tegra_dc_stop(struct tegra_dc *dc) @@ -2057,6 +2108,13 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc, err = host1x_client_suspend(&dc->client); if (err < 0) dev_err(dc->dev, "failed to suspend: %d\n", err); + + if (dc->has_opp_table) { + err = dev_pm_genpd_set_performance_state(dc->dev, 0); + if (err) + dev_err(dc->dev, + "failed to clear power domain state: %d\n", err); + } } static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, @@ -3058,6 +3116,23 @@ static int tegra_dc_couple(struct tegra_dc *dc) return 0; } +static int tegra_dc_init_opp_table(struct tegra_dc *dc) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params); + if (err && err != -ENODEV) + return err; + + if (err) + dc->has_opp_table = false; + else + dc->has_opp_table = true; + + return 0; +} + static int tegra_dc_probe(struct platform_device *pdev) { u64 dma_mask = dma_get_mask(pdev->dev.parent); @@ -3123,6 +3198,10 @@ static int tegra_dc_probe(struct platform_device *pdev) tegra_powergate_power_off(dc->powergate); } + err = tegra_dc_init_opp_table(dc); + if (err < 0) + return err; + dc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(dc->regs)) return PTR_ERR(dc->regs); diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index c9c4c45c05183..3f91a10ea6c7d 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -101,6 +101,8 @@ struct tegra_dc { struct drm_info_list *debugfs_files; const struct tegra_dc_soc_info *soc; + + bool has_opp_table; }; static inline struct tegra_dc * From 1f5897385e827fade492c61db4b61523ebe0685c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:14:02 +0300 Subject: [PATCH 1037/1202] drm/tegra: hdmi: Add OPP support The HDMI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now the HDMI driver must use the OPP API for driving the controller's clock rate because the OPP API takes care of reconfiguring the domain's performance state based on the HDMI clock rate. Add OPP support to the HDMI driver.
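Conceptually the conversion is a one-liner; a hedged sketch of the before/after (not the full diff):

	/* before: sets the clock but leaves the core voltage unmanaged */
	err = clk_set_rate(hdmi->clk, hdmi->pixel_clock);

	/* after: sets the same clock and lets the OPP core adjust the
	 * domain's performance state (and thus the core voltage) to match */
	err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock);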
Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/hdmi.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index e5d2a40260288..9a87d351a8289 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -11,10 +11,13 @@ #include #include #include +#include #include #include #include +#include + #include #include #include @@ -1195,7 +1198,7 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder) h_back_porch = mode->htotal - mode->hsync_end; h_front_porch = mode->hsync_start - mode->hdisplay; - err = clk_set_rate(hdmi->clk, hdmi->pixel_clock); + err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock); if (err < 0) { dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n", err); @@ -1732,7 +1735,14 @@ static int tegra_hdmi_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, hdmi); - pm_runtime_enable(&pdev->dev); + + err = devm_pm_runtime_enable(&pdev->dev); + if (err) + return err; + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; INIT_LIST_HEAD(&hdmi->client.list); hdmi->client.ops = &hdmi_client_ops; @@ -1753,8 +1763,6 @@ static int tegra_hdmi_remove(struct platform_device *pdev) struct tegra_hdmi *hdmi = platform_get_drvdata(pdev); int err; - pm_runtime_disable(&pdev->dev); - err = host1x_client_unregister(&hdmi->client); if (err < 0) { dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", From b2232257333d33a0a82c7015428a5bc3c371d48b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:54:13 +0300 Subject: [PATCH 1038/1202] drm/tegra: gr2d: Support generic power domain and runtime PM Add runtime power management and support generic power domains. 
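The runtime PM pattern adopted below, in brief (excerpted from the diff; autosuspend keeps the unit powered for a short while between jobs so back-to-back submissions don't bounce the power domain):

	/* in gr2d_init(): power down 200ms after last use */
	pm_runtime_enable(client->dev);
	pm_runtime_use_autosuspend(client->dev);
	pm_runtime_set_autosuspend_delay(client->dev, 200);

	/* in gr2d_open_channel(): hold a PM reference while a context is open */
	err = pm_runtime_resume_and_get(client->base.dev);

	/* in gr2d_close_channel(): drop the reference, arming autosuspend */
	pm_runtime_put(context->client->base.dev);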
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gr2d.c | 184 ++++++++++++++++++++++++++++------- 1 file changed, 148 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index ba3722f1b8651..b4cd7655b3dc3 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -8,12 +8,21 @@ #include #include #include +#include #include +#include + #include "drm.h" #include "gem.h" #include "gr2d.h" +enum { + RST_MC, + RST_GR2D, + RST_GR2D_MAX, +}; + struct gr2d_soc { unsigned int version; }; @@ -21,9 +30,11 @@ struct gr2d_soc { struct gr2d { struct tegra_drm_client client; struct host1x_channel *channel; - struct reset_control *rst; struct clk *clk; + struct reset_control_bulk_data resets[RST_GR2D_MAX]; + unsigned int nresets; + const struct gr2d_soc *soc; DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS); @@ -59,15 +70,22 @@ static int gr2d_init(struct host1x_client *client) goto free; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 200); + err = tegra_drm_register_client(dev->dev_private, drm); if (err < 0) { dev_err(client->dev, "failed to register client: %d\n", err); - goto detach; + goto disable_rpm; } return 0; -detach: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); free: host1x_syncpt_put(client->syncpts[0]); @@ -88,10 +106,15 @@ static int gr2d_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr2d->channel); + gr2d->channel = NULL; + return 0; } @@ -104,10 +127,17 @@ static int gr2d_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { struct gr2d *gr2d = to_gr2d(client); + int err; + + err = pm_runtime_resume_and_get(client->base.dev); + if (err) + return err; context->channel = host1x_channel_get(gr2d->channel); - if (!context->channel) + if (!context->channel) { + pm_runtime_put(context->client->base.dev); return -ENOMEM; + } return 0; } @@ -115,6 +145,7 @@ static int gr2d_open_channel(struct tegra_drm_client *client, static void gr2d_close_channel(struct tegra_drm_context *context) { host1x_channel_put(context->channel); + pm_runtime_put(context->client->base.dev); } static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 offset) @@ -193,6 +224,27 @@ static const u32 gr2d_addr_regs[] = { GR2D_VA_BASE_ADDR_SB, }; +static int gr2d_get_resets(struct device *dev, struct gr2d *gr2d) +{ + int err; + + gr2d->resets[RST_MC].id = "mc"; + gr2d->resets[RST_GR2D].id = "2d"; + gr2d->nresets = RST_GR2D_MAX; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to get reset: %d\n", err); + return err; + } + + if (WARN_ON(!gr2d->resets[RST_GR2D].rstc)) + return -ENOENT; + + return 0; +} + static int gr2d_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -205,37 +257,23 @@ static int gr2d_probe(struct platform_device *pdev) if (!gr2d) return -ENOMEM; + platform_set_drvdata(pdev, gr2d); + gr2d->soc = of_device_get_match_data(dev); syncpts = devm_kzalloc(dev, sizeof(*syncpts), 
GFP_KERNEL); if (!syncpts) return -ENOMEM; - gr2d->rst = devm_reset_control_get(dev, NULL); - if (IS_ERR(gr2d->rst)) { - dev_err(dev, "cannot get reset\n"); - return PTR_ERR(gr2d->rst); - } - gr2d->clk = devm_clk_get(dev, NULL); if (IS_ERR(gr2d->clk)) { dev_err(dev, "cannot get clock\n"); return PTR_ERR(gr2d->clk); } - err = clk_prepare_enable(gr2d->clk); - if (err) { - dev_err(dev, "cannot turn on clock\n"); + err = gr2d_get_resets(dev, gr2d); + if (err) return err; - } - - usleep_range(2000, 4000); - - err = reset_control_deassert(gr2d->rst); - if (err < 0) { - dev_err(dev, "failed to deassert reset: %d\n", err); - goto disable_clk; - } INIT_LIST_HEAD(&gr2d->client.base.list); gr2d->client.base.ops = &gr2d_client_ops; @@ -248,26 +286,21 @@ static int gr2d_probe(struct platform_device *pdev) gr2d->client.version = gr2d->soc->version; gr2d->client.ops = &gr2d_ops; + err = devm_tegra_core_dev_init_opp_table_common(dev); + if (err) + return err; + err = host1x_client_register(&gr2d->client.base); if (err < 0) { dev_err(dev, "failed to register host1x client: %d\n", err); - goto assert_rst; + return err; } /* initialize address register map */ for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); i++) set_bit(gr2d_addr_regs[i], gr2d->addr_regs); - platform_set_drvdata(pdev, gr2d); - return 0; - -assert_rst: - (void)reset_control_assert(gr2d->rst); -disable_clk: - clk_disable_unprepare(gr2d->clk); - - return err; } static int gr2d_remove(struct platform_device *pdev) @@ -282,21 +315,100 @@ static int gr2d_remove(struct platform_device *pdev) return err; } - err = reset_control_assert(gr2d->rst); - if (err < 0) - dev_err(&pdev->dev, "failed to assert reset: %d\n", err); + return 0; +} - usleep_range(2000, 4000); +static int __maybe_unused gr2d_runtime_suspend(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr2d->channel); + reset_control_bulk_release(gr2d->nresets, gr2d->resets); + + /* + * GR2D module shouldn't be reset while hardware is idling, otherwise + * host1x's cmdproc will stuck on trying to access any G2 register + * after reset. GR2D module could be either hot-reset or reset after + * power-gating of the HEG partition. Hence we will put in reset only + * the memory client part of the module, the HEG GENPD will take care + * of resetting GR2D module across power-gating. + * + * On Tegra20 there is no HEG partition, but it's okay to have + * undetermined h/w state since userspace is expected to reprogram + * the state on each job submission anyways. 
+ */ + err = reset_control_acquire(gr2d->resets[RST_MC].rstc); + if (err) { + dev_err(dev, "failed to acquire MC reset: %d\n", err); + goto acquire_reset; + } + + err = reset_control_assert(gr2d->resets[RST_MC].rstc); + reset_control_release(gr2d->resets[RST_MC].rstc); + if (err) { + dev_err(dev, "failed to assert MC reset: %d\n", err); + goto acquire_reset; + } clk_disable_unprepare(gr2d->clk); return 0; + +acquire_reset: + reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + + return err; +} + +static int __maybe_unused gr2d_runtime_resume(struct device *dev) +{ + struct gr2d *gr2d = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(gr2d->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + usleep_range(2000, 4000); + + /* this is a reset array which deasserts both 2D MC and 2D itself */ + err = reset_control_bulk_deassert(gr2d->nresets, gr2d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_disable_unprepare(gr2d->clk); +release_reset: + reset_control_bulk_release(gr2d->nresets, gr2d->resets); + + return err; } +static const struct dev_pm_ops tegra_gr2d_pm = { + SET_RUNTIME_PM_OPS(gr2d_runtime_suspend, gr2d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr2d_driver = { .driver = { .name = "tegra-gr2d", .of_match_table = gr2d_match, + .pm = &tegra_gr2d_pm, }, .probe = gr2d_probe, .remove = gr2d_remove, From a4780bd9499453260403d96e0e67dde7b60752ce Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 02:54:31 +0300 Subject: [PATCH 1039/1202] drm/tegra: gr3d: Support generic power domain and runtime PM Add runtime power management and support generic power domains. 
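The ordering in the new runtime-resume path matters; as a sketch, using the bulk clock/reset fields from the diff below: acquire the released resets, ungate the clocks, let the partition settle, then deassert.

    #include <linux/clk.h>
    #include <linux/delay.h>
    #include <linux/reset.h>

    static int example_runtime_resume_order(struct device *dev, struct gr3d *gr3d)
    {
        int err;

        err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets);
        if (err)
            return err;

        err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks);
        if (err)
            goto release_reset;

        usleep_range(2000, 4000); /* let clocks and power stabilize */

        err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets);
        if (err)
            goto disable_clk;

        return 0;

    disable_clk:
        clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks);
    release_reset:
        reset_control_bulk_release(gr3d->nresets, gr3d->resets);
        return err;
    }

Releasing the resets again on suspend lets the power domain reset the module across power-gating without conflicting with the driver's exclusive reset ownership.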
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/gr3d.c | 363 +++++++++++++++++++++++++++++------ 1 file changed, 305 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 24442ade0da35..25f3f9820e921 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -5,32 +5,47 @@ */ #include +#include #include #include #include #include #include +#include +#include +#include #include +#include #include #include "drm.h" #include "gem.h" #include "gr3d.h" +enum { + RST_MC, + RST_GR3D, + RST_MC2, + RST_GR3D2, + RST_GR3D_MAX, +}; + struct gr3d_soc { unsigned int version; + unsigned int num_clocks; + unsigned int num_resets; }; struct gr3d { struct tegra_drm_client client; struct host1x_channel *channel; - struct clk *clk_secondary; - struct clk *clk; - struct reset_control *rst_secondary; - struct reset_control *rst; const struct gr3d_soc *soc; + struct clk_bulk_data *clocks; + unsigned int nclocks; + struct reset_control_bulk_data resets[RST_GR3D_MAX]; + unsigned int nresets; DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS); }; @@ -65,15 +80,22 @@ static int gr3d_init(struct host1x_client *client) goto free; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 200); + err = tegra_drm_register_client(dev->dev_private, drm); if (err < 0) { dev_err(client->dev, "failed to register client: %d\n", err); - goto detach; + goto disable_rpm; } return 0; -detach: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); free: host1x_syncpt_put(client->syncpts[0]); @@ -93,10 +115,15 @@ static int gr3d_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_client_iommu_detach(client); host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(gr3d->channel); + gr3d->channel = NULL; + return 0; } @@ -109,10 +136,17 @@ static int gr3d_open_channel(struct tegra_drm_client *client, struct tegra_drm_context *context) { struct gr3d *gr3d = to_gr3d(client); + int err; + + err = pm_runtime_resume_and_get(client->base.dev); + if (err) + return err; context->channel = host1x_channel_get(gr3d->channel); - if (!context->channel) + if (!context->channel) { + pm_runtime_put(context->client->base.dev); return -ENOMEM; + } return 0; } @@ -120,6 +154,7 @@ static int gr3d_open_channel(struct tegra_drm_client *client, static void gr3d_close_channel(struct tegra_drm_context *context) { host1x_channel_put(context->channel); + pm_runtime_put(context->client->base.dev); } static int gr3d_is_addr_reg(struct device *dev, u32 class, u32 offset) @@ -155,14 +190,20 @@ static const struct tegra_drm_client_ops gr3d_ops = { static const struct gr3d_soc tegra20_gr3d_soc = { .version = 0x20, + .num_clocks = 1, + .num_resets = 2, }; static const struct gr3d_soc tegra30_gr3d_soc = { .version = 0x30, + .num_clocks = 2, + .num_resets = 4, }; static const struct gr3d_soc tegra114_gr3d_soc = { .version = 0x35, + .num_clocks = 1, + .num_resets = 2, }; static const struct of_device_id tegra_gr3d_match[] = { @@ -278,69 +319,216 @@ static const u32 gr3d_addr_regs[] = { GR3D_GLOBAL_SAMP23SURFADDR(15), }; -static int gr3d_probe(struct platform_device *pdev) +static int 
gr3d_power_up_legacy_domain(struct device *dev, const char *name, + unsigned int id) { - struct device_node *np = pdev->dev.of_node; - struct host1x_syncpt **syncpts; - struct gr3d *gr3d; + struct gr3d *gr3d = dev_get_drvdata(dev); + struct reset_control *reset; + struct clk *clk; unsigned int i; int err; - gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL); - if (!gr3d) - return -ENOMEM; - - gr3d->soc = of_device_get_match_data(&pdev->dev); + /* + * Tegra20 device-tree doesn't specify 3d clock name and there is only + * one clock for Tegra20. Tegra30+ device-trees always specified names + * for the clocks. + */ + if (gr3d->nclocks == 1) { + if (id == TEGRA_POWERGATE_3D1) + return 0; + + clk = gr3d->clocks[0].clk; + } else { + for (i = 0; i < gr3d->nclocks; i++) { + if (WARN_ON(!gr3d->clocks[i].id)) + continue; + + if (!strcmp(gr3d->clocks[i].id, name)) { + clk = gr3d->clocks[i].clk; + break; + } + } - syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); - if (!syncpts) - return -ENOMEM; + if (WARN_ON(i == gr3d->nclocks)) + return -EINVAL; + } - gr3d->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(gr3d->clk)) { - dev_err(&pdev->dev, "cannot get clock\n"); - return PTR_ERR(gr3d->clk); + /* + * We use array of resets, which includes MC resets, and MC + * reset shouldn't be asserted while hardware is gated because + * MC flushing will fail for gated hardware. Hence for legacy + * PD we request the individual reset separately. + */ + reset = reset_control_get_exclusive_released(dev, name); + if (IS_ERR(reset)) + return PTR_ERR(reset); + + err = reset_control_acquire(reset); + if (err) { + dev_err(dev, "failed to acquire %s reset: %d\n", name, err); + } else { + err = tegra_powergate_sequence_power_up(id, clk, reset); + reset_control_release(reset); } - gr3d->rst = devm_reset_control_get(&pdev->dev, "3d"); - if (IS_ERR(gr3d->rst)) { - dev_err(&pdev->dev, "cannot get reset\n"); - return PTR_ERR(gr3d->rst); + reset_control_put(reset); + if (err) + return err; + + /* + * tegra_powergate_sequence_power_up() leaves clocks enabled, + * while GENPD not. Hence keep clock-enable balanced. + */ + clk_disable_unprepare(clk); + + return 0; +} + +static void gr3d_del_link(void *link) +{ + device_link_del(link); +} + +static int gr3d_init_power(struct device *dev, struct gr3d *gr3d) +{ + static const char * const opp_genpd_names[] = { "3d0", "3d1", NULL }; + const u32 link_flags = DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME; + struct device **opp_virt_devs, *pd_dev; + struct device_link *link; + unsigned int i; + int err; + + err = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (err < 0) { + if (err != -ENOENT) + return err; + + /* + * Older device-trees don't use GENPD. In this case we should + * toggle power domain manually. + */ + err = gr3d_power_up_legacy_domain(dev, "3d", + TEGRA_POWERGATE_3D); + if (err) + return err; + + err = gr3d_power_up_legacy_domain(dev, "3d2", + TEGRA_POWERGATE_3D1); + if (err) + return err; + + return 0; } - if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) { - gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2"); - if (IS_ERR(gr3d->clk_secondary)) { - dev_err(&pdev->dev, "cannot get secondary clock\n"); - return PTR_ERR(gr3d->clk_secondary); + /* + * The PM domain core automatically attaches a single power domain, + * otherwise it skips attaching completely. We have a single domain + * on Tegra20 and two domains on Tegra30+. 
+ */ + if (dev->pm_domain) + return 0; + + err = devm_pm_opp_attach_genpd(dev, opp_genpd_names, &opp_virt_devs); + if (err) + return err; + + for (i = 0; opp_genpd_names[i]; i++) { + pd_dev = opp_virt_devs[i]; + if (!pd_dev) { + dev_err(dev, "failed to get %s power domain\n", + opp_genpd_names[i]); + return -EINVAL; } - gr3d->rst_secondary = devm_reset_control_get(&pdev->dev, - "3d2"); - if (IS_ERR(gr3d->rst_secondary)) { - dev_err(&pdev->dev, "cannot get secondary reset\n"); - return PTR_ERR(gr3d->rst_secondary); + link = device_link_add(dev, pd_dev, link_flags); + if (!link) { + dev_err(dev, "failed to link to %s\n", dev_name(pd_dev)); + return -EINVAL; } + + err = devm_add_action_or_reset(dev, gr3d_del_link, link); + if (err) + return err; } - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk, - gr3d->rst); + return 0; +} + +static int gr3d_get_clocks(struct device *dev, struct gr3d *gr3d) +{ + int err; + + err = devm_clk_bulk_get_all(dev, &gr3d->clocks); if (err < 0) { - dev_err(&pdev->dev, "failed to power up 3D unit\n"); + dev_err(dev, "failed to get clock: %d\n", err); return err; } + gr3d->nclocks = err; - if (gr3d->clk_secondary) { - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1, - gr3d->clk_secondary, - gr3d->rst_secondary); - if (err < 0) { - dev_err(&pdev->dev, - "failed to power up secondary 3D unit\n"); - return err; - } + if (gr3d->nclocks != gr3d->soc->num_clocks) { + dev_err(dev, "invalid number of clocks: %u\n", gr3d->nclocks); + return -ENOENT; + } + + return 0; +} + +static int gr3d_get_resets(struct device *dev, struct gr3d *gr3d) +{ + int err; + + gr3d->resets[RST_MC].id = "mc"; + gr3d->resets[RST_MC2].id = "mc2"; + gr3d->resets[RST_GR3D].id = "3d"; + gr3d->resets[RST_GR3D2].id = "3d2"; + gr3d->nresets = gr3d->soc->num_resets; + + err = devm_reset_control_bulk_get_optional_exclusive_released( + dev, gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to get reset: %d\n", err); + return err; } + if (WARN_ON(!gr3d->resets[RST_GR3D].rstc) || + WARN_ON(!gr3d->resets[RST_GR3D2].rstc && gr3d->nresets == 4)) + return -ENOENT; + + return 0; +} + +static int gr3d_probe(struct platform_device *pdev) +{ + struct host1x_syncpt **syncpts; + struct gr3d *gr3d; + unsigned int i; + int err; + + gr3d = devm_kzalloc(&pdev->dev, sizeof(*gr3d), GFP_KERNEL); + if (!gr3d) + return -ENOMEM; + + platform_set_drvdata(pdev, gr3d); + + gr3d->soc = of_device_get_match_data(&pdev->dev); + + syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); + if (!syncpts) + return -ENOMEM; + + err = gr3d_get_clocks(&pdev->dev, gr3d); + if (err) + return err; + + err = gr3d_get_resets(&pdev->dev, gr3d); + if (err) + return err; + + err = gr3d_init_power(&pdev->dev, gr3d); + if (err) + return err; + INIT_LIST_HEAD(&gr3d->client.base.list); gr3d->client.base.ops = &gr3d_client_ops; gr3d->client.base.dev = &pdev->dev; @@ -352,6 +540,10 @@ static int gr3d_probe(struct platform_device *pdev) gr3d->client.version = gr3d->soc->version; gr3d->client.ops = &gr3d_ops; + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; + err = host1x_client_register(&gr3d->client.base); if (err < 0) { dev_err(&pdev->dev, "failed to register host1x client: %d\n", @@ -363,8 +555,6 @@ static int gr3d_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++) set_bit(gr3d_addr_regs[i], gr3d->addr_regs); - platform_set_drvdata(pdev, gr3d); - return 0; } @@ -380,23 +570,80 @@ static int gr3d_remove(struct platform_device 
*pdev) return err; } - if (gr3d->clk_secondary) { - reset_control_assert(gr3d->rst_secondary); - tegra_powergate_power_off(TEGRA_POWERGATE_3D1); - clk_disable_unprepare(gr3d->clk_secondary); + return 0; +} + +static int __maybe_unused gr3d_runtime_suspend(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + host1x_channel_stop(gr3d->channel); + + err = reset_control_bulk_assert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; } - reset_control_assert(gr3d->rst); - tegra_powergate_power_off(TEGRA_POWERGATE_3D); - clk_disable_unprepare(gr3d->clk); + usleep_range(10, 20); + + /* + * Older device-trees don't specify MC resets and power-gating can't + * be done safely in that case. Hence we will keep the power ungated + * for older DTBs. For newer DTBs, GENPD will perform the power-gating. + */ + + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); + reset_control_bulk_release(gr3d->nresets, gr3d->resets); return 0; } +static int __maybe_unused gr3d_runtime_resume(struct device *dev) +{ + struct gr3d *gr3d = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets); + if (err) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + return 0; + +disable_clk: + clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks); +release_reset: + reset_control_bulk_release(gr3d->nresets, gr3d->resets); + + return err; +} + +static const struct dev_pm_ops tegra_gr3d_pm = { + SET_RUNTIME_PM_OPS(gr3d_runtime_suspend, gr3d_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + struct platform_driver tegra_gr3d_driver = { .driver = { .name = "tegra-gr3d", .of_match_table = tegra_gr3d_match, + .pm = &tegra_gr3d_pm, }, .probe = gr3d_probe, .remove = gr3d_remove, From 66e7034eba6feb0be9e8611fa929517a2b8cc605 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 3 Dec 2020 02:51:31 +0300 Subject: [PATCH 1040/1202] drm/tegra: vic: Stop channel on suspend CDMA must be stopped before hardware is suspended. Add channel stopping to RPM suspend callback. Add system level suspend-resume callbacks. Runtime PM initialization is moved to host1x client init phase because RPM callback now uses host1x channel that is available only when host1x client is registered. 
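As a sketch, the suspend-side ordering this change enforces (field names as in the driver): the channel's CDMA is stopped before the engine is reset and its clock gated.

    static int example_vic_suspend(struct vic *vic)
    {
        int err;

        host1x_channel_stop(vic->channel); /* idle CDMA before touching hardware */

        err = reset_control_assert(vic->rst);
        if (err < 0)
            return err;

        usleep_range(10, 20);              /* hold reset briefly */
        clk_disable_unprepare(vic->clk);

        return 0;
    }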
Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/vic.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index b58e2b99f81a6..9622ca96c5395 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -151,9 +151,13 @@ static int vic_init(struct host1x_client *client) goto free_channel; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 500); + err = tegra_drm_register_client(tegra, drm); if (err < 0) - goto free_syncpt; + goto disable_rpm; /* * Inherit the DMA parameters (such as maximum segment size) from the @@ -163,7 +167,10 @@ static int vic_init(struct host1x_client *client) return 0; -free_syncpt: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); free_channel: host1x_channel_put(vic->channel); @@ -188,10 +195,15 @@ static int vic_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(vic->channel); host1x_client_iommu_detach(client); + vic->channel = NULL; + if (client->group) { dma_unmap_single(vic->dev, vic->falcon.firmware.phys, vic->falcon.firmware.size, DMA_TO_DEVICE); @@ -315,6 +327,8 @@ static int vic_runtime_suspend(struct device *dev) struct vic *vic = dev_get_drvdata(dev); int err; + host1x_channel_stop(vic->channel); + err = reset_control_assert(vic->rst); if (err < 0) return err; @@ -482,19 +496,8 @@ static int vic_probe(struct platform_device *pdev) goto exit_falcon; } - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) { - err = vic_runtime_resume(&pdev->dev); - if (err < 0) - goto unregister_client; - } - pm_runtime_set_autosuspend_delay(&pdev->dev, 500); - pm_runtime_use_autosuspend(&pdev->dev); - return 0; -unregister_client: - host1x_client_unregister(&vic->client.base); exit_falcon: falcon_exit(&vic->falcon); @@ -513,11 +516,6 @@ static int vic_remove(struct platform_device *pdev) return err; } - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_disable(&pdev->dev); - else - vic_runtime_suspend(&pdev->dev); - falcon_exit(&vic->falcon); return 0; @@ -525,6 +523,8 @@ static int vic_remove(struct platform_device *pdev) static const struct dev_pm_ops vic_pm_ops = { SET_RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; struct platform_driver tegra_vic_driver = { From 2d301e734e4ce42743435f51f2360cbb56f39ff4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 22 Oct 2021 01:03:52 +0300 Subject: [PATCH 1041/1202] drm/tegra: nvdec: Stop channel on suspend CDMA must be stopped before hardware is suspended. Add channel stopping to RPM suspend callback. Add system level suspend-resume callbacks. Runtime PM initialization is moved to host1x client init phase because RPM callback now uses host1x channel that is available only when host1x client is registered. 
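Once the runtime-PM callbacks exist, the system-sleep wiring is a one-liner; a sketch with the semantics spelled out:

    static const struct dev_pm_ops example_pm_ops = {
        /* invoked by the RPM core on idle/first-use */
        SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL)
        /*
         * pm_runtime_force_suspend() disables runtime PM and, if the device
         * is still active, runs its runtime-suspend callback for system
         * sleep; pm_runtime_force_resume() undoes both on wake-up.
         */
        SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
                                pm_runtime_force_resume)
    };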
Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/nvdec.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c index 48c90e26e90a4..0f1146be9d6dc 100644 --- a/drivers/gpu/drm/tegra/nvdec.c +++ b/drivers/gpu/drm/tegra/nvdec.c @@ -112,9 +112,13 @@ static int nvdec_init(struct host1x_client *client) goto free_channel; } + pm_runtime_enable(client->dev); + pm_runtime_use_autosuspend(client->dev); + pm_runtime_set_autosuspend_delay(client->dev, 500); + err = tegra_drm_register_client(tegra, drm); if (err < 0) - goto free_syncpt; + goto disable_rpm; /* * Inherit the DMA parameters (such as maximum segment size) from the @@ -124,7 +128,10 @@ static int nvdec_init(struct host1x_client *client) return 0; -free_syncpt: +disable_rpm: + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); free_channel: host1x_channel_put(nvdec->channel); @@ -149,10 +156,15 @@ static int nvdec_exit(struct host1x_client *client) if (err < 0) return err; + pm_runtime_dont_use_autosuspend(client->dev); + pm_runtime_force_suspend(client->dev); + host1x_syncpt_put(client->syncpts[0]); host1x_channel_put(nvdec->channel); host1x_client_iommu_detach(client); + nvdec->channel = NULL; + if (client->group) { dma_unmap_single(nvdec->dev, nvdec->falcon.firmware.phys, nvdec->falcon.firmware.size, DMA_TO_DEVICE); @@ -267,6 +279,8 @@ static int nvdec_runtime_suspend(struct device *dev) { struct nvdec *nvdec = dev_get_drvdata(dev); + host1x_channel_stop(nvdec->channel); + clk_disable_unprepare(nvdec->clk); return 0; @@ -411,10 +425,6 @@ static int nvdec_probe(struct platform_device *pdev) goto exit_falcon; } - pm_runtime_enable(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, 500); - pm_runtime_use_autosuspend(&pdev->dev); - return 0; exit_falcon: @@ -435,11 +445,6 @@ static int nvdec_remove(struct platform_device *pdev) return err; } - if (pm_runtime_enabled(&pdev->dev)) - pm_runtime_disable(&pdev->dev); - else - nvdec_runtime_suspend(&pdev->dev); - falcon_exit(&nvdec->falcon); return 0; @@ -447,6 +452,8 @@ static int nvdec_remove(struct platform_device *pdev) static const struct dev_pm_ops nvdec_pm_ops = { SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; struct platform_driver tegra_nvdec_driver = { From 577e826169e9882cdce1eea17bcf64de169e6a40 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 22 Oct 2021 02:26:55 +0300 Subject: [PATCH 1042/1202] drm/tegra: submit: Remove pm_runtime_enabled() checks Runtime PM is now universally available, make it mandatory by removing the pm_runtime_enabled() checks. 
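A sketch of the resulting engine power lifecycle around a job submission; pm_runtime_resume_and_get() drops its own reference on failure, so no put is needed in the error path (the mark_last_busy() half of this pairing lands in the next patch):

    int err = pm_runtime_resume_and_get(dev); /* take ref and resume */
    if (err < 0)
        return err;        /* reference already balanced on failure */

    /* ... submit and run the job ... */

    pm_runtime_mark_last_busy(dev);  /* restart the autosuspend timer */
    pm_runtime_put_autosuspend(dev); /* drop ref; suspend after the delay */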
Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/submit.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index c32698404e36a..3bbd8de5711cf 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -504,8 +504,7 @@ static void release_job(struct host1x_job *job) kfree(job_data->used_mappings); kfree(job_data); - if (pm_runtime_enabled(client->base.dev)) - pm_runtime_put_autosuspend(client->base.dev); + pm_runtime_put_autosuspend(client->base.dev); } int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, @@ -589,12 +588,10 @@ int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data, } /* Boot engine. */ - if (pm_runtime_enabled(context->client->base.dev)) { - err = pm_runtime_resume_and_get(context->client->base.dev); - if (err < 0) { - SUBMIT_ERR(context, "could not power up engine: %d", err); - goto unpin_job; - } + err = pm_runtime_resume_and_get(context->client->base.dev); + if (err < 0) { + SUBMIT_ERR(context, "could not power up engine: %d", err); + goto unpin_job; } job->user_data = job_data; From 99defd8647876f56d23b7eb273efbf28ecf911fa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 22 Oct 2021 02:28:02 +0300 Subject: [PATCH 1043/1202] drm/tegra: submit: Add missing pm_runtime_mark_last_busy() Runtime PM auto-suspension doesn't work without pm_runtime_mark_last_busy(), add it. Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/submit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c index 3bbd8de5711cf..6d6dd8c354757 100644 --- a/drivers/gpu/drm/tegra/submit.c +++ b/drivers/gpu/drm/tegra/submit.c @@ -504,6 +504,7 @@ static void release_job(struct host1x_job *job) kfree(job_data->used_mappings); kfree(job_data); + pm_runtime_mark_last_busy(client->base.dev); pm_runtime_put_autosuspend(client->base.dev); } From 5613633d5cfaf4b5fc1eb79e8abf8d56e41b282d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 20:56:50 +0300 Subject: [PATCH 1044/1202] usb: chipidea: tegra: Add runtime PM and OPP support The Tegra USB controller belongs to the core power domain and we're going to enable GENPD support for the core domain. Now USB controller must be resumed using runtime PM API in order to initialize the USB power state. We already support runtime PM for the CI device, but CI's PM is separated from the RPM managed by tegra-usb driver. Add runtime PM and OPP support to the driver. 
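As a sketch of the probe-side reordering below: drvdata must be set before runtime PM is enabled, because the RPM callbacks dereference it as soon as the device is first resumed. Error handling is shown but simplified.

    platform_set_drvdata(pdev, usb); /* RPM callbacks read this */

    err = devm_pm_runtime_enable(&pdev->dev); /* auto-disabled on unbind */
    if (err)
        return err;

    err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
    if (err)
        return err;

    /* powers up the controller via tegra_usb_runtime_resume() */
    err = pm_runtime_resume_and_get(&pdev->dev);
    if (err)
        return err;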
Acked-by: Peter Chen Signed-off-by: Dmitry Osipenko --- drivers/usb/chipidea/ci_hdrc_tegra.c | 55 ++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_tegra.c b/drivers/usb/chipidea/ci_hdrc_tegra.c index 60361141ac04f..0fff1e4407caf 100644 --- a/drivers/usb/chipidea/ci_hdrc_tegra.c +++ b/drivers/usb/chipidea/ci_hdrc_tegra.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,8 @@ #include #include +#include + #include "../host/ehci.h" #include "ci.h" @@ -278,6 +281,8 @@ static int tegra_usb_probe(struct platform_device *pdev) if (!usb) return -ENOMEM; + platform_set_drvdata(pdev, usb); + soc = of_device_get_match_data(&pdev->dev); if (!soc) { dev_err(&pdev->dev, "failed to match OF data\n"); @@ -296,11 +301,17 @@ static int tegra_usb_probe(struct platform_device *pdev) return err; } - err = clk_prepare_enable(usb->clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable clock: %d\n", err); + err = devm_pm_runtime_enable(&pdev->dev); + if (err) + return err; + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; + + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) return err; - } if (device_property_present(&pdev->dev, "nvidia,needs-double-reset")) usb->needs_double_reset = true; @@ -320,8 +331,6 @@ static int tegra_usb_probe(struct platform_device *pdev) if (err) goto fail_power_off; - platform_set_drvdata(pdev, usb); - /* setup and register ChipIdea HDRC device */ usb->soc = soc; usb->data.name = "tegra-usb"; @@ -350,7 +359,8 @@ static int tegra_usb_probe(struct platform_device *pdev) phy_shutdown: usb_phy_shutdown(usb->phy); fail_power_off: - clk_disable_unprepare(usb->clk); + pm_runtime_put_sync_suspend(&pdev->dev); + return err; } @@ -360,15 +370,46 @@ static int tegra_usb_remove(struct platform_device *pdev) ci_hdrc_remove_device(usb->dev); usb_phy_shutdown(usb->phy); + + pm_runtime_put_sync_suspend(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); + + return 0; +} + +static int __maybe_unused tegra_usb_runtime_resume(struct device *dev) +{ + struct tegra_usb *usb = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(usb->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_usb_runtime_suspend(struct device *dev) +{ + struct tegra_usb *usb = dev_get_drvdata(dev); + clk_disable_unprepare(usb->clk); return 0; } +static const struct dev_pm_ops tegra_usb_pm = { + SET_RUNTIME_PM_OPS(tegra_usb_runtime_suspend, tegra_usb_runtime_resume, + NULL) +}; + static struct platform_driver tegra_usb_driver = { .driver = { .name = "tegra-usb", .of_match_table = tegra_usb_of_match, + .pm = &tegra_usb_pm, }, .probe = tegra_usb_probe, .remove = tegra_usb_remove, From 0bc34d79a926d39d8b1923ebd763636d3e4d9e5d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:00:06 +0300 Subject: [PATCH 1045/1202] bus: tegra-gmi: Add runtime PM and OPP support The GMI bus on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now GMI must be resumed using runtime PM API in order to initialize the GMI power state. Add runtime PM and OPP support to the GMI driver. 
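A sketch of the resulting power flow for the GMI bus (example_gmi_use() is an illustrative wrapper, not part of the patch); the clock is now toggled indirectly through runtime PM:

    static int example_gmi_use(struct tegra_gmi *gmi)
    {
        int err;

        /* resumes the device: tegra_gmi_runtime_resume() enables the clock */
        err = pm_runtime_resume_and_get(gmi->dev);
        if (err)
            return err;

        /* ... program TEGRA_GMI_CONFIG and drive the bus ... */

        /* suspend immediately: tegra_gmi_runtime_suspend() gates the clock */
        pm_runtime_put_sync_suspend(gmi->dev);

        return 0;
    }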
Signed-off-by: Dmitry Osipenko --- drivers/bus/tegra-gmi.c | 53 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/bus/tegra-gmi.c b/drivers/bus/tegra-gmi.c index a6570789f7afe..8a5b2a61c478a 100644 --- a/drivers/bus/tegra-gmi.c +++ b/drivers/bus/tegra-gmi.c @@ -13,8 +13,11 @@ #include #include #include +#include #include +#include + #define TEGRA_GMI_CONFIG 0x00 #define TEGRA_GMI_CONFIG_GO BIT(31) #define TEGRA_GMI_BUS_WIDTH_32BIT BIT(30) @@ -54,9 +57,9 @@ static int tegra_gmi_enable(struct tegra_gmi *gmi) { int err; - err = clk_prepare_enable(gmi->clk); - if (err < 0) { - dev_err(gmi->dev, "failed to enable clock: %d\n", err); + err = pm_runtime_resume_and_get(gmi->dev); + if (err) { + pm_runtime_disable(gmi->dev); return err; } @@ -83,7 +86,9 @@ static void tegra_gmi_disable(struct tegra_gmi *gmi) writel(config, gmi->base + TEGRA_GMI_CONFIG); reset_control_assert(gmi->rst); - clk_disable_unprepare(gmi->clk); + + pm_runtime_put_sync_suspend(gmi->dev); + pm_runtime_force_suspend(gmi->dev); } static int tegra_gmi_parse_dt(struct tegra_gmi *gmi) @@ -213,6 +218,7 @@ static int tegra_gmi_probe(struct platform_device *pdev) if (!gmi) return -ENOMEM; + platform_set_drvdata(pdev, gmi); gmi->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -232,6 +238,14 @@ static int tegra_gmi_probe(struct platform_device *pdev) return PTR_ERR(gmi->rst); } + err = devm_pm_runtime_enable(gmi->dev); + if (err) + return err; + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; + err = tegra_gmi_parse_dt(gmi); if (err) return err; @@ -247,8 +261,6 @@ static int tegra_gmi_probe(struct platform_device *pdev) return err; } - platform_set_drvdata(pdev, gmi); - return 0; } @@ -262,6 +274,34 @@ static int tegra_gmi_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused tegra_gmi_runtime_resume(struct device *dev) +{ + struct tegra_gmi *gmi = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(gmi->clk); + if (err < 0) { + dev_err(gmi->dev, "failed to enable clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_gmi_runtime_suspend(struct device *dev) +{ + struct tegra_gmi *gmi = dev_get_drvdata(dev); + + clk_disable_unprepare(gmi->clk); + + return 0; +} + +static const struct dev_pm_ops tegra_gmi_pm = { + SET_RUNTIME_PM_OPS(tegra_gmi_runtime_suspend, tegra_gmi_runtime_resume, + NULL) +}; + static const struct of_device_id tegra_gmi_id_table[] = { { .compatible = "nvidia,tegra20-gmi", }, { .compatible = "nvidia,tegra30-gmi", }, @@ -275,6 +315,7 @@ static struct platform_driver tegra_gmi_driver = { .driver = { .name = "tegra-gmi", .of_match_table = tegra_gmi_id_table, + .pm = &tegra_gmi_pm, }, }; module_platform_driver(tegra_gmi_driver); From 3983e1a6f10a14bcec8cb0273f0ec08ba541e074 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:08:35 +0300 Subject: [PATCH 1046/1202] pwm: tegra: Add runtime PM and OPP support The PWM on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now PWM must be resumed using runtime PM API in order to initialize the PWM power state. The PWM clock rate must be changed using OPP API that will reconfigure the power domain performance state in accordance to the rate. Add runtime PM and OPP support to the PWM driver. 
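A sketch of the rate-setting change at the heart of this patch, assuming the driver's PWM_DUTY_WIDTH (8) and the pc->dev field: dev_pm_opp_set_rate() selects the matching OPP and adjusts the core-domain performance state together with the clock.

    #include <linux/pm_opp.h>

    static int example_pwm_set_rate(struct tegra_pwm_chip *pc, int period_ns)
    {
        unsigned long required_clk_rate;

        /* highest usable clock rate for the requested period */
        required_clk_rate = (NSEC_PER_SEC / period_ns) << PWM_DUTY_WIDTH;

        /* scales domain state and clock together, unlike clk_set_rate() */
        return dev_pm_opp_set_rate(pc->dev, required_clk_rate);
    }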
Signed-off-by: Dmitry Osipenko --- drivers/pwm/pwm-tegra.c | 84 ++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index 11a10b575ace9..0ce55644d89f1 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -42,12 +42,16 @@ #include #include #include +#include #include #include #include +#include #include #include +#include + #define PWM_ENABLE (1 << 31) #define PWM_DUTY_WIDTH 8 #define PWM_DUTY_SHIFT 16 @@ -145,7 +149,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, required_clk_rate = (NSEC_PER_SEC / period_ns) << PWM_DUTY_WIDTH; - err = clk_set_rate(pc->clk, required_clk_rate); + err = dev_pm_opp_set_rate(pc->dev, required_clk_rate); if (err < 0) return -EINVAL; @@ -181,8 +185,8 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * before writing the register. Otherwise, keep it enabled. */ if (!pwm_is_enabled(pwm)) { - err = clk_prepare_enable(pc->clk); - if (err < 0) + err = pm_runtime_resume_and_get(pc->dev); + if (err) return err; } else val |= PWM_ENABLE; @@ -193,7 +197,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * If the PWM is not enabled, turn the clock off again to save power. */ if (!pwm_is_enabled(pwm)) - clk_disable_unprepare(pc->clk); + pm_runtime_put(pc->dev); return 0; } @@ -204,8 +208,8 @@ static int tegra_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) int rc = 0; u32 val; - rc = clk_prepare_enable(pc->clk); - if (rc < 0) + rc = pm_runtime_resume_and_get(pc->dev); + if (rc) return rc; val = pwm_readl(pc, pwm->hwpwm); @@ -224,7 +228,7 @@ static void tegra_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) val &= ~PWM_ENABLE; pwm_writel(pc, pwm->hwpwm, val); - clk_disable_unprepare(pc->clk); + pm_runtime_put_sync(pc->dev); } static const struct pwm_ops tegra_pwm_ops = { @@ -256,11 +260,23 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->clk)) return PTR_ERR(pwm->clk); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; + + ret = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (ret) + return ret; + + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret) + return ret; + /* Set maximum frequency of the IP */ - ret = clk_set_rate(pwm->clk, pwm->soc->max_frequency); + ret = dev_pm_opp_set_rate(pwm->dev, pwm->soc->max_frequency); if (ret < 0) { dev_err(&pdev->dev, "Failed to set max frequency: %d\n", ret); - return ret; + goto put_pm; } /* @@ -278,7 +294,7 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (IS_ERR(pwm->rst)) { ret = PTR_ERR(pwm->rst); dev_err(&pdev->dev, "Reset control is not found: %d\n", ret); - return ret; + goto put_pm; } reset_control_deassert(pwm->rst); @@ -291,10 +307,15 @@ static int tegra_pwm_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); reset_control_assert(pwm->rst); - return ret; + goto put_pm; } + pm_runtime_put(&pdev->dev); + return 0; +put_pm: + pm_runtime_put_sync_suspend(&pdev->dev); + return ret; } static int tegra_pwm_remove(struct platform_device *pdev) @@ -305,20 +326,44 @@ static int tegra_pwm_remove(struct platform_device *pdev) reset_control_assert(pc->rst); + pm_runtime_force_suspend(&pdev->dev); + return 0; } -#ifdef CONFIG_PM_SLEEP -static int tegra_pwm_suspend(struct device *dev) +static int __maybe_unused tegra_pwm_runtime_suspend(struct device *dev) { - return 
pinctrl_pm_select_sleep_state(dev); + struct tegra_pwm_chip *pc = dev_get_drvdata(dev); + int err; + + clk_disable_unprepare(pc->clk); + + err = pinctrl_pm_select_sleep_state(dev); + if (err) { + clk_prepare_enable(pc->clk); + return err; + } + + return 0; } -static int tegra_pwm_resume(struct device *dev) +static int __maybe_unused tegra_pwm_runtime_resume(struct device *dev) { - return pinctrl_pm_select_default_state(dev); + struct tegra_pwm_chip *pc = dev_get_drvdata(dev); + int err; + + err = pinctrl_pm_select_default_state(dev); + if (err) + return err; + + err = clk_prepare_enable(pc->clk); + if (err) { + pinctrl_pm_select_sleep_state(dev); + return err; + } + + return 0; } -#endif static const struct tegra_pwm_soc tegra20_pwm_soc = { .num_channels = 4, @@ -344,7 +389,10 @@ static const struct of_device_id tegra_pwm_of_match[] = { MODULE_DEVICE_TABLE(of, tegra_pwm_of_match); static const struct dev_pm_ops tegra_pwm_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(tegra_pwm_suspend, tegra_pwm_resume) + SET_RUNTIME_PM_OPS(tegra_pwm_runtime_suspend, tegra_pwm_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra_pwm_driver = { From 625b7c7f7f1184e49fde2948d897482f0db2c188 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Sep 2021 23:08:17 +0300 Subject: [PATCH 1047/1202] mmc: sdhci-tegra: Add runtime PM and OPP support The SDHCI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now SDHCI must be resumed using runtime PM API in order to initialize the SDHCI power state. The SDHCI clock rate must be changed using OPP API that will reconfigure the power domain performance state in accordance to the rate. Add runtime PM and OPP support to the SDHCI driver. Signed-off-by: Dmitry Osipenko --- drivers/mmc/host/sdhci-tegra.c | 81 +++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index a5001875876b9..6435a75142a6f 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -24,6 +26,8 @@ #include #include +#include + #include "sdhci-pltfm.h" #include "cqhci.h" @@ -760,7 +764,9 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host); + struct device *dev = mmc_dev(host->mmc); unsigned long host_clk; + int err; if (!clock) return sdhci_set_clock(host, clock); @@ -778,7 +784,12 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) * from clk_get_rate() is used. */ host_clk = tegra_host->ddr_signaling ? 
clock * 2 : clock; - clk_set_rate(pltfm_host->clk, host_clk); + + err = dev_pm_opp_set_rate(dev, host_clk); + if (err) + dev_err(dev, "failed to set clk rate to %luHz: %d\n", + host_clk, err); + tegra_host->curr_clk_rate = host_clk; if (tegra_host->ddr_signaling) host->max_clk = host_clk; @@ -1705,7 +1716,6 @@ static int sdhci_tegra_probe(struct platform_device *pdev) "failed to get clock\n"); goto err_clk_get; } - clk_prepare_enable(clk); pltfm_host->clk = clk; tegra_host->rst = devm_reset_control_get_exclusive(&pdev->dev, @@ -1716,15 +1726,24 @@ static int sdhci_tegra_probe(struct platform_device *pdev) goto err_rst_get; } - rc = reset_control_assert(tegra_host->rst); + rc = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); if (rc) goto err_rst_get; + pm_runtime_enable(&pdev->dev); + rc = pm_runtime_resume_and_get(&pdev->dev); + if (rc) + goto err_pm_get; + + rc = reset_control_assert(tegra_host->rst); + if (rc) + goto err_rst_assert; + usleep_range(2000, 4000); rc = reset_control_deassert(tegra_host->rst); if (rc) - goto err_rst_get; + goto err_rst_assert; usleep_range(2000, 4000); @@ -1736,8 +1755,11 @@ static int sdhci_tegra_probe(struct platform_device *pdev) err_add_host: reset_control_assert(tegra_host->rst); +err_rst_assert: + pm_runtime_put_sync_suspend(&pdev->dev); +err_pm_get: + pm_runtime_disable(&pdev->dev); err_rst_get: - clk_disable_unprepare(pltfm_host->clk); err_clk_get: clk_disable_unprepare(tegra_host->tmclk); err_power_req: @@ -1756,19 +1778,38 @@ static int sdhci_tegra_remove(struct platform_device *pdev) reset_control_assert(tegra_host->rst); usleep_range(2000, 4000); - clk_disable_unprepare(pltfm_host->clk); - clk_disable_unprepare(tegra_host->tmclk); + pm_runtime_put_sync_suspend(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); + + clk_disable_unprepare(tegra_host->tmclk); sdhci_pltfm_free(pdev); return 0; } -#ifdef CONFIG_PM_SLEEP -static int __maybe_unused sdhci_tegra_suspend(struct device *dev) +static int __maybe_unused sdhci_tegra_runtime_suspend(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + + clk_disable_unprepare(pltfm_host->clk); + + return 0; +} + +static int __maybe_unused sdhci_tegra_runtime_resume(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + + return clk_prepare_enable(pltfm_host->clk); +} + +#ifdef CONFIG_PM_SLEEP +static int sdhci_tegra_suspend(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); int ret; if (host->mmc->caps2 & MMC_CAP2_CQE) { @@ -1783,17 +1824,22 @@ static int __maybe_unused sdhci_tegra_suspend(struct device *dev) return ret; } - clk_disable_unprepare(pltfm_host->clk); + ret = pm_runtime_force_suspend(dev); + if (ret) { + sdhci_resume_host(host); + cqhci_resume(host->mmc); + return ret; + } + return 0; } -static int __maybe_unused sdhci_tegra_resume(struct device *dev) +static int sdhci_tegra_resume(struct device *dev) { struct sdhci_host *host = dev_get_drvdata(dev); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); int ret; - ret = clk_prepare_enable(pltfm_host->clk); + ret = pm_runtime_force_resume(dev); if (ret) return ret; @@ -1812,13 +1858,16 @@ static int __maybe_unused sdhci_tegra_resume(struct device *dev) suspend_host: sdhci_suspend_host(host); disable_clk: - clk_disable_unprepare(pltfm_host->clk); + pm_runtime_force_suspend(dev); return ret; } #endif -static SIMPLE_DEV_PM_OPS(sdhci_tegra_dev_pm_ops, sdhci_tegra_suspend, 
- sdhci_tegra_resume); +static const struct dev_pm_ops sdhci_tegra_dev_pm_ops = { + SET_RUNTIME_PM_OPS(sdhci_tegra_runtime_suspend, sdhci_tegra_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(sdhci_tegra_suspend, sdhci_tegra_resume) +}; static struct platform_driver sdhci_tegra_driver = { .driver = { From eba0c4b5f8a21cc2e1b81436c9399e1a3d6c546e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 21:35:11 +0300 Subject: [PATCH 1048/1202] mtd: rawnand: tegra: Add runtime PM and OPP support The NAND on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now NAND must be resumed using runtime PM API in order to initialize the NAND power state. Add runtime PM and OPP support to the NAND driver. Acked-by: Miquel Raynal Signed-off-by: Dmitry Osipenko --- drivers/mtd/nand/raw/tegra_nand.c | 60 ++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 32431bbe69b8e..0124aba0b4f0f 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -17,8 +17,11 @@ #include #include #include +#include #include +#include + #define COMMAND 0x00 #define COMMAND_GO BIT(31) #define COMMAND_CLE BIT(30) @@ -1151,6 +1154,7 @@ static int tegra_nand_probe(struct platform_device *pdev) return -ENOMEM; ctrl->dev = &pdev->dev; + platform_set_drvdata(pdev, ctrl); nand_controller_init(&ctrl->controller); ctrl->controller.ops = &tegra_nand_controller_ops; @@ -1166,14 +1170,26 @@ static int tegra_nand_probe(struct platform_device *pdev) if (IS_ERR(ctrl->clk)) return PTR_ERR(ctrl->clk); - err = clk_prepare_enable(ctrl->clk); + err = devm_pm_runtime_enable(&pdev->dev); + if (err) + return err; + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + return err; + + /* + * This driver doesn't support active power management yet, + * so we will simply keep device resumed. 
+ */ + err = pm_runtime_resume_and_get(&pdev->dev); if (err) return err; err = reset_control_reset(rst); if (err) { dev_err(ctrl->dev, "Failed to reset HW: %d\n", err); - goto err_disable_clk; + goto err_put_pm; } writel_relaxed(HWSTATUS_CMD_DEFAULT, ctrl->regs + HWSTATUS_CMD); @@ -1188,21 +1204,19 @@ static int tegra_nand_probe(struct platform_device *pdev) dev_name(&pdev->dev), ctrl); if (err) { dev_err(ctrl->dev, "Failed to get IRQ: %d\n", err); - goto err_disable_clk; + goto err_put_pm; } writel_relaxed(DMA_MST_CTRL_IS_DONE, ctrl->regs + DMA_MST_CTRL); err = tegra_nand_chips_init(ctrl->dev, ctrl); if (err) - goto err_disable_clk; - - platform_set_drvdata(pdev, ctrl); + goto err_put_pm; return 0; -err_disable_clk: - clk_disable_unprepare(ctrl->clk); +err_put_pm: + pm_runtime_put_sync_suspend(ctrl->dev); return err; } @@ -1219,11 +1233,40 @@ static int tegra_nand_remove(struct platform_device *pdev) nand_cleanup(chip); + pm_runtime_put_sync_suspend(ctrl->dev); + pm_runtime_force_suspend(ctrl->dev); + + return 0; +} + +static int __maybe_unused tegra_nand_runtime_resume(struct device *dev) +{ + struct tegra_nand_controller *ctrl = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(ctrl->clk); + if (err) { + dev_err(dev, "Failed to enable clock: %d\n", err); + return err; + } + + return 0; +} + +static int __maybe_unused tegra_nand_runtime_suspend(struct device *dev) +{ + struct tegra_nand_controller *ctrl = dev_get_drvdata(dev); + clk_disable_unprepare(ctrl->clk); return 0; } +static const struct dev_pm_ops tegra_nand_pm = { + SET_RUNTIME_PM_OPS(tegra_nand_runtime_suspend, tegra_nand_runtime_resume, + NULL) +}; + static const struct of_device_id tegra_nand_of_match[] = { { .compatible = "nvidia,tegra20-nand" }, { /* sentinel */ } @@ -1234,6 +1277,7 @@ static struct platform_driver tegra_nand_driver = { .driver = { .name = "tegra-nand", .of_match_table = tegra_nand_of_match, + .pm = &tegra_nand_pm, }, .probe = tegra_nand_probe, .remove = tegra_nand_remove, From 720251a42b60d031dae3679e9b4fd3910ca5e18e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 22:09:56 +0300 Subject: [PATCH 1049/1202] spi: tegra20-slink: Add OPP support The SPI on Tegra belongs to the core power domain and we're going to enable GENPD support for the core domain. Now SPI driver must use OPP API for driving the controller's clock rate because OPP API takes care of reconfiguring the domain's performance state in accordance to the rate. Add OPP support to the driver. 
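A sketch of the transfer-path change, matching the diff below: the controller clock runs at four times the SPI bit rate, and the return value is ignored just as the previous clk_set_rate() call's was.

    /* in tegra_slink_start_transfer_one(), per transfer */
    if (speed != tspi->cur_speed) {
        dev_pm_opp_set_rate(tspi->dev, speed * 4); /* domain state follows */
        tspi->cur_speed = speed;
    }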
Acked-by: Mark Brown Signed-off-by: Dmitry Osipenko --- drivers/spi/spi-tegra20-slink.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 33302f6b42d7b..584fa25d39183 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -18,12 +18,15 @@ #include #include #include +#include #include #include #include #include #include +#include + #define SLINK_COMMAND 0x000 #define SLINK_BIT_LENGTH(x) (((x) & 0x1f) << 0) #define SLINK_WORD_SIZE(x) (((x) & 0x1f) << 5) @@ -680,7 +683,7 @@ static int tegra_slink_start_transfer_one(struct spi_device *spi, bits_per_word = t->bits_per_word; speed = t->speed_hz; if (speed != tspi->cur_speed) { - clk_set_rate(tspi->clk, speed * 4); + dev_pm_opp_set_rate(tspi->dev, speed * 4); tspi->cur_speed = speed; } @@ -1066,6 +1069,10 @@ static int tegra_slink_probe(struct platform_device *pdev) goto exit_free_master; } + ret = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (ret) + goto exit_free_master; + tspi->max_buf_size = SLINK_FIFO_DEPTH << 2; tspi->dma_buf_size = DEFAULT_SPI_DMA_BUF_LEN; From 190f1e5f06f4ddead4777588e6a8ea8a7985a106 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 05:10:49 +0300 Subject: [PATCH 1050/1202] media: dt: bindings: tegra-vde: Convert to schema Convert NVIDIA Tegra video decoder binding to schema. Reviewed-by: Rob Herring Acked-by: Hans Verkuil Signed-off-by: Dmitry Osipenko --- .../bindings/media/nvidia,tegra-vde.txt | 64 ----------- .../bindings/media/nvidia,tegra-vde.yaml | 107 ++++++++++++++++++ 2 files changed, 107 insertions(+), 64 deletions(-) delete mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt deleted file mode 100644 index 602169b8aa198..0000000000000 --- a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt +++ /dev/null @@ -1,64 +0,0 @@ -NVIDIA Tegra Video Decoder Engine - -Required properties: -- compatible : Must contain one of the following values: - - "nvidia,tegra20-vde" - - "nvidia,tegra30-vde" - - "nvidia,tegra114-vde" - - "nvidia,tegra124-vde" - - "nvidia,tegra132-vde" -- reg : Must contain an entry for each entry in reg-names. -- reg-names : Must include the following entries: - - sxe - - bsev - - mbe - - ppe - - mce - - tfe - - ppb - - vdma - - frameid -- iram : Must contain phandle to the mmio-sram device node that represents - IRAM region used by VDE. -- interrupts : Must contain an entry for each entry in interrupt-names. -- interrupt-names : Must include the following entries: - - sync-token - - bsev - - sxe -- clocks : Must include the following entries: - - vde -- resets : Must contain an entry for each entry in reset-names. -- reset-names : Should include the following entries: - - vde - -Optional properties: -- resets : Must contain an entry for each entry in reset-names. -- reset-names : Must include the following entries: - - mc -- iommus: Must contain phandle to the IOMMU device node. 
- -Example: - -video-codec@6001a000 { - compatible = "nvidia,tegra20-vde"; - reg = <0x6001a000 0x1000 /* Syntax Engine */ - 0x6001b000 0x1000 /* Video Bitstream Engine */ - 0x6001c000 0x100 /* Macroblock Engine */ - 0x6001c200 0x100 /* Post-processing Engine */ - 0x6001c400 0x100 /* Motion Compensation Engine */ - 0x6001c600 0x100 /* Transform Engine */ - 0x6001c800 0x100 /* Pixel prediction block */ - 0x6001ca00 0x100 /* Video DMA */ - 0x6001d800 0x300 /* Video frame controls */>; - reg-names = "sxe", "bsev", "mbe", "ppe", "mce", - "tfe", "ppb", "vdma", "frameid"; - iram = <&vde_pool>; /* IRAM region */ - interrupts = , /* Sync token interrupt */ - , /* BSE-V interrupt */ - ; /* SXE interrupt */ - interrupt-names = "sync-token", "bsev", "sxe"; - clocks = <&tegra_car TEGRA20_CLK_VDE>; - reset-names = "vde", "mc"; - resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>; - iommus = <&mc TEGRA_SWGROUP_VDE>; -}; diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml new file mode 100644 index 0000000000000..c143aaa063468 --- /dev/null +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/nvidia,tegra-vde.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NVIDIA Tegra Video Decoder Engine + +maintainers: + - Dmitry Osipenko + - Jon Hunter + - Thierry Reding + +properties: + compatible: + oneOf: + - items: + - enum: + - nvidia,tegra132-vde + - nvidia,tegra124-vde + - nvidia,tegra114-vde + - items: + - const: nvidia,tegra30-vde + - const: nvidia,tegra20-vde + - items: + - const: nvidia,tegra20-vde + + reg: + maxItems: 9 + + reg-names: + items: + - const: sxe + - const: bsev + - const: mbe + - const: ppe + - const: mce + - const: tfe + - const: ppb + - const: vdma + - const: frameid + + clocks: + maxItems: 1 + + resets: + maxItems: 2 + + reset-names: + items: + - const: vde + - const: mc + + interrupts: + maxItems: 3 + + interrupt-names: + items: + - const: sync-token + - const: bsev + - const: sxe + + iommus: + maxItems: 1 + + iram: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle of the SRAM MMIO node. 
+ +required: + - compatible + - reg + - reg-names + - clocks + - resets + - reset-names + - interrupts + - interrupt-names + +additionalProperties: false + +examples: + - | + video-codec@6001a000 { + compatible = "nvidia,tegra20-vde"; + reg = <0x6001a000 0x1000>, /* Syntax Engine */ + <0x6001b000 0x1000>, /* Video Bitstream Engine */ + <0x6001c000 0x100>, /* Macroblock Engine */ + <0x6001c200 0x100>, /* Post-processing Engine */ + <0x6001c400 0x100>, /* Motion Compensation Engine */ + <0x6001c600 0x100>, /* Transform Engine */ + <0x6001c800 0x100>, /* Pixel prediction block */ + <0x6001ca00 0x100>, /* Video DMA */ + <0x6001d800 0x300>; /* Video frame controls */ + reg-names = "sxe", "bsev", "mbe", "ppe", "mce", + "tfe", "ppb", "vdma", "frameid"; + iram = <&iram>; /* IRAM MMIO region */ + interrupts = <0 9 4>, /* Sync token */ + <0 10 4>, /* BSE-V */ + <0 12 4>; /* SXE */ + interrupt-names = "sync-token", "bsev", "sxe"; + clocks = <&clk 61>; + reset-names = "vde", "mc"; + resets = <&rst 61>, <&mem 13>; + iommus = <&mem 15>; + }; From 88fab3fb6c2075059785395aa44a065ef91f8275 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Apr 2021 00:06:25 +0300 Subject: [PATCH 1051/1202] media: dt: bindings: tegra-vde: Document OPP and power domain Document new OPP table and power domain properties of the video decoder hardware. Reviewed-by: Rob Herring Acked-by: Hans Verkuil Signed-off-by: Dmitry Osipenko --- .../devicetree/bindings/media/nvidia,tegra-vde.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml index c143aaa063468..4ecdee1be37e7 100644 --- a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.yaml @@ -68,6 +68,16 @@ properties: description: Phandle of the SRAM MMIO node. + operating-points-v2: + description: + Should contain freqs and voltages and opp-supported-hw property, + which is a bitfield indicating SoC speedo or process ID mask. + + power-domains: + maxItems: 1 + description: + Phandle to the SoC core power domain. + required: - compatible - reg @@ -104,4 +114,6 @@ examples: reset-names = "vde", "mc"; resets = <&rst 61>, <&mem 13>; iommus = <&mem 15>; + operating-points-v2 = <&dvfs_opp_table>; + power-domains = <&domain>; }; From 70be9a428959246d0e4a9466793d13274bd8da94 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 2 Nov 2020 03:03:57 +0300 Subject: [PATCH 1052/1202] media: staging: tegra-vde: Support generic power domain Currently driver supports legacy power domain API, this patch adds generic power domain support. This allows us to utilize a modern GENPD API for newer device-trees. 
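A sketch of the runtime decision this patch introduces: with GENPD, dev->pm_domain is populated and the PM core powers the partition, so only the clock needs balancing; on legacy device-trees the driver ungates the partition itself.

    static int example_vde_power_up(struct device *dev, struct tegra_vde *vde)
    {
        if (!dev->pm_domain)
            /* legacy DT: manual ungate; leaves the clock enabled */
            return tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC,
                                                     vde->clk, vde->rst);

        /* GENPD already powered the domain; match the clock state */
        return clk_prepare_enable(vde->clk);
    }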
Tested-by: Peter Geis # Ouya T30 Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 and TK1 T124 Tested-by: Matt Merhar # Ouya T30 Acked-by: Hans Verkuil Signed-off-by: Dmitry Osipenko --- drivers/staging/media/tegra-vde/vde.c | 63 ++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/drivers/staging/media/tegra-vde/vde.c b/drivers/staging/media/tegra-vde/vde.c index ed4c1250b3038..859f60a709049 100644 --- a/drivers/staging/media/tegra-vde/vde.c +++ b/drivers/staging/media/tegra-vde/vde.c @@ -20,6 +20,7 @@ #include #include +#include #include #include "uapi.h" @@ -920,13 +921,17 @@ static __maybe_unused int tegra_vde_runtime_suspend(struct device *dev) struct tegra_vde *vde = dev_get_drvdata(dev); int err; - err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC); - if (err) { - dev_err(dev, "Failed to power down HW: %d\n", err); - return err; + if (!dev->pm_domain) { + err = tegra_powergate_power_off(TEGRA_POWERGATE_VDEC); + if (err) { + dev_err(dev, "Failed to power down HW: %d\n", err); + return err; + } } clk_disable_unprepare(vde->clk); + reset_control_release(vde->rst); + reset_control_release(vde->rst_mc); return 0; } @@ -936,14 +941,45 @@ static __maybe_unused int tegra_vde_runtime_resume(struct device *dev) struct tegra_vde *vde = dev_get_drvdata(dev); int err; - err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC, - vde->clk, vde->rst); + err = reset_control_acquire(vde->rst_mc); if (err) { - dev_err(dev, "Failed to power up HW : %d\n", err); + dev_err(dev, "Failed to acquire mc reset: %d\n", err); return err; } + err = reset_control_acquire(vde->rst); + if (err) { + dev_err(dev, "Failed to acquire reset: %d\n", err); + goto release_mc_reset; + } + + if (!dev->pm_domain) { + err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_VDEC, + vde->clk, vde->rst); + if (err) { + dev_err(dev, "Failed to power up HW : %d\n", err); + goto release_reset; + } + } else { + /* + * tegra_powergate_sequence_power_up() leaves clocks enabled, + * while GENPD not. + */ + err = clk_prepare_enable(vde->clk); + if (err) { + dev_err(dev, "Failed to enable clock: %d\n", err); + goto release_reset; + } + } + return 0; + +release_reset: + reset_control_release(vde->rst); +release_mc_reset: + reset_control_release(vde->rst_mc); + + return err; } static int tegra_vde_probe(struct platform_device *pdev) @@ -1001,14 +1037,14 @@ static int tegra_vde_probe(struct platform_device *pdev) return err; } - vde->rst = devm_reset_control_get(dev, NULL); + vde->rst = devm_reset_control_get_exclusive_released(dev, NULL); if (IS_ERR(vde->rst)) { err = PTR_ERR(vde->rst); dev_err(dev, "Could not get VDE reset %d\n", err); return err; } - vde->rst_mc = devm_reset_control_get_optional(dev, "mc"); + vde->rst_mc = devm_reset_control_get_optional_exclusive_released(dev, "mc"); if (IS_ERR(vde->rst_mc)) { err = PTR_ERR(vde->rst_mc); dev_err(dev, "Could not get MC reset %d\n", err); @@ -1026,6 +1062,12 @@ static int tegra_vde_probe(struct platform_device *pdev) return err; } + err = devm_tegra_core_dev_init_opp_table_common(dev); + if (err) { + dev_err(dev, "Could initialize OPP table %d\n", err); + return err; + } + vde->iram_pool = of_gen_pool_get(dev->of_node, "iram", 0); if (!vde->iram_pool) { dev_err(dev, "Could not get IRAM pool\n"); @@ -1133,8 +1175,7 @@ static void tegra_vde_shutdown(struct platform_device *pdev) * On some devices bootloader isn't ready to a power-gated VDE on * a warm-reboot, machine will hang in that case. 
*/ - if (pm_runtime_status_suspended(&pdev->dev)) - tegra_vde_runtime_resume(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); } static __maybe_unused int tegra_vde_pm_suspend(struct device *dev) From 409be73a3eabf34df9f24e4b0b9af616e8a8c958 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 14 Aug 2021 02:22:24 +0300 Subject: [PATCH 1053/1202] soc/tegra: fuse: Reset hardware The FUSE controller is enabled at boot time. Reset it in order to put the hardware and clock into a clean, disabled state. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/fuse/fuse-tegra.c | 25 +++++++++++++++++++++++++ drivers/soc/tegra/fuse/fuse.h | 1 + 2 files changed, 26 insertions(+) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index f2151815db585..cc032729a143e 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -243,6 +244,30 @@ static int tegra_fuse_probe(struct platform_device *pdev) goto restore; } + fuse->rst = devm_reset_control_get_optional(&pdev->dev, "fuse"); + if (IS_ERR(fuse->rst)) { + err = PTR_ERR(fuse->rst); + dev_err(&pdev->dev, "failed to get FUSE reset: %pe\n", + fuse->rst); + goto restore; + } + + /* + * The FUSE clock is enabled at boot time, hence this resume/suspend + * sequence disables the clock in addition to resetting the h/w. + */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto restore; + + err = reset_control_reset(fuse->rst); + pm_runtime_put(&pdev->dev); + + if (err < 0) { + dev_err(&pdev->dev, "failed to reset FUSE: %d\n", err); + goto restore; + } + /* release the early I/O memory mapping */ iounmap(base); diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index de58feba04350..1b719d85bd045 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -43,6 +43,7 @@ struct tegra_fuse { void __iomem *base; phys_addr_t phys; struct clk *clk; + struct reset_control *rst; u32 (*read_early)(struct tegra_fuse *fuse, unsigned int offset); u32 (*read)(struct tegra_fuse *fuse, unsigned int offset); From 7f6ce0999de10dbe710aee9a87eb33ffc1184dbb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 14 Aug 2021 19:37:26 +0300 Subject: [PATCH 1054/1202] soc/tegra: fuse: Use resource-managed helpers Use resource-managed helpers to make the code cleaner and more correct, properly releasing all resources in case of a driver probe error.
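The conversion leans on devm_add_action_or_reset(), which hooks a cleanup callback into the device-managed resource list so the driver core runs it automatically on probe failure and on device removal, letting explicit unwind labels go away. A minimal sketch of the pattern with placeholder names (not the fuse driver itself):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/platform_device.h>

static void foo_clk_disable(void *data)
{
	clk_disable_unprepare(data);
}

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk;
	int err;

	clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	err = clk_prepare_enable(clk);
	if (err)
		return err;

	/* foo_clk_disable() now runs automatically on probe error or removal */
	return devm_add_action_or_reset(&pdev->dev, foo_clk_disable, clk);
}

If registering the action itself fails, devm_add_action_or_reset() invokes the callback immediately, so the error path stays balanced.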
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/fuse/fuse-tegra.c | 32 ++++++++++++++------------ drivers/soc/tegra/fuse/fuse-tegra20.c | 33 ++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index cc032729a143e..fe4f935ce73ad 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -182,6 +182,12 @@ static const struct nvmem_cell_info tegra_fuse_cells[] = { }, }; +static void tegra_fuse_restore(void *base) +{ + fuse->clk = NULL; + fuse->base = base; +} + static int tegra_fuse_probe(struct platform_device *pdev) { void __iomem *base = fuse->base; @@ -189,13 +195,16 @@ static int tegra_fuse_probe(struct platform_device *pdev) struct resource *res; int err; + err = devm_add_action(&pdev->dev, tegra_fuse_restore, base); + if (err) + return err; + /* take over the memory region from the early initialization */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fuse->phys = res->start; fuse->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(fuse->base)) { err = PTR_ERR(fuse->base); - fuse->base = base; return err; } @@ -205,19 +214,20 @@ static int tegra_fuse_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to get FUSE clock: %ld", PTR_ERR(fuse->clk)); - fuse->base = base; return PTR_ERR(fuse->clk); } platform_set_drvdata(pdev, fuse); fuse->dev = &pdev->dev; - pm_runtime_enable(&pdev->dev); + err = devm_pm_runtime_enable(&pdev->dev); + if (err) + return err; if (fuse->soc->probe) { err = fuse->soc->probe(fuse); if (err < 0) - goto restore; + return err; } memset(&nvmem, 0, sizeof(nvmem)); @@ -241,7 +251,7 @@ static int tegra_fuse_probe(struct platform_device *pdev) err = PTR_ERR(fuse->nvmem); dev_err(&pdev->dev, "failed to register NVMEM device: %d\n", err); - goto restore; + return err; } fuse->rst = devm_reset_control_get_optional(&pdev->dev, "fuse"); @@ -249,7 +259,7 @@ static int tegra_fuse_probe(struct platform_device *pdev) err = PTR_ERR(fuse->rst); dev_err(&pdev->dev, "failed to get FUSE reset: %pe\n", fuse->rst); - goto restore; + return err; } /* @@ -258,26 +268,20 @@ static int tegra_fuse_probe(struct platform_device *pdev) */ err = pm_runtime_resume_and_get(&pdev->dev); if (err) - goto restore; + return err; err = reset_control_reset(fuse->rst); pm_runtime_put(&pdev->dev); if (err < 0) { dev_err(&pdev->dev, "failed to reset FUSE: %d\n", err); - goto restore; + return err; } /* release the early I/O memory mapping */ iounmap(base); return 0; - -restore: - fuse->clk = NULL; - fuse->base = base; - pm_runtime_disable(&pdev->dev); - return err; } static int __maybe_unused tegra_fuse_runtime_resume(struct device *dev) diff --git a/drivers/soc/tegra/fuse/fuse-tegra20.c b/drivers/soc/tegra/fuse/fuse-tegra20.c index 8ec9fc5e5e4b5..12503f563e360 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra20.c +++ b/drivers/soc/tegra/fuse/fuse-tegra20.c @@ -94,9 +94,28 @@ static bool dma_filter(struct dma_chan *chan, void *filter_param) return of_device_is_compatible(np, "nvidia,tegra20-apbdma"); } +static void tegra20_fuse_release_channel(void *data) +{ + struct tegra_fuse *fuse = data; + + dma_release_channel(fuse->apbdma.chan); + fuse->apbdma.chan = NULL; +} + +static void tegra20_fuse_free_coherent(void *data) +{ + struct tegra_fuse *fuse = data; + + dma_free_coherent(fuse->dev, sizeof(u32), fuse->apbdma.virt, + fuse->apbdma.phys); + fuse->apbdma.virt = NULL; + fuse->apbdma.phys = 0x0; +} + static int 
tegra20_fuse_probe(struct tegra_fuse *fuse) { dma_cap_mask_t mask; + int err; dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); @@ -105,13 +124,21 @@ static int tegra20_fuse_probe(struct tegra_fuse *fuse) if (!fuse->apbdma.chan) return -EPROBE_DEFER; + err = devm_add_action_or_reset(fuse->dev, tegra20_fuse_release_channel, + fuse); + if (err) + return err; + fuse->apbdma.virt = dma_alloc_coherent(fuse->dev, sizeof(u32), &fuse->apbdma.phys, GFP_KERNEL); - if (!fuse->apbdma.virt) { - dma_release_channel(fuse->apbdma.chan); + if (!fuse->apbdma.virt) return -ENOMEM; - } + + err = devm_add_action_or_reset(fuse->dev, tegra20_fuse_free_coherent, + fuse); + if (err) + return err; fuse->apbdma.config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; fuse->apbdma.config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; From ad53c1b75cb149cd068e8bcb7ca7d1ae2c44796f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 28 Jul 2021 00:32:39 +0300 Subject: [PATCH 1055/1202] soc/tegra: regulators: Prepare for suspend Depending on the hardware version, a Tegra SoC may require higher voltages during resume from system suspend, otherwise the hardware will crash. Set the SoC voltages to nominal levels during suspend. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/regulators-tegra20.c | 99 ++++++++++++++++++++ drivers/soc/tegra/regulators-tegra30.c | 122 +++++++++++++++++++++++++ 2 files changed, 221 insertions(+) diff --git a/drivers/soc/tegra/regulators-tegra20.c b/drivers/soc/tegra/regulators-tegra20.c index b8ce9fd0650dd..6a2f90ab9d3eb 100644 --- a/drivers/soc/tegra/regulators-tegra20.c +++ b/drivers/soc/tegra/regulators-tegra20.c @@ -16,7 +16,9 @@ #include #include #include +#include +#include #include struct tegra_regulator_coupler { @@ -25,9 +27,12 @@ struct tegra_regulator_coupler { struct regulator_dev *cpu_rdev; struct regulator_dev *rtc_rdev; struct notifier_block reboot_notifier; + struct notifier_block suspend_notifier; int core_min_uV, cpu_min_uV; bool sys_reboot_mode_req; bool sys_reboot_mode; + bool sys_suspend_mode_req; + bool sys_suspend_mode; }; static inline struct tegra_regulator_coupler * @@ -105,6 +110,28 @@ static int tegra20_core_rtc_max_spread(struct regulator_dev *core_rdev, return 150000; } +static int tegra20_cpu_nominal_uV(void) +{ + switch (tegra_sku_info.soc_speedo_id) { + case 0: + return 1100000; + case 1: + return 1025000; + default: + return 1125000; + } +} + +static int tegra20_core_nominal_uV(void) +{ + switch (tegra_sku_info.soc_speedo_id) { + default: + return 1225000; + case 2: + return 1300000; + } +} + static int tegra20_core_rtc_update(struct tegra_regulator_coupler *tegra, struct regulator_dev *core_rdev, struct regulator_dev *rtc_rdev, @@ -144,6 +171,11 @@ static int tegra20_core_rtc_update(struct tegra_regulator_coupler *tegra, if (err) return err; + /* prepare voltage level for suspend */ + if (tegra->sys_suspend_mode) + core_min_uV = clamp(tegra20_core_nominal_uV(), + core_min_uV, core_max_uV); + core_uV = regulator_get_voltage_rdev(core_rdev); if (core_uV < 0) return core_uV; @@ -279,6 +311,11 @@ static int tegra20_cpu_voltage_update(struct tegra_regulator_coupler *tegra, if (tegra->sys_reboot_mode) cpu_min_uV = max(cpu_min_uV, tegra->cpu_min_uV); + /* prepare voltage level for suspend */ + if (tegra->sys_suspend_mode) + cpu_min_uV = clamp(tegra20_cpu_nominal_uV(), + cpu_min_uV, cpu_max_uV); + if (cpu_min_uV > cpu_uV) { err = tegra20_core_rtc_update(tegra, core_rdev, rtc_rdev, cpu_uV, cpu_min_uV); @@ -320,6 +357,7 @@ static int tegra20_regulator_balance_voltage(struct
regulator_coupler *coupler, } tegra->sys_reboot_mode = READ_ONCE(tegra->sys_reboot_mode_req); + tegra->sys_suspend_mode = READ_ONCE(tegra->sys_suspend_mode_req); if (rdev == cpu_rdev) return tegra20_cpu_voltage_update(tegra, cpu_rdev, @@ -334,6 +372,63 @@ static int tegra20_regulator_balance_voltage(struct regulator_coupler *coupler, return -EPERM; } +static int tegra20_regulator_prepare_suspend(struct tegra_regulator_coupler *tegra, + bool sys_suspend_mode) +{ + int err; + + if (!tegra->core_rdev || !tegra->rtc_rdev || !tegra->cpu_rdev) + return 0; + + /* + * All power domains are enabled early during resume from suspend + * by GENPD core. Domains like VENC may require a higher voltage + * when enabled during resume from suspend. This also prepares + * hardware for resuming from LP0. + */ + + WRITE_ONCE(tegra->sys_suspend_mode_req, sys_suspend_mode); + + err = regulator_sync_voltage_rdev(tegra->cpu_rdev); + if (err) + return err; + + err = regulator_sync_voltage_rdev(tegra->core_rdev); + if (err) + return err; + + return 0; +} + +static int tegra20_regulator_suspend(struct notifier_block *notifier, + unsigned long mode, void *arg) +{ + struct tegra_regulator_coupler *tegra; + int ret = 0; + + tegra = container_of(notifier, struct tegra_regulator_coupler, + suspend_notifier); + + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + case PM_SUSPEND_PREPARE: + ret = tegra20_regulator_prepare_suspend(tegra, true); + break; + + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + ret = tegra20_regulator_prepare_suspend(tegra, false); + break; + } + + if (ret) + pr_err("failed to prepare regulators: %d\n", ret); + + return notifier_from_errno(ret); +} + static int tegra20_regulator_prepare_reboot(struct tegra_regulator_coupler *tegra, bool sys_reboot_mode) { @@ -444,6 +539,7 @@ static struct tegra_regulator_coupler tegra20_coupler = { .balance_voltage = tegra20_regulator_balance_voltage, }, .reboot_notifier.notifier_call = tegra20_regulator_reboot, + .suspend_notifier.notifier_call = tegra20_regulator_suspend, }; static int __init tegra_regulator_coupler_init(void) @@ -456,6 +552,9 @@ static int __init tegra_regulator_coupler_init(void) err = register_reboot_notifier(&tegra20_coupler.reboot_notifier); WARN_ON(err); + err = register_pm_notifier(&tegra20_coupler.suspend_notifier); + WARN_ON(err); + return regulator_coupler_register(&tegra20_coupler.coupler); } arch_initcall(tegra_regulator_coupler_init); diff --git a/drivers/soc/tegra/regulators-tegra30.c b/drivers/soc/tegra/regulators-tegra30.c index e74bbc9c78597..8fd43c6891349 100644 --- a/drivers/soc/tegra/regulators-tegra30.c +++ b/drivers/soc/tegra/regulators-tegra30.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -25,9 +26,12 @@ struct tegra_regulator_coupler { struct regulator_dev *core_rdev; struct regulator_dev *cpu_rdev; struct notifier_block reboot_notifier; + struct notifier_block suspend_notifier; int core_min_uV, cpu_min_uV; bool sys_reboot_mode_req; bool sys_reboot_mode; + bool sys_suspend_mode_req; + bool sys_suspend_mode; }; static inline struct tegra_regulator_coupler * @@ -113,6 +117,52 @@ static int tegra30_core_cpu_limit(int cpu_uV) return -EINVAL; } +static int tegra30_cpu_nominal_uV(void) +{ + switch (tegra_sku_info.cpu_speedo_id) { + case 10 ... 11: + return 850000; + + case 9: + return 912000; + + case 1 ... 3: + case 7 ... 8: + return 1050000; + + default: + return 1125000; + + case 4 ... 6: + case 12 ... 
13: + return 1237000; + } +} + +static int tegra30_core_nominal_uV(void) +{ + switch (tegra_sku_info.soc_speedo_id) { + case 0: + return 1200000; + + case 1: + if (tegra_sku_info.cpu_speedo_id != 7 && + tegra_sku_info.cpu_speedo_id != 8) + return 1200000; + + fallthrough; + + case 2: + if (tegra_sku_info.cpu_speedo_id != 13) + return 1300000; + + return 1350000; + + default: + return 1250000; + } +} + static int tegra30_voltage_update(struct tegra_regulator_coupler *tegra, struct regulator_dev *cpu_rdev, struct regulator_dev *core_rdev) @@ -168,6 +218,11 @@ static int tegra30_voltage_update(struct tegra_regulator_coupler *tegra, if (err) return err; + /* prepare voltage level for suspend */ + if (tegra->sys_suspend_mode) + core_min_uV = clamp(tegra30_core_nominal_uV(), + core_min_uV, core_max_uV); + core_uV = regulator_get_voltage_rdev(core_rdev); if (core_uV < 0) return core_uV; @@ -223,6 +278,11 @@ static int tegra30_voltage_update(struct tegra_regulator_coupler *tegra, if (tegra->sys_reboot_mode) cpu_min_uV = max(cpu_min_uV, tegra->cpu_min_uV); + /* prepare voltage level for suspend */ + if (tegra->sys_suspend_mode) + cpu_min_uV = clamp(tegra30_cpu_nominal_uV(), + cpu_min_uV, cpu_max_uV); + if (core_min_limited_uV > core_uV) { pr_err("core voltage constraint violated: %d %d %d\n", core_uV, core_min_limited_uV, cpu_uV); @@ -292,10 +352,68 @@ static int tegra30_regulator_balance_voltage(struct regulator_coupler *coupler, } tegra->sys_reboot_mode = READ_ONCE(tegra->sys_reboot_mode_req); + tegra->sys_suspend_mode = READ_ONCE(tegra->sys_suspend_mode_req); return tegra30_voltage_update(tegra, cpu_rdev, core_rdev); } +static int tegra30_regulator_prepare_suspend(struct tegra_regulator_coupler *tegra, + bool sys_suspend_mode) +{ + int err; + + if (!tegra->core_rdev || !tegra->cpu_rdev) + return 0; + + /* + * All power domains are enabled early during resume from suspend + * by GENPD core. Domains like VENC may require a higher voltage + * when enabled during resume from suspend. This also prepares + * hardware for resuming from LP0. 
+ */ + + WRITE_ONCE(tegra->sys_suspend_mode_req, sys_suspend_mode); + + err = regulator_sync_voltage_rdev(tegra->cpu_rdev); + if (err) + return err; + + err = regulator_sync_voltage_rdev(tegra->core_rdev); + if (err) + return err; + + return 0; +} + +static int tegra30_regulator_suspend(struct notifier_block *notifier, + unsigned long mode, void *arg) +{ + struct tegra_regulator_coupler *tegra; + int ret = 0; + + tegra = container_of(notifier, struct tegra_regulator_coupler, + suspend_notifier); + + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + case PM_SUSPEND_PREPARE: + ret = tegra30_regulator_prepare_suspend(tegra, true); + break; + + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + ret = tegra30_regulator_prepare_suspend(tegra, false); + break; + } + + if (ret) + pr_err("failed to prepare regulators: %d\n", ret); + + return notifier_from_errno(ret); +} + static int tegra30_regulator_prepare_reboot(struct tegra_regulator_coupler *tegra, bool sys_reboot_mode) { @@ -395,6 +513,7 @@ static struct tegra_regulator_coupler tegra30_coupler = { .balance_voltage = tegra30_regulator_balance_voltage, }, .reboot_notifier.notifier_call = tegra30_regulator_reboot, + .suspend_notifier.notifier_call = tegra30_regulator_suspend, }; static int __init tegra_regulator_coupler_init(void) @@ -407,6 +526,9 @@ static int __init tegra_regulator_coupler_init(void) err = register_reboot_notifier(&tegra30_coupler.reboot_notifier); WARN_ON(err); + err = register_pm_notifier(&tegra30_coupler.suspend_notifier); + WARN_ON(err); + return regulator_coupler_register(&tegra30_coupler.coupler); } arch_initcall(tegra_regulator_coupler_init); From afca0d05776e8cf778670541a079be1bab740ad3 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 19 Sep 2021 19:41:11 +0300 Subject: [PATCH 1056/1202] soc/tegra: pmc: Rename 3d power domains The device-tree schema doesn't allow a domain name to start with a number. We don't use the 3d domains in device-trees yet, so rename them to the names used by the Tegra TRMs: TD and TD2.
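For reference, the restriction stems from the generic node-name rule enforced by the DT schema tooling: names must begin with a letter. A rough json-schema sketch of such a pattern (illustrative only; the actual dtschema regex admits more punctuation):

patternProperties:
  # node/domain names must start with a letter, never a digit
  "^[a-z][a-z0-9-]*$":
    type: object

Under such a rule "3d" and "3d1" are rejected, while "td" and "td2" pass.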
Reported-by: David Heidelberg Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index a01330099e1ac..d106f974a66a8 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -2981,7 +2981,7 @@ static SIMPLE_DEV_PM_OPS(tegra_pmc_pm_ops, tegra_pmc_suspend, tegra_pmc_resume); static const char * const tegra20_powergates[] = { [TEGRA_POWERGATE_CPU] = "cpu", - [TEGRA_POWERGATE_3D] = "3d", + [TEGRA_POWERGATE_3D] = "td", [TEGRA_POWERGATE_VENC] = "venc", [TEGRA_POWERGATE_VDEC] = "vdec", [TEGRA_POWERGATE_PCIE] = "pcie", @@ -3089,7 +3089,7 @@ static const struct tegra_pmc_soc tegra20_pmc_soc = { static const char * const tegra30_powergates[] = { [TEGRA_POWERGATE_CPU] = "cpu0", - [TEGRA_POWERGATE_3D] = "3d0", + [TEGRA_POWERGATE_3D] = "td", [TEGRA_POWERGATE_VENC] = "venc", [TEGRA_POWERGATE_VDEC] = "vdec", [TEGRA_POWERGATE_PCIE] = "pcie", @@ -3101,7 +3101,7 @@ static const char * const tegra30_powergates[] = { [TEGRA_POWERGATE_CPU2] = "cpu2", [TEGRA_POWERGATE_CPU3] = "cpu3", [TEGRA_POWERGATE_CELP] = "celp", - [TEGRA_POWERGATE_3D1] = "3d1", + [TEGRA_POWERGATE_3D1] = "td2", }; static const u8 tegra30_cpu_powergates[] = { @@ -3150,7 +3150,7 @@ static const char * const tegra114_powergates[] = { [TEGRA_POWERGATE_CPU] = "crail", - [TEGRA_POWERGATE_3D] = "3d", + [TEGRA_POWERGATE_3D] = "td", [TEGRA_POWERGATE_VENC] = "venc", [TEGRA_POWERGATE_VDEC] = "vdec", [TEGRA_POWERGATE_MPE] = "mpe", From 589bc842fd2b4de754544e510348c99c072f1cdb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 20 Aug 2021 02:20:36 +0300 Subject: [PATCH 1057/1202] soc/tegra: pmc: Rename core power domain The CORE power domain uses the name of its device-tree node, which is inconsistent with the names of the other PMC domains. Set the name to "core" to make it consistent. Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index d106f974a66a8..edfc2f6f02229 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -1373,7 +1373,7 @@ static int tegra_pmc_core_pd_add(struct tegra_pmc *pmc, struct device_node *np) if (!genpd) return -ENOMEM; - genpd->name = np->name; + genpd->name = "core"; genpd->set_performance_state = tegra_pmc_core_pd_set_performance_state; genpd->opp_to_performance_state = tegra_pmc_core_pd_opp_to_performance_state; From 669bb38574bda3f34a41e02d080fe34a71692f6f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 30 Jun 2021 23:46:59 +0300 Subject: [PATCH 1058/1202] soc/tegra: pmc: Enable core domain support for Tegra20 and Tegra30 All device drivers now have runtime PM and OPP support. Flip the core domain support status for Tegra20 and Tegra30 SoCs.
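As a rough illustration of what this unlocks on the device-tree side, a consumer can now reference the core domain together with its OPP table, in the same shape as the VDE binding example earlier in the series (the pd_core and vde_dvfs_opp_table labels below are illustrative placeholders, not taken from a real board file):

video-codec@6001a000 {
	...
	operating-points-v2 = <&vde_dvfs_opp_table>;
	power-domains = <&pd_core>;
};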
Signed-off-by: Dmitry Osipenko --- drivers/soc/tegra/pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index edfc2f6f02229..bc0dd7ef7f0be 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -3059,7 +3059,7 @@ static void tegra20_pmc_setup_irq_polarity(struct tegra_pmc *pmc, } static const struct tegra_pmc_soc tegra20_pmc_soc = { - .supports_core_domain = false, + .supports_core_domain = true, .num_powergates = ARRAY_SIZE(tegra20_powergates), .powergates = tegra20_powergates, .num_cpu_powergates = 0, @@ -3120,7 +3120,7 @@ static const char * const tegra30_reset_sources[] = { }; static const struct tegra_pmc_soc tegra30_pmc_soc = { - .supports_core_domain = false, + .supports_core_domain = true, .num_powergates = ARRAY_SIZE(tegra30_powergates), .powergates = tegra30_powergates, .num_cpu_powergates = ARRAY_SIZE(tegra30_cpu_powergates), From 28a5de18e7d428e9dad078121f4a3218ea97279c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 16 Oct 2021 18:49:00 +0300 Subject: [PATCH 1059/1202] ARM: tegra: Rename CPU and EMC OPP table device-tree nodes OPP table names should now start with "opp-table", and OPP entries shouldn't contain commas or @ signs, in accordance with the new schema requirements. Reorganize the CPU and EMC OPP table device-tree nodes. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra124-apalis-emc.dtsi | 4 +- .../arm/boot/dts/tegra124-jetson-tk1-emc.dtsi | 4 +- arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi | 8 +- .../arm/boot/dts/tegra124-nyan-blaze-emc.dtsi | 8 +- .../boot/dts/tegra124-peripherals-opp.dtsi | 140 ++++++++--------- .../boot/dts/tegra20-acer-a500-picasso.dts | 4 +- arch/arm/boot/dts/tegra20-colibri.dtsi | 2 +- .../boot/dts/tegra20-cpu-opp-microvolt.dtsi | 82 +++++----- arch/arm/boot/dts/tegra20-cpu-opp.dtsi | 82 +++++----- arch/arm/boot/dts/tegra20-paz00.dts | 2 +- .../arm/boot/dts/tegra20-peripherals-opp.dtsi | 36 ++--- ...30-asus-nexus7-grouper-memory-timings.dtsi | 12 +- .../boot/dts/tegra30-cpu-opp-microvolt.dtsi | 144 +++++++++--------- arch/arm/boot/dts/tegra30-cpu-opp.dtsi | 144 +++++++++--------- arch/arm/boot/dts/tegra30-ouya.dts | 4 +- .../arm/boot/dts/tegra30-peripherals-opp.dtsi | 128 ++++++++-------- 16 files changed, 402 insertions(+), 402 deletions(-) diff --git a/arch/arm/boot/dts/tegra124-apalis-emc.dtsi b/arch/arm/boot/dts/tegra124-apalis-emc.dtsi index a7ac805eeed58..3d9cf16f233c2 100644 --- a/arch/arm/boot/dts/tegra124-apalis-emc.dtsi +++ b/arch/arm/boot/dts/tegra124-apalis-emc.dtsi @@ -1467,9 +1467,9 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@1200000000,1100; + /delete-node/ opp-1200000000-1100; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@1200000000; + /delete-node/ opp-1200000000; }; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1-emc.dtsi b/arch/arm/boot/dts/tegra124-jetson-tk1-emc.dtsi index df4e463afbd1b..bb10cf9fc0f9a 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1-emc.dtsi +++ b/arch/arm/boot/dts/tegra124-jetson-tk1-emc.dtsi @@ -2422,9 +2422,9 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@1200000000,1100; + /delete-node/ opp-1200000000-1100; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@1200000000; + /delete-node/ opp-1200000000; }; diff --git a/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi b/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi index a0f56cc9da5c4..6f2ea9469d494 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan-big-emc.dtsi @@ -6651,11 +6651,11 @@ };
&emc_icc_dvfs_opp_table { - /delete-node/ opp@924000000,1100; - /delete-node/ opp@1200000000,1100; + /delete-node/ opp-924000000-1100; + /delete-node/ opp-1200000000-1100; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@924000000; - /delete-node/ opp@1200000000; + /delete-node/ opp-924000000; + /delete-node/ opp-1200000000; }; diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze-emc.dtsi b/arch/arm/boot/dts/tegra124-nyan-blaze-emc.dtsi index 35c98734d35fb..d47cdb863f88b 100644 --- a/arch/arm/boot/dts/tegra124-nyan-blaze-emc.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan-blaze-emc.dtsi @@ -2050,11 +2050,11 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@924000000,1100; - /delete-node/ opp@1200000000,1100; + /delete-node/ opp-924000000-1100; + /delete-node/ opp-1200000000-1100; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@924000000; - /delete-node/ opp@1200000000; + /delete-node/ opp-924000000; + /delete-node/ opp-1200000000; }; diff --git a/arch/arm/boot/dts/tegra124-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra124-peripherals-opp.dtsi index 781ac86010302..b262c1289da5a 100644 --- a/arch/arm/boot/dts/tegra124-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra124-peripherals-opp.dtsi @@ -1,421 +1,421 @@ // SPDX-License-Identifier: GPL-2.0 / { - emc_icc_dvfs_opp_table: emc-dvfs-opp-table { + emc_icc_dvfs_opp_table: opp-table-emc { compatible = "operating-points-v2"; - opp@12750000,800 { + opp-12750000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0003>; }; - opp@12750000,950 { + opp-12750000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0008>; }; - opp@12750000,1050 { + opp-12750000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0010>; }; - opp@12750000,1110 { + opp-12750000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0004>; }; - opp@20400000,800 { + opp-20400000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <20400000>; opp-supported-hw = <0x0003>; }; - opp@20400000,950 { + opp-20400000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <20400000>; opp-supported-hw = <0x0008>; }; - opp@20400000,1050 { + opp-20400000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <20400000>; opp-supported-hw = <0x0010>; }; - opp@20400000,1110 { + opp-20400000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <20400000>; opp-supported-hw = <0x0004>; }; - opp@40800000,800 { + opp-40800000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <40800000>; opp-supported-hw = <0x0003>; }; - opp@40800000,950 { + opp-40800000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <40800000>; opp-supported-hw = <0x0008>; }; - opp@40800000,1050 { + opp-40800000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <40800000>; opp-supported-hw = <0x0010>; }; - opp@40800000,1110 { + opp-40800000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <40800000>; opp-supported-hw = <0x0004>; }; - opp@68000000,800 { + opp-68000000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <68000000>; opp-supported-hw = <0x0003>; }; - opp@68000000,950 { + opp-68000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <68000000>; opp-supported-hw = <0x0008>; }; - opp@68000000,1050 { + opp-68000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = 
/bits/ 64 <68000000>; opp-supported-hw = <0x0010>; }; - opp@68000000,1110 { + opp-68000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <68000000>; opp-supported-hw = <0x0004>; }; - opp@102000000,800 { + opp-102000000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0003>; }; - opp@102000000,950 { + opp-102000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0008>; }; - opp@102000000,1050 { + opp-102000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0010>; }; - opp@102000000,1110 { + opp-102000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0004>; }; - opp@204000000,800 { + opp-204000000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0003>; opp-suspend; }; - opp@204000000,950 { + opp-204000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0008>; opp-suspend; }; - opp@204000000,1050 { + opp-204000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0010>; opp-suspend; }; - opp@204000000,1110 { + opp-204000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0004>; opp-suspend; }; - opp@264000000,800 { + opp-264000000-800 { opp-microvolt = <800000 800000 1150000>; opp-hz = /bits/ 64 <264000000>; opp-supported-hw = <0x0003>; }; - opp@264000000,950 { + opp-264000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <264000000>; opp-supported-hw = <0x0008>; }; - opp@264000000,1050 { + opp-264000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <264000000>; opp-supported-hw = <0x0010>; }; - opp@264000000,1110 { + opp-264000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <264000000>; opp-supported-hw = <0x0004>; }; - opp@300000000,850 { + opp-300000000-850 { opp-microvolt = <850000 850000 1150000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x0003>; }; - opp@300000000,950 { + opp-300000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x0008>; }; - opp@300000000,1050 { + opp-300000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x0010>; }; - opp@300000000,1110 { + opp-300000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x0004>; }; - opp@348000000,850 { + opp-348000000-850 { opp-microvolt = <850000 850000 1150000>; opp-hz = /bits/ 64 <348000000>; opp-supported-hw = <0x0003>; }; - opp@348000000,950 { + opp-348000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <348000000>; opp-supported-hw = <0x0008>; }; - opp@348000000,1050 { + opp-348000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <348000000>; opp-supported-hw = <0x0010>; }; - opp@348000000,1110 { + opp-348000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <348000000>; opp-supported-hw = <0x0004>; }; - opp@396000000,950 { + opp-396000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <396000000>; opp-supported-hw = <0x0008>; }; - opp@396000000,1000 { + opp-396000000-1000 { opp-microvolt = <1000000 1000000 1150000>; opp-hz = /bits/ 64 <396000000>; 
opp-supported-hw = <0x0003>; }; - opp@396000000,1050 { + opp-396000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <396000000>; opp-supported-hw = <0x0010>; }; - opp@396000000,1110 { + opp-396000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <396000000>; opp-supported-hw = <0x0004>; }; - opp@528000000,950 { + opp-528000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <528000000>; opp-supported-hw = <0x0008>; }; - opp@528000000,1000 { + opp-528000000-1000 { opp-microvolt = <1000000 1000000 1150000>; opp-hz = /bits/ 64 <528000000>; opp-supported-hw = <0x0003>; }; - opp@528000000,1050 { + opp-528000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <528000000>; opp-supported-hw = <0x0010>; }; - opp@528000000,1110 { + opp-528000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <528000000>; opp-supported-hw = <0x0004>; }; - opp@600000000,950 { + opp-600000000-950 { opp-microvolt = <950000 950000 1150000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x0008>; }; - opp@600000000,1000 { + opp-600000000-1000 { opp-microvolt = <1000000 1000000 1150000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x0003>; }; - opp@600000000,1050 { + opp-600000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x0010>; }; - opp@600000000,1110 { + opp-600000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x0004>; }; - opp@792000000,1000 { + opp-792000000-1000 { opp-microvolt = <1000000 1000000 1150000>; opp-hz = /bits/ 64 <792000000>; opp-supported-hw = <0x000B>; }; - opp@792000000,1050 { + opp-792000000-1050 { opp-microvolt = <1050000 1050000 1150000>; opp-hz = /bits/ 64 <792000000>; opp-supported-hw = <0x0010>; }; - opp@792000000,1110 { + opp-792000000-1110 { opp-microvolt = <1110000 1110000 1150000>; opp-hz = /bits/ 64 <792000000>; opp-supported-hw = <0x0004>; }; - opp@924000000,1100 { + opp-924000000-1100 { opp-microvolt = <1100000 1100000 1150000>; opp-hz = /bits/ 64 <924000000>; opp-supported-hw = <0x0013>; }; - opp@1200000000,1100 { + opp-1200000000-1100 { opp-microvolt = <1100000 1100000 1150000>; opp-hz = /bits/ 64 <1200000000>; opp-supported-hw = <0x0003>; }; }; - emc_bw_dfs_opp_table: emc-bandwidth-opp-table { + emc_bw_dfs_opp_table: opp-table-actmon { compatible = "operating-points-v2"; - opp@12750000 { + opp-12750000 { opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <204000>; }; - opp@20400000 { + opp-20400000 { opp-hz = /bits/ 64 <20400000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <326400>; }; - opp@40800000 { + opp-40800000 { opp-hz = /bits/ 64 <40800000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <652800>; }; - opp@68000000 { + opp-68000000 { opp-hz = /bits/ 64 <68000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <1088000>; }; - opp@102000000 { + opp-102000000 { opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <1632000>; }; - opp@204000000 { + opp-204000000 { opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <3264000>; opp-suspend; }; - opp@264000000 { + opp-264000000 { opp-hz = /bits/ 64 <264000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <4224000>; }; - opp@300000000 { + opp-300000000 { opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <4800000>; }; - opp@348000000 { + opp-348000000 { opp-hz = /bits/ 64 <348000000>; opp-supported-hw = 
<0x001F>; opp-peak-kBps = <5568000>; }; - opp@396000000 { + opp-396000000 { opp-hz = /bits/ 64 <396000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <6336000>; }; - opp@528000000 { + opp-528000000 { opp-hz = /bits/ 64 <528000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <8448000>; }; - opp@600000000 { + opp-600000000 { opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <9600000>; }; - opp@792000000 { + opp-792000000 { opp-hz = /bits/ 64 <792000000>; opp-supported-hw = <0x001F>; opp-peak-kBps = <12672000>; }; - opp@924000000 { + opp-924000000 { opp-hz = /bits/ 64 <924000000>; opp-supported-hw = <0x0013>; opp-peak-kBps = <14784000>; }; - opp@1200000000 { + opp-1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-supported-hw = <0x0003>; opp-peak-kBps = <19200000>; diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 23d3f8daab232..9a3510dcbb31f 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -1506,6 +1506,6 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@666000000; - /delete-node/ opp@760000000; + /delete-node/ opp-666000000; + /delete-node/ opp-760000000; }; diff --git a/arch/arm/boot/dts/tegra20-colibri.dtsi b/arch/arm/boot/dts/tegra20-colibri.dtsi index 585a5b441cf64..3f9cb5cd6bd8d 100644 --- a/arch/arm/boot/dts/tegra20-colibri.dtsi +++ b/arch/arm/boot/dts/tegra20-colibri.dtsi @@ -743,7 +743,7 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@760000000; + /delete-node/ opp-760000000; }; &gpio { diff --git a/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi index 6f3e8c5fc5f0d..7330c1b13d933 100644 --- a/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi +++ b/arch/arm/boot/dts/tegra20-cpu-opp-microvolt.dtsi @@ -1,164 +1,164 @@ // SPDX-License-Identifier: GPL-2.0 / { - cpu0_opp_table: cpu_opp_table0 { - opp@216000000,750 { + cpu0_opp_table: opp-table-cpu0 { + opp-216000000-750 { opp-microvolt = <750000 750000 1125000>; }; - opp@216000000,800 { + opp-216000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@312000000,750 { + opp-312000000-750 { opp-microvolt = <750000 750000 1125000>; }; - opp@312000000,800 { + opp-312000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@456000000,750 { + opp-456000000-750 { opp-microvolt = <750000 750000 1125000>; }; - opp@456000000,800 { + opp-456000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@456000000,825 { + opp-456000000-825 { opp-microvolt = <825000 825000 1125000>; }; - opp@608000000,750 { + opp-608000000-750 { opp-microvolt = <750000 750000 1125000>; }; - opp@608000000,800 { + opp-608000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@608000000,825 { + opp-608000000-825 { opp-microvolt = <825000 825000 1125000>; }; - opp@608000000,850 { + opp-608000000-850 { opp-microvolt = <850000 850000 1125000>; }; - opp@608000000,900 { + opp-608000000-900 { opp-microvolt = <900000 900000 1125000>; }; - opp@760000000,775 { + opp-760000000-775 { opp-microvolt = <775000 775000 1125000>; }; - opp@760000000,800 { + opp-760000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@760000000,850 { + opp-760000000-850 { opp-microvolt = <850000 850000 1125000>; }; - opp@760000000,875 { + opp-760000000-875 { opp-microvolt = <875000 875000 1125000>; }; - opp@760000000,900 { + opp-760000000-900 { opp-microvolt = <900000 900000 1125000>; }; - opp@760000000,975 { + opp-760000000-975 { opp-microvolt = <975000 
975000 1125000>; }; - opp@816000000,800 { + opp-816000000-800 { opp-microvolt = <800000 800000 1125000>; }; - opp@816000000,850 { + opp-816000000-850 { opp-microvolt = <850000 850000 1125000>; }; - opp@816000000,875 { + opp-816000000-875 { opp-microvolt = <875000 875000 1125000>; }; - opp@816000000,950 { + opp-816000000-950 { opp-microvolt = <950000 950000 1125000>; }; - opp@816000000,1000 { + opp-816000000-1000 { opp-microvolt = <1000000 1000000 1125000>; }; - opp@912000000,850 { + opp-912000000-850 { opp-microvolt = <850000 850000 1125000>; }; - opp@912000000,900 { + opp-912000000-900 { opp-microvolt = <900000 900000 1125000>; }; - opp@912000000,925 { + opp-912000000-925 { opp-microvolt = <925000 925000 1125000>; }; - opp@912000000,950 { + opp-912000000-950 { opp-microvolt = <950000 950000 1125000>; }; - opp@912000000,1000 { + opp-912000000-1000 { opp-microvolt = <1000000 1000000 1125000>; }; - opp@912000000,1050 { + opp-912000000-1050 { opp-microvolt = <1050000 1050000 1125000>; }; - opp@1000000000,875 { + opp-1000000000-875 { opp-microvolt = <875000 875000 1125000>; }; - opp@1000000000,900 { + opp-1000000000-900 { opp-microvolt = <900000 900000 1125000>; }; - opp@1000000000,950 { + opp-1000000000-950 { opp-microvolt = <950000 950000 1125000>; }; - opp@1000000000,975 { + opp-1000000000-975 { opp-microvolt = <975000 975000 1125000>; }; - opp@1000000000,1000 { + opp-1000000000-1000 { opp-microvolt = <1000000 1000000 1125000>; }; - opp@1000000000,1025 { + opp-1000000000-1025 { opp-microvolt = <1025000 1025000 1125000>; }; - opp@1000000000,1100 { + opp-1000000000-1100 { opp-microvolt = <1100000 1100000 1125000>; }; - opp@1200000000,1000 { + opp-1200000000-1000 { opp-microvolt = <1000000 1000000 1125000>; }; - opp@1200000000,1050 { + opp-1200000000-1050 { opp-microvolt = <1050000 1050000 1125000>; }; - opp@1200000000,1100 { + opp-1200000000-1100 { opp-microvolt = <1100000 1100000 1125000>; }; - opp@1200000000,1125 { + opp-1200000000-1125 { opp-microvolt = <1125000 1125000 1125000>; }; }; diff --git a/arch/arm/boot/dts/tegra20-cpu-opp.dtsi b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi index 135de316383b5..47c8e78ca9581 100644 --- a/arch/arm/boot/dts/tegra20-cpu-opp.dtsi +++ b/arch/arm/boot/dts/tegra20-cpu-opp.dtsi @@ -1,250 +1,250 @@ // SPDX-License-Identifier: GPL-2.0 / { - cpu0_opp_table: cpu_opp_table0 { + cpu0_opp_table: opp-table-cpu0 { compatible = "operating-points-v2"; opp-shared; - opp@216000000,750 { + opp-216000000-750 { clock-latency-ns = <400000>; opp-supported-hw = <0x0F 0x0003>; opp-hz = /bits/ 64 <216000000>; opp-suspend; }; - opp@216000000,800 { + opp-216000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x0F 0x0004>; opp-hz = /bits/ 64 <216000000>; opp-suspend; }; - opp@312000000,750 { + opp-312000000-750 { clock-latency-ns = <400000>; opp-supported-hw = <0x0F 0x0003>; opp-hz = /bits/ 64 <312000000>; }; - opp@312000000,800 { + opp-312000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x0F 0x0004>; opp-hz = /bits/ 64 <312000000>; }; - opp@456000000,750 { + opp-456000000-750 { clock-latency-ns = <400000>; opp-supported-hw = <0x0C 0x0003>; opp-hz = /bits/ 64 <456000000>; }; - opp@456000000,800 { + opp-456000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0006>, <0x04 0x0004>, <0x08 0x0004>; opp-hz = /bits/ 64 <456000000>; }; - opp@456000000,825 { + opp-456000000-825 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <456000000>; }; - opp@608000000,750 { + opp-608000000-750 { clock-latency-ns = <400000>; 
opp-supported-hw = <0x08 0x0003>; opp-hz = /bits/ 64 <608000000>; }; - opp@608000000,800 { + opp-608000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0006>, <0x08 0x0004>; opp-hz = /bits/ 64 <608000000>; }; - opp@608000000,825 { + opp-608000000-825 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0001>; opp-hz = /bits/ 64 <608000000>; }; - opp@608000000,850 { + opp-608000000-850 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0006>; opp-hz = /bits/ 64 <608000000>; }; - opp@608000000,900 { + opp-608000000-900 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <608000000>; }; - opp@760000000,775 { + opp-760000000-775 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0003>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,800 { + opp-760000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0004>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,850 { + opp-760000000-850 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0006>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,875 { + opp-760000000-875 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0001>, <0x02 0x0002>, <0x01 0x0004>, <0x02 0x0004>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,900 { + opp-760000000-900 { clock-latency-ns = <400000>; opp-supported-hw = <0x01 0x0002>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,975 { + opp-760000000-975 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <760000000>; }; - opp@816000000,800 { + opp-816000000-800 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0007>; opp-hz = /bits/ 64 <816000000>; }; - opp@816000000,850 { + opp-816000000-850 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0002>; opp-hz = /bits/ 64 <816000000>; }; - opp@816000000,875 { + opp-816000000-875 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0005>; opp-hz = /bits/ 64 <816000000>; }; - opp@816000000,950 { + opp-816000000-950 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0006>; opp-hz = /bits/ 64 <816000000>; }; - opp@816000000,1000 { + opp-816000000-1000 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <816000000>; }; - opp@912000000,850 { + opp-912000000-850 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0007>; opp-hz = /bits/ 64 <912000000>; }; - opp@912000000,900 { + opp-912000000-900 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0002>; opp-hz = /bits/ 64 <912000000>; }; - opp@912000000,925 { + opp-912000000-925 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0001>; opp-hz = /bits/ 64 <912000000>; }; - opp@912000000,950 { + opp-912000000-950 { clock-latency-ns = <400000>; opp-supported-hw = <0x02 0x0006>, <0x01 0x0004>, <0x04 0x0004>; opp-hz = /bits/ 64 <912000000>; }; - opp@912000000,1000 { + opp-912000000-1000 { clock-latency-ns = <400000>; opp-supported-hw = <0x01 0x0002>; opp-hz = /bits/ 64 <912000000>; }; - opp@912000000,1050 { + opp-912000000-1050 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <912000000>; }; - opp@1000000000,875 { + opp-1000000000-875 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0007>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,900 { + opp-1000000000-900 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0002>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,950 { + opp-1000000000-950 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0004>; opp-hz = 
/bits/ 64 <1000000000>; }; - opp@1000000000,975 { + opp-1000000000-975 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0001>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,1000 { + opp-1000000000-1000 { clock-latency-ns = <400000>; opp-supported-hw = <0x02 0x0006>, <0x01 0x0004>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,1025 { + opp-1000000000-1025 { clock-latency-ns = <400000>; opp-supported-hw = <0x01 0x0002>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,1100 { + opp-1000000000-1100 { clock-latency-ns = <400000>; opp-supported-hw = <0x03 0x0001>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1200000000,1000 { + opp-1200000000-1000 { clock-latency-ns = <400000>; opp-supported-hw = <0x08 0x0004>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1050 { + opp-1200000000-1050 { clock-latency-ns = <400000>; opp-supported-hw = <0x04 0x0004>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1100 { + opp-1200000000-1100 { clock-latency-ns = <400000>; opp-supported-hw = <0x02 0x0004>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1125 { + opp-1200000000-1125 { clock-latency-ns = <400000>; opp-supported-hw = <0x01 0x0004>; opp-hz = /bits/ 64 <1200000000>; diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 5b38b0606f993..50fe311b2cdfa 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -705,5 +705,5 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@760000000; + /delete-node/ opp-760000000; }; diff --git a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi index ef3ad2e5f2701..d4d0a5fa70155 100644 --- a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi @@ -1,107 +1,107 @@ // SPDX-License-Identifier: GPL-2.0 / { - emc_icc_dvfs_opp_table: emc-dvfs-opp-table { + emc_icc_dvfs_opp_table: opp-table-emc { compatible = "operating-points-v2"; - opp@36000000 { + opp-36000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <36000000>; opp-supported-hw = <0x000F>; }; - opp@47500000 { + opp-47500000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <47500000>; opp-supported-hw = <0x000F>; }; - opp@50000000 { + opp-50000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <50000000>; opp-supported-hw = <0x000F>; }; - opp@54000000 { + opp-54000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x000F>; }; - opp@57000000 { + opp-57000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <57000000>; opp-supported-hw = <0x000F>; }; - opp@100000000 { + opp-100000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <100000000>; opp-supported-hw = <0x000F>; }; - opp@108000000 { + opp-108000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x000F>; }; - opp@126666000 { + opp-126666000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <126666000>; opp-supported-hw = <0x000F>; }; - opp@150000000 { + opp-150000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <150000000>; opp-supported-hw = <0x000F>; }; - opp@190000000 { + opp-190000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <190000000>; opp-supported-hw = <0x000F>; }; - opp@216000000 { + opp-216000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <216000000>; opp-supported-hw = <0x000F>; opp-suspend; }; - 
opp@300000000 { + opp-300000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x000F>; }; - opp@333000000 { + opp-333000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <333000000>; opp-supported-hw = <0x000F>; }; - opp@380000000 { + opp-380000000 { opp-microvolt = <1100000 1100000 1300000>; opp-hz = /bits/ 64 <380000000>; opp-supported-hw = <0x000F>; }; - opp@600000000 { + opp-600000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x000F>; }; - opp@666000000 { + opp-666000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <666000000>; opp-supported-hw = <0x000F>; }; - opp@760000000 { + opp-760000000 { opp-microvolt = <1300000 1300000 1300000>; opp-hz = /bits/ 64 <760000000>; opp-supported-hw = <0x000F>; diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-memory-timings.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-memory-timings.dtsi index bcff0997ee517..6c229e1d67e71 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-memory-timings.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-memory-timings.dtsi @@ -1565,13 +1565,13 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@750000000,1300; - /delete-node/ opp@800000000,1300; - /delete-node/ opp@900000000,1350; + /delete-node/ opp-750000000-1300; + /delete-node/ opp-800000000-1300; + /delete-node/ opp-900000000-1350; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@750000000; - /delete-node/ opp@800000000; - /delete-node/ opp@900000000; + /delete-node/ opp-750000000; + /delete-node/ opp-800000000; + /delete-node/ opp-900000000; }; diff --git a/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi index 1be715d2a442b..b8e0e91170212 100644 --- a/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi +++ b/arch/arm/boot/dts/tegra30-cpu-opp-microvolt.dtsi @@ -1,288 +1,288 @@ // SPDX-License-Identifier: GPL-2.0 / { - cpu0_opp_table: cpu_opp_table0 { - opp@51000000,800 { + cpu0_opp_table: opp-table-cpu0 { + opp-51000000-800 { opp-microvolt = <800000 800000 1250000>; }; - opp@51000000,850 { + opp-51000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@51000000,912 { + opp-51000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@102000000,800 { + opp-102000000-800 { opp-microvolt = <800000 800000 1250000>; }; - opp@102000000,850 { + opp-102000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@102000000,912 { + opp-102000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@204000000,800 { + opp-204000000-800 { opp-microvolt = <800000 800000 1250000>; }; - opp@204000000,850 { + opp-204000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@204000000,912 { + opp-204000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@312000000,850 { + opp-312000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@312000000,912 { + opp-312000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@340000000,800 { + opp-340000000-800 { opp-microvolt = <800000 800000 1250000>; }; - opp@340000000,850 { + opp-340000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@370000000,800 { + opp-370000000-800 { opp-microvolt = <800000 800000 1250000>; }; - opp@456000000,850 { + opp-456000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@456000000,912 { + opp-456000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@475000000,800 { + opp-475000000-800 { opp-microvolt = <800000 800000 1250000>; 
}; - opp@475000000,850 { + opp-475000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@608000000,850 { + opp-608000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@608000000,912 { + opp-608000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@620000000,850 { + opp-620000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@640000000,850 { + opp-640000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@640000000,900 { + opp-640000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@760000000,850 { + opp-760000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@760000000,900 { + opp-760000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@760000000,912 { + opp-760000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@760000000,975 { + opp-760000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@816000000,850 { + opp-816000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@816000000,912 { + opp-816000000-912 { opp-microvolt = <912000 912000 1250000>; }; - opp@860000000,850 { + opp-860000000-850 { opp-microvolt = <850000 850000 1250000>; }; - opp@860000000,900 { + opp-860000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@860000000,975 { + opp-860000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@860000000,1000 { + opp-860000000-1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@910000000,900 { + opp-910000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@1000000000,900 { + opp-1000000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@1000000000,975 { + opp-1000000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@1000000000,1000 { + opp-1000000000-1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@1000000000,1025 { + opp-1000000000-1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1100000000,900 { + opp-1100000000-900 { opp-microvolt = <900000 900000 1250000>; }; - opp@1100000000,975 { + opp-1100000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@1100000000,1000 { + opp-1100000000-1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@1100000000,1025 { + opp-1100000000-1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1100000000,1075 { + opp-1100000000-1075 { opp-microvolt = <1075000 1075000 1250000>; }; - opp@1150000000,975 { + opp-1150000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@1200000000,975 { + opp-1200000000-975 { opp-microvolt = <975000 975000 1250000>; }; - opp@1200000000,1000 { + opp-1200000000-1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@1200000000,1025 { + opp-1200000000-1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1200000000,1050 { + opp-1200000000-1050 { opp-microvolt = <1050000 1050000 1250000>; }; - opp@1200000000,1075 { + opp-1200000000-1075 { opp-microvolt = <1075000 1075000 1250000>; }; - opp@1200000000,1100 { + opp-1200000000-1100 { opp-microvolt = <1100000 1100000 1250000>; }; - opp@1300000000,1000 { + opp-1300000000-1000 { opp-microvolt = <1000000 1000000 1250000>; }; - opp@1300000000,1025 { + opp-1300000000-1025 { opp-microvolt = <1025000 1025000 1250000>; }; - opp@1300000000,1050 { + opp-1300000000-1050 { opp-microvolt = <1050000 1050000 1250000>; }; - opp@1300000000,1075 { + opp-1300000000-1075 { opp-microvolt = <1075000 1075000 1250000>; }; - opp@1300000000,1100 { + opp-1300000000-1100 { opp-microvolt = <1100000 1100000 1250000>; }; - opp@1300000000,1125 { + opp-1300000000-1125 { opp-microvolt = <1125000 1125000 1250000>; }; - 
opp@1300000000,1150 { + opp-1300000000-1150 { opp-microvolt = <1150000 1150000 1250000>; }; - opp@1300000000,1175 { + opp-1300000000-1175 { opp-microvolt = <1175000 1175000 1250000>; }; - opp@1400000000,1100 { + opp-1400000000-1100 { opp-microvolt = <1100000 1100000 1250000>; }; - opp@1400000000,1125 { + opp-1400000000-1125 { opp-microvolt = <1125000 1125000 1250000>; }; - opp@1400000000,1150 { + opp-1400000000-1150 { opp-microvolt = <1150000 1150000 1250000>; }; - opp@1400000000,1175 { + opp-1400000000-1175 { opp-microvolt = <1175000 1175000 1250000>; }; - opp@1400000000,1237 { + opp-1400000000-1237 { opp-microvolt = <1237000 1237000 1250000>; }; - opp@1500000000,1125 { + opp-1500000000-1125 { opp-microvolt = <1125000 1125000 1250000>; }; - opp@1500000000,1150 { + opp-1500000000-1150 { opp-microvolt = <1150000 1150000 1250000>; }; - opp@1500000000,1200 { + opp-1500000000-1200 { opp-microvolt = <1200000 1200000 1250000>; }; - opp@1500000000,1237 { + opp-1500000000-1237 { opp-microvolt = <1237000 1237000 1250000>; }; - opp@1600000000,1212 { + opp-1600000000-1212 { opp-microvolt = <1212000 1212000 1250000>; }; - opp@1600000000,1237 { + opp-1600000000-1237 { opp-microvolt = <1237000 1237000 1250000>; }; - opp@1700000000,1212 { + opp-1700000000-1212 { opp-microvolt = <1212000 1212000 1250000>; }; - opp@1700000000,1237 { + opp-1700000000-1237 { opp-microvolt = <1237000 1237000 1250000>; }; }; diff --git a/arch/arm/boot/dts/tegra30-cpu-opp.dtsi b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi index 72f2fe26cc0e9..5b9ebb75a09f8 100644 --- a/arch/arm/boot/dts/tegra30-cpu-opp.dtsi +++ b/arch/arm/boot/dts/tegra30-cpu-opp.dtsi @@ -1,116 +1,116 @@ // SPDX-License-Identifier: GPL-2.0 / { - cpu0_opp_table: cpu_opp_table0 { + cpu0_opp_table: opp-table-cpu0 { compatible = "operating-points-v2"; opp-shared; - opp@51000000,800 { + opp-51000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x31FE>; opp-hz = /bits/ 64 <51000000>; }; - opp@51000000,850 { + opp-51000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0C01>; opp-hz = /bits/ 64 <51000000>; }; - opp@51000000,912 { + opp-51000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <51000000>; }; - opp@102000000,800 { + opp-102000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x31FE>; opp-hz = /bits/ 64 <102000000>; }; - opp@102000000,850 { + opp-102000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0C01>; opp-hz = /bits/ 64 <102000000>; }; - opp@102000000,912 { + opp-102000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <102000000>; }; - opp@204000000,800 { + opp-204000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x31FE>; opp-hz = /bits/ 64 <204000000>; opp-suspend; }; - opp@204000000,850 { + opp-204000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0C01>; opp-hz = /bits/ 64 <204000000>; opp-suspend; }; - opp@204000000,912 { + opp-204000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <204000000>; opp-suspend; }; - opp@312000000,850 { + opp-312000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0C00>; opp-hz = /bits/ 64 <312000000>; }; - opp@312000000,912 { + opp-312000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <312000000>; }; - opp@340000000,800 { + opp-340000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0192>; opp-hz = /bits/ 64 <340000000>; }; - 
opp@340000000,850 { + opp-340000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x0F 0x0001>; opp-hz = /bits/ 64 <340000000>; }; - opp@370000000,800 { + opp-370000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1E 0x306C>; opp-hz = /bits/ 64 <370000000>; }; - opp@456000000,850 { + opp-456000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0C00>; opp-hz = /bits/ 64 <456000000>; }; - opp@456000000,912 { + opp-456000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <456000000>; }; - opp@475000000,800 { + opp-475000000-800 { clock-latency-ns = <100000>; opp-supported-hw = <0x1E 0x31FE>; opp-hz = /bits/ 64 <475000000>; }; - opp@475000000,850 { + opp-475000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x0F 0x0001>, <0x01 0x0002>, <0x01 0x0010>, <0x01 0x0080>, @@ -118,25 +118,25 @@ opp-hz = /bits/ 64 <475000000>; }; - opp@608000000,850 { + opp-608000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0400>; opp-hz = /bits/ 64 <608000000>; }; - opp@608000000,912 { + opp-608000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <608000000>; }; - opp@620000000,850 { + opp-620000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1E 0x306C>; opp-hz = /bits/ 64 <620000000>; }; - opp@640000000,850 { + opp-640000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x0F 0x0001>, <0x02 0x0002>, <0x04 0x0002>, <0x08 0x0002>, @@ -149,13 +149,13 @@ opp-hz = /bits/ 64 <640000000>; }; - opp@640000000,900 { + opp-640000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <640000000>; }; - opp@760000000,850 { + opp-760000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1E 0x3461>, <0x08 0x0002>, <0x08 0x0004>, <0x08 0x0008>, @@ -165,7 +165,7 @@ opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,900 { + opp-760000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0001>, <0x02 0x0002>, <0x04 0x0002>, <0x02 0x0004>, @@ -177,37 +177,37 @@ opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,912 { + opp-760000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <760000000>; }; - opp@760000000,975 { + opp-760000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <760000000>; }; - opp@816000000,850 { + opp-816000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0400>; opp-hz = /bits/ 64 <816000000>; }; - opp@816000000,912 { + opp-816000000-912 { clock-latency-ns = <100000>; opp-supported-hw = <0x1F 0x0200>; opp-hz = /bits/ 64 <816000000>; }; - opp@860000000,850 { + opp-860000000-850 { clock-latency-ns = <100000>; opp-supported-hw = <0x0C 0x0001>; opp-hz = /bits/ 64 <860000000>; }; - opp@860000000,900 { + opp-860000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x0001>, <0x04 0x0002>, <0x08 0x0002>, <0x04 0x0004>, @@ -220,7 +220,7 @@ opp-hz = /bits/ 64 <860000000>; }; - opp@860000000,975 { + opp-860000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0001>, <0x02 0x0002>, <0x02 0x0004>, <0x02 0x0008>, @@ -229,25 +229,25 @@ opp-hz = /bits/ 64 <860000000>; }; - opp@860000000,1000 { + opp-860000000-1000 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <860000000>; }; - opp@910000000,900 { + opp-910000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x18 0x3060>; opp-hz = /bits/ 64 <910000000>; }; - opp@1000000000,900 { + 
opp-1000000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x0C 0x0001>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,975 { + opp-1000000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x03 0x0001>, <0x04 0x0002>, <0x08 0x0002>, <0x04 0x0004>, @@ -260,25 +260,25 @@ opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,1000 { + opp-1000000000-1000 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x019E>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1000000000,1025 { + opp-1000000000-1025 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <1000000000>; }; - opp@1100000000,900 { + opp-1100000000-900 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x0001>; opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,975 { + opp-1100000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x06 0x0001>, <0x08 0x0002>, <0x08 0x0004>, <0x08 0x0008>, @@ -288,7 +288,7 @@ opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,1000 { + opp-1100000000-1000 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0001>, <0x04 0x0002>, <0x04 0x0004>, <0x04 0x0008>, @@ -297,31 +297,31 @@ opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,1025 { + opp-1100000000-1025 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x019E>; opp-hz = /bits/ 64 <1100000000>; }; - opp@1100000000,1075 { + opp-1100000000-1075 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <1100000000>; }; - opp@1150000000,975 { + opp-1150000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x18 0x3060>; opp-hz = /bits/ 64 <1150000000>; }; - opp@1200000000,975 { + opp-1200000000-975 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x0001>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1000 { + opp-1200000000-1000 { clock-latency-ns = <100000>; opp-supported-hw = <0x04 0x0001>, <0x08 0x0002>, <0x08 0x0004>, <0x08 0x0008>, @@ -331,7 +331,7 @@ opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1025 { + opp-1200000000-1025 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x0001>, <0x04 0x0002>, <0x04 0x0004>, <0x04 0x0008>, @@ -340,39 +340,39 @@ opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1050 { + opp-1200000000-1050 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x019E>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1075 { + opp-1200000000-1075 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0001>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1200000000,1100 { + opp-1200000000-1100 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0192>; opp-hz = /bits/ 64 <1200000000>; }; - opp@1300000000,1000 { + opp-1300000000-1000 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x0001>, <0x10 0x0080>, <0x10 0x0100>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1025 { + opp-1300000000-1025 { clock-latency-ns = <100000>; opp-supported-hw = <0x04 0x0001>, <0x08 0x0002>, <0x08 0x0080>, <0x08 0x0100>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1050 { + opp-1300000000-1050 { clock-latency-ns = <100000>; opp-supported-hw = <0x12 0x3061>, <0x04 0x0002>, <0x08 0x0004>, <0x08 0x0008>, @@ -383,68 +383,68 @@ opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1075 { + opp-1300000000-1075 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x0182>, <0x04 0x0004>, <0x04 0x0008>, <0x04 0x0010>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1100 { + opp-1300000000-1100 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x001C>; opp-hz = /bits/ 64 
<1300000000>; }; - opp@1300000000,1125 { + opp-1300000000-1125 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0001>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1150 { + opp-1300000000-1150 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0182>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1300000000,1175 { + opp-1300000000-1175 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0010>; opp-hz = /bits/ 64 <1300000000>; }; - opp@1400000000,1100 { + opp-1400000000-1100 { clock-latency-ns = <100000>; opp-supported-hw = <0x18 0x307C>; opp-hz = /bits/ 64 <1400000000>; }; - opp@1400000000,1125 { + opp-1400000000-1125 { clock-latency-ns = <100000>; opp-supported-hw = <0x04 0x000C>; opp-hz = /bits/ 64 <1400000000>; }; - opp@1400000000,1150 { + opp-1400000000-1150 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x000C>, <0x04 0x0010>; opp-hz = /bits/ 64 <1400000000>; }; - opp@1400000000,1175 { + opp-1400000000-1175 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x0010>; opp-hz = /bits/ 64 <1400000000>; }; - opp@1400000000,1237 { + opp-1400000000-1237 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0010>; opp-hz = /bits/ 64 <1400000000>; }; - opp@1500000000,1125 { + opp-1500000000-1125 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x0010>, <0x10 0x0020>, <0x10 0x0040>, <0x10 0x1000>, @@ -452,7 +452,7 @@ opp-hz = /bits/ 64 <1500000000>; }; - opp@1500000000,1150 { + opp-1500000000-1150 { clock-latency-ns = <100000>; opp-supported-hw = <0x04 0x0010>, <0x08 0x0020>, <0x08 0x0040>, <0x08 0x1000>, @@ -460,37 +460,37 @@ opp-hz = /bits/ 64 <1500000000>; }; - opp@1500000000,1200 { + opp-1500000000-1200 { clock-latency-ns = <100000>; opp-supported-hw = <0x02 0x0010>; opp-hz = /bits/ 64 <1500000000>; }; - opp@1500000000,1237 { + opp-1500000000-1237 { clock-latency-ns = <100000>; opp-supported-hw = <0x01 0x0010>; opp-hz = /bits/ 64 <1500000000>; }; - opp@1600000000,1212 { + opp-1600000000-1212 { clock-latency-ns = <100000>; opp-supported-hw = <0x10 0x3060>; opp-hz = /bits/ 64 <1600000000>; }; - opp@1600000000,1237 { + opp-1600000000-1237 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x3060>; opp-hz = /bits/ 64 <1600000000>; }; - opp@1700000000,1212 { + opp-1700000000-1212 { clock-latency-ns = <100000>; opp-supported-hw = <0x10 0x3060>; opp-hz = /bits/ 64 <1700000000>; }; - opp@1700000000,1237 { + opp-1700000000-1237 { clock-latency-ns = <100000>; opp-supported-hw = <0x08 0x3060>; opp-hz = /bits/ 64 <1700000000>; diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts index 4259871b76c9f..e63f82f619e91 100644 --- a/arch/arm/boot/dts/tegra30-ouya.dts +++ b/arch/arm/boot/dts/tegra30-ouya.dts @@ -4523,9 +4523,9 @@ }; &emc_icc_dvfs_opp_table { - /delete-node/ opp@900000000,1350; + /delete-node/ opp-900000000-1350; }; &emc_bw_dfs_opp_table { - /delete-node/ opp@900000000; + /delete-node/ opp-900000000; }; diff --git a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi index 2c97803197257..ff25350869b35 100644 --- a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi @@ -1,383 +1,383 @@ // SPDX-License-Identifier: GPL-2.0 / { - emc_icc_dvfs_opp_table: emc-dvfs-opp-table { + emc_icc_dvfs_opp_table: opp-table-emc { compatible = "operating-points-v2"; - opp@12750000,950 { + opp-12750000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0006>; }; - opp@12750000,1000 { + 
opp-12750000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0001>; }; - opp@12750000,1250 { + opp-12750000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0008>; }; - opp@25500000,950 { + opp-25500000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0006>; }; - opp@25500000,1000 { + opp-25500000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0001>; }; - opp@25500000,1250 { + opp-25500000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0008>; }; - opp@27000000,950 { + opp-27000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0006>; }; - opp@27000000,1000 { + opp-27000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0001>; }; - opp@27000000,1250 { + opp-27000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0008>; }; - opp@51000000,950 { + opp-51000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0006>; }; - opp@51000000,1000 { + opp-51000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0001>; }; - opp@51000000,1250 { + opp-51000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0008>; }; - opp@54000000,950 { + opp-54000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0006>; }; - opp@54000000,1000 { + opp-54000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0001>; }; - opp@54000000,1250 { + opp-54000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0008>; }; - opp@102000000,950 { + opp-102000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0006>; }; - opp@102000000,1000 { + opp-102000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0001>; }; - opp@102000000,1250 { + opp-102000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0008>; }; - opp@108000000,1000 { + opp-108000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0007>; }; - opp@108000000,1250 { + opp-108000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0008>; }; - opp@204000000,1000 { + opp-204000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0007>; opp-suspend; }; - opp@204000000,1250 { + opp-204000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0008>; opp-suspend; }; - opp@333500000,1000 { + opp-333500000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0006>; }; - opp@333500000,1200 { + opp-333500000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0001>; }; - opp@333500000,1250 { + opp-333500000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz 
= /bits/ 64 <333500000>; opp-supported-hw = <0x0008>; }; - opp@375000000,1000 { + opp-375000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0006>; }; - opp@375000000,1200 { + opp-375000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0001>; }; - opp@375000000,1250 { + opp-375000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0008>; }; - opp@400000000,1000 { + opp-400000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0006>; }; - opp@400000000,1200 { + opp-400000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0001>; }; - opp@400000000,1250 { + opp-400000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0008>; }; - opp@416000000,1200 { + opp-416000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0007>; }; - opp@416000000,1250 { + opp-416000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0008>; }; - opp@450000000,1200 { + opp-450000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0007>; }; - opp@450000000,1250 { + opp-450000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0008>; }; - opp@533000000,1200 { + opp-533000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0007>; }; - opp@533000000,1250 { + opp-533000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0008>; }; - opp@625000000,1200 { + opp-625000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0006>; }; - opp@625000000,1250 { + opp-625000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0008>; }; - opp@667000000,1200 { + opp-667000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <667000000>; opp-supported-hw = <0x0006>; }; - opp@750000000,1300 { + opp-750000000-1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <750000000>; opp-supported-hw = <0x0004>; }; - opp@800000000,1300 { + opp-800000000-1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <800000000>; opp-supported-hw = <0x0004>; }; - opp@900000000,1350 { + opp-900000000-1350 { opp-microvolt = <1350000 1350000 1350000>; opp-hz = /bits/ 64 <900000000>; opp-supported-hw = <0x0004>; }; }; - emc_bw_dfs_opp_table: emc-bandwidth-opp-table { + emc_bw_dfs_opp_table: opp-table-actmon { compatible = "operating-points-v2"; - opp@12750000 { + opp-12750000 { opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x000F>; opp-peak-kBps = <102000>; }; - opp@25500000 { + opp-25500000 { opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x000F>; opp-peak-kBps = <204000>; }; - opp@27000000 { + opp-27000000 { opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x000F>; opp-peak-kBps = <216000>; }; - opp@51000000 { + opp-51000000 { opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x000F>; opp-peak-kBps = <408000>; }; - opp@54000000 { + opp-54000000 { opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x000F>; opp-peak-kBps = <432000>; 
 	};
 
-	opp@102000000 {
+	opp-102000000 {
 		opp-hz = /bits/ 64 <102000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <816000>;
 	};
 
-	opp@108000000 {
+	opp-108000000 {
 		opp-hz = /bits/ 64 <108000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <864000>;
 	};
 
-	opp@204000000 {
+	opp-204000000 {
 		opp-hz = /bits/ 64 <204000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <1632000>;
 		opp-suspend;
 	};
 
-	opp@333500000 {
+	opp-333500000 {
 		opp-hz = /bits/ 64 <333500000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <2668000>;
 	};
 
-	opp@375000000 {
+	opp-375000000 {
 		opp-hz = /bits/ 64 <375000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <3000000>;
 	};
 
-	opp@400000000 {
+	opp-400000000 {
 		opp-hz = /bits/ 64 <400000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <3200000>;
 	};
 
-	opp@416000000 {
+	opp-416000000 {
 		opp-hz = /bits/ 64 <416000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <3328000>;
 	};
 
-	opp@450000000 {
+	opp-450000000 {
 		opp-hz = /bits/ 64 <450000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <3600000>;
 	};
 
-	opp@533000000 {
+	opp-533000000 {
 		opp-hz = /bits/ 64 <533000000>;
 		opp-supported-hw = <0x000F>;
 		opp-peak-kBps = <4264000>;
 	};
 
-	opp@625000000 {
+	opp-625000000 {
 		opp-hz = /bits/ 64 <625000000>;
 		opp-supported-hw = <0x000E>;
 		opp-peak-kBps = <5000000>;
 	};
 
-	opp@667000000 {
+	opp-667000000 {
 		opp-hz = /bits/ 64 <667000000>;
 		opp-supported-hw = <0x0006>;
 		opp-peak-kBps = <5336000>;
 	};
 
-	opp@750000000 {
+	opp-750000000 {
 		opp-hz = /bits/ 64 <750000000>;
 		opp-supported-hw = <0x0004>;
 		opp-peak-kBps = <6000000>;
 	};
 
-	opp@800000000 {
+	opp-800000000 {
 		opp-hz = /bits/ 64 <800000000>;
 		opp-supported-hw = <0x0004>;
 		opp-peak-kBps = <6400000>;
 	};
 
-	opp@900000000 {
+	opp-900000000 {
 		opp-hz = /bits/ 64 <900000000>;
 		opp-supported-hw = <0x0004>;
 		opp-peak-kBps = <7200000>;

From 5ecb9ed82cfd4a04a89fb0ec9389619ed735f7cd Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sat, 23 Oct 2021 21:54:54 +0300
Subject: [PATCH 1060/1202] ARM: tegra: Add 500MHz entry to Tegra30 memory OPP table

Extend the memory OPP tables with a 500 MHz entry. This clock rate is
used by ASUS Transformer tablets.

Signed-off-by: Dmitry Osipenko
---
 arch/arm/boot/dts/tegra30-peripherals-opp.dtsi | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi
index ff25350869b35..af96404014024 100644
--- a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi
+++ b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi
@@ -216,6 +216,18 @@
 		opp-supported-hw = <0x0008>;
 	};
 
+	opp-500000000-1200 {
+		opp-microvolt = <1200000 1200000 1350000>;
+		opp-hz = /bits/ 64 <500000000>;
+		opp-supported-hw = <0x0007>;
+	};
+
+	opp-500000000-1250 {
+		opp-microvolt = <1250000 1250000 1350000>;
+		opp-hz = /bits/ 64 <500000000>;
+		opp-supported-hw = <0x0008>;
+	};
+
 	opp-533000000-1200 {
 		opp-microvolt = <1200000 1200000 1350000>;
 		opp-hz = /bits/ 64 <533000000>;
@@ -347,6 +359,12 @@
 		opp-peak-kBps = <3600000>;
 	};
 
+	opp-500000000 {
+		opp-hz = /bits/ 64 <500000000>;
+		opp-supported-hw = <0x000F>;
+		opp-peak-kBps = <4000000>;
+	};
+
 	opp-533000000 {
 		opp-hz = /bits/ 64 <533000000>;
 		opp-supported-hw = <0x000F>;

From b23a3a1c8a325cdba5ee39f13add5810fc2f7080 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 24 Nov 2020 00:34:42 +0300
Subject: [PATCH 1061/1202] ARM: tegra: Add OPP tables and power domains to Tegra20 device-trees

Add OPP tables and power domains to all peripheral devices that support
power management on the Tegra20 SoC.
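The wiring is the same for every device and is sketched below. This is a
condensed, illustrative excerpt taken from the gr2d hunks in the diff
that follows (unrelated properties are elided and only one OPP entry is
shown, so it is not the complete node definition): the device node gains
power-domains and operating-points-v2 references, and each entry of the
device's OPP table is pinned to a core voltage level via required-opps:

	gr2d@54140000 {
		/* existing clocks/resets properties unchanged */
		power-domains = <&pd_core>;
		operating-points-v2 = <&gr2d_dvfs_opp_table>;
	};

	gr2d_dvfs_opp_table: opp-table-gr2d {
		compatible = "operating-points-v2";

		/* running at 133 MHz requires the core rail at the 950 mV level */
		opp-133000000-950 {
			opp-microvolt = <950000 950000 1300000>;
			opp-hz = /bits/ 64 <133000000>;
			opp-supported-hw = <0x000F>;
			required-opps = <&core_opp_950>;
		};
	};

With this in place, the core power-domain can pick the lowest voltage
that satisfies the clock rates of all active devices.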
Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 Signed-off-by: Dmitry Osipenko --- .../boot/dts/tegra20-acer-a500-picasso.dts | 1 + arch/arm/boot/dts/tegra20-colibri.dtsi | 3 +- arch/arm/boot/dts/tegra20-harmony.dts | 3 +- arch/arm/boot/dts/tegra20-paz00.dts | 1 + .../arm/boot/dts/tegra20-peripherals-opp.dtsi | 913 ++++++++++++++++++ arch/arm/boot/dts/tegra20-seaboard.dts | 3 +- arch/arm/boot/dts/tegra20-tamonten.dtsi | 3 +- arch/arm/boot/dts/tegra20-trimslice.dts | 9 + arch/arm/boot/dts/tegra20-ventana.dts | 1 + arch/arm/boot/dts/tegra20.dtsi | 102 +- 10 files changed, 1034 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 9a3510dcbb31f..777b5a14de55b 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -716,6 +716,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <458>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20-colibri.dtsi b/arch/arm/boot/dts/tegra20-colibri.dtsi index 3f9cb5cd6bd8d..eb5b3feb59076 100644 --- a/arch/arm/boot/dts/tegra20-colibri.dtsi +++ b/arch/arm/boot/dts/tegra20-colibri.dtsi @@ -495,7 +495,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "VDD_CORE_1.2V"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -601,6 +601,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; /* Set SLEEP MODE bit in SUPPLYENE register of TPS658643 PMIC */ i2c-thermtrip { diff --git a/arch/arm/boot/dts/tegra20-harmony.dts b/arch/arm/boot/dts/tegra20-harmony.dts index ae4312eedcbd5..b21bab437ebd9 100644 --- a/arch/arm/boot/dts/tegra20-harmony.dts +++ b/arch/arm/boot/dts/tegra20-harmony.dts @@ -339,7 +339,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "vdd_sm0,vdd_core"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -565,6 +565,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; pcie@80003000 { diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 50fe311b2cdfa..21884d9d58cc3 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -519,6 +519,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <0>; nvidia,sys-clock-req-active-high; + core-supply = <&core_vdd_reg>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi index d4d0a5fa70155..e7477b129e342 100644 --- a/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra20-peripherals-opp.dtsi @@ -1,6 +1,46 @@ // SPDX-License-Identifier: GPL-2.0 / { + core_opp_table: opp-table-core { + compatible = "operating-points-v2"; + opp-shared; + + core_opp_950: opp-950000 { + opp-microvolt = <950000 950000 1300000>; + opp-level = <950000>; + }; + + core_opp_1000: opp-1000000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-level = <1000000>; + }; + + core_opp_1100: opp-1100000 { + opp-microvolt = <1100000 1100000 1300000>; + opp-level = <1100000>; + }; + + core_opp_1200: opp-1200000 { + opp-microvolt = <1200000 1200000 1300000>; + opp-level = <1200000>; + }; + + core_opp_1225: opp-1225000 { + 
opp-microvolt = <1225000 1225000 1300000>; + opp-level = <1225000>; + }; + + core_opp_1275: opp-1275000 { + opp-microvolt = <1275000 1275000 1300000>; + opp-level = <1275000>; + }; + + core_opp_1300: opp-1300000 { + opp-microvolt = <1300000 1300000 1300000>; + opp-level = <1300000>; + }; + }; + emc_icc_dvfs_opp_table: opp-table-emc { compatible = "operating-points-v2"; @@ -8,66 +48,77 @@ opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <36000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp-47500000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <47500000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp-50000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <50000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp-54000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp-57000000 { opp-microvolt = <950000 950000 1300000>; opp-hz = /bits/ 64 <57000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; }; opp-100000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <100000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-108000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-126666000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <126666000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-150000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <150000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-190000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <190000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-216000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <216000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; opp-suspend; }; @@ -75,36 +126,898 @@ opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <300000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-333000000 { opp-microvolt = <1000000 1000000 1300000>; opp-hz = /bits/ 64 <333000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; }; opp-380000000 { opp-microvolt = <1100000 1100000 1300000>; opp-hz = /bits/ 64 <380000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; }; opp-600000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <600000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; }; opp-666000000 { opp-microvolt = <1200000 1200000 1300000>; opp-hz = /bits/ 64 <666000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; }; opp-760000000 { opp-microvolt = <1300000 1300000 1300000>; opp-hz = /bits/ 64 <760000000>; opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1300>; + }; + }; + + host1x_dvfs_opp_table: opp-table-host1x { + compatible = "operating-points-v2"; + + opp-104500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <104500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-133000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-166000000-1100 { + opp-microvolt = 
<1100000 1100000 1300000>; + opp-hz = /bits/ 64 <166000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + mpe_dvfs_opp_table: opp-table-mpe { + compatible = "operating-points-v2"; + + opp-104500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <104500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp-142500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <142500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp-152000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp-190000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp-190000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp-228000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp-228000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp-237500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <237500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-266000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <266000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp-275500000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <275500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp-300000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp-300000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1100>; + }; + }; + + vi_dvfs_opp_table: opp-table-vi { + compatible = "operating-points-v2"; + + opp-85000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <85000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-100000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-150000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <150000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + epp_dvfs_opp_table: opp-table-epp { + compatible = "operating-points-v2"; + + opp-133000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-171000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-247000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp-300000000-1200 { + 
opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + gr2d_dvfs_opp_table: opp-table-gr2d { + compatible = "operating-points-v2"; + + opp-133000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-171000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-247000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp-300000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + gr3d_dvfs_opp_table: opp-table-gr3d { + compatible = "operating-points-v2"; + + opp-114000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <114000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp-161500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <161500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp-161500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <161500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp-209000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp-218500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp-247000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp-247000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp-256500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <256500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-285000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp-285000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp-304000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp-323000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <323000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-333500000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <333500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp-333500000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <333500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1200>; + }; + + opp-351500000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 
64 <351500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + + opp-361000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1275>; + }; + + opp-380000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-400000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1275>; + }; + + opp-400000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1200>; + }; + }; + + disp1_dvfs_opp_table: opp-table-disp1 { + compatible = "operating-points-v2"; + + opp-158000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-190000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + disp2_dvfs_opp_table: opp-table-disp2 { + compatible = "operating-points-v2"; + + opp-158000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-190000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + dsi_dvfs_opp_table: opp-table-dsi { + compatible = "operating-points-v2"; + + opp-100000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-500000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <500000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + hdmi_dvfs_opp_table: opp-table-hdmi { + compatible = "operating-points-v2"; + + opp-148500000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <148500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + tvo_dvfs_opp_table: opp-table-tvo { + compatible = "operating-points-v2"; + + opp-250000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sclk_dvfs_opp_table: opp-table-sclk { + compatible = "operating-points-v2"; + + opp-95000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <95000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp-123500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp-133000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp-152000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp-159500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <159500000>; + opp-supported-hw = <0x0002>; + required-opps = 
<&core_opp_1000>; + }; + + opp-171000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp-180500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <180500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-190000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp-207000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <207000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp-218500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp-222500000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <222500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp-229500000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <229500000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-240000000-1225 { + opp-microvolt = <1225000 1225000 1300000>; + opp-hz = /bits/ 64 <240000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1225>; + }; + + opp-240000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <240000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1200>; + }; + + opp-247000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp-256500000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <256500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + + opp-260000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <260000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-262000000-1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <262000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1300>; + }; + + opp-264000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <264000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1275>; + }; + + opp-277500000-1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <277500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1300>; + }; + + opp-285000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1275>; + }; + + opp-292500000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <292500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1200>; + }; + + opp-300000000-1300 { + opp-microvolt = <1300000 1300000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp-300000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1275>; + }; + }; + + vde_dvfs_opp_table: opp-table-vde { + compatible = "operating-points-v2"; + + opp-95000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <95000000>; + 
opp-supported-hw = <0x0001>; + required-opps = <&core_opp_950>; + }; + + opp-123500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp-123500000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <123500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_950>; + }; + + opp-152000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1000>; + }; + + opp-152000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_950>; + }; + + opp-171000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <171000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_950>; + }; + + opp-209000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp-209000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <209000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-218500000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <218500000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1000>; + }; + + opp-237500000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <237500000>; + opp-supported-hw = <0x0002>; + required-opps = <&core_opp_1100>; + }; + + opp-275500000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <275500000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; + }; + + opp-285000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-300000000-1275 { + opp-microvolt = <1275000 1275000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1275>; + }; + + opp-300000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp-300000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1100>; + }; + }; + + ndflash_dvfs_opp_table: opp-table-ndflash { + compatible = "operating-points-v2"; + + opp-130000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <130000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-150000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <150000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-158000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <158000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp-164000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <164000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + nor_dvfs_opp_table: opp-table-nor { + compatible = "operating-points-v2"; + + opp-92000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <92000000>; + opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1000>; + }; + }; + + sdmmc1_dvfs_opp_table: opp-table-sdmmc1 { + compatible = "operating-points-v2"; + + opp-44000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc2_dvfs_opp_table: opp-table-sdmmc2 { + compatible = "operating-points-v2"; + + opp-44000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc3_dvfs_opp_table: opp-table-sdmmc3 { + compatible = "operating-points-v2"; + + opp-44000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sdmmc4_dvfs_opp_table: opp-table-sdmmc4 { + compatible = "operating-points-v2"; + + opp-44000000-950 { + opp-microvolt = <950000 950000 1300000>; + opp-hz = /bits/ 64 <44000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1300000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + pcie_dvfs_opp_table: opp-table-pcie { + compatible = "operating-points-v2"; + + opp-250000000-1200 { + opp-microvolt = <1200000 1200000 1300000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + usbd_dvfs_opp_table: opp-table-usbd { + compatible = "operating-points-v2"; + + opp-480000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + usb2_dvfs_opp_table: opp-table-usb2 { + compatible = "operating-points-v2"; + + opp-480000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + }; + + usb3_dvfs_opp_table: opp-table-usb3 { + compatible = "operating-points-v2"; + + opp-480000000-1100 { + opp-microvolt = <1100000 1100000 1300000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; }; }; }; + diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index 92d494b8c3d25..5aeb7bb6c4151 100644 --- a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -444,7 +444,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "vdd_sm0,vdd_core"; regulator-min-microvolt = <1300000>; regulator-max-microvolt = <1300000>; @@ -689,6 +689,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; memory-controller@7000f400 { diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index dd4d506683de7..d5888d9581755 100644 --- 
a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -357,7 +357,7 @@ regulator-always-on; }; - sm0 { + vdd_core: sm0 { regulator-name = "vdd_sys_sm0,vdd_core"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -477,6 +477,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; pcie@80003000 { diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts index 4bc87bc0c2a45..582dc7910ff8c 100644 --- a/arch/arm/boot/dts/tegra20-trimslice.dts +++ b/arch/arm/boot/dts/tegra20-trimslice.dts @@ -321,6 +321,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <3875>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; pcie@80003000 { @@ -444,6 +445,14 @@ regulator-always-on; }; + vdd_core: regulator@5 { + compatible = "regulator-fixed"; + regulator-name = "vdd_core"; + regulator-min-microvolt = <1300000>; + regulator-max-microvolt = <1300000>; + regulator-always-on; + }; + sound { compatible = "nvidia,tegra-audio-trimslice"; nvidia,i2s-controller = <&tegra_i2s1>; diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index 5a2578b3707f4..e41ce1b1ec3ff 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -544,6 +544,7 @@ nvidia,core-pwr-good-time = <3845 3845>; nvidia,core-pwr-off-time = <458>; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; usb@c5000000 { diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 9508248fd1666..1898351a099fc 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -42,6 +42,8 @@ clock-names = "host1x"; resets = <&tegra_car 28>; reset-names = "host1x"; + power-domains = <&pd_core>; + operating-points-v2 = <&host1x_dvfs_opp_table>; #address-cells = <1>; #size-cells = <1>; @@ -55,6 +57,8 @@ clocks = <&tegra_car TEGRA20_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + power-domains = <&pd_mpe>; + operating-points-v2 = <&mpe_dvfs_opp_table>; }; vi@54080000 { @@ -64,6 +68,8 @@ clocks = <&tegra_car TEGRA20_CLK_VI>; resets = <&tegra_car 20>; reset-names = "vi"; + power-domains = <&pd_venc>; + operating-points-v2 = <&vi_dvfs_opp_table>; }; epp@540c0000 { @@ -73,6 +79,8 @@ clocks = <&tegra_car TEGRA20_CLK_EPP>; resets = <&tegra_car 19>; reset-names = "epp"; + power-domains = <&pd_core>; + operating-points-v2 = <&epp_dvfs_opp_table>; }; isp@54100000 { @@ -82,6 +90,7 @@ clocks = <&tegra_car TEGRA20_CLK_ISP>; resets = <&tegra_car 23>; reset-names = "isp"; + power-domains = <&pd_venc>; }; gr2d@54140000 { @@ -91,6 +100,8 @@ clocks = <&tegra_car TEGRA20_CLK_GR2D>; resets = <&tegra_car 21>; reset-names = "2d"; + power-domains = <&pd_core>; + operating-points-v2 = <&gr2d_dvfs_opp_table>; }; gr3d@54180000 { @@ -99,6 +110,8 @@ clocks = <&tegra_car TEGRA20_CLK_GR3D>; resets = <&tegra_car 24>; reset-names = "3d"; + power-domains = <&pd_3d>; + operating-points-v2 = <&gr3d_dvfs_opp_table>; }; dc@54200000 { @@ -110,6 +123,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + power-domains = <&pd_core>; + operating-points-v2 = <&disp1_dvfs_opp_table>; nvidia,head = <0>; @@ -138,6 +153,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + power-domains = <&pd_core>; + operating-points-v2 = <&disp2_dvfs_opp_table>; nvidia,head = <1>; @@ -166,6 +183,8 @@ 
clock-names = "hdmi", "parent"; resets = <&tegra_car 51>; reset-names = "hdmi"; + power-domains = <&pd_core>; + operating-points-v2 = <&hdmi_dvfs_opp_table>; status = "disabled"; }; @@ -174,6 +193,8 @@ reg = <0x542c0000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_TVO>; + power-domains = <&pd_core>; + operating-points-v2 = <&tvo_dvfs_opp_table>; status = "disabled"; }; @@ -185,6 +206,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 48>; reset-names = "dsi"; + power-domains = <&pd_core>; + operating-points-v2 = <&dsi_dvfs_opp_table>; status = "disabled"; }; }; @@ -242,6 +265,13 @@ reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + sclk { + compatible = "nvidia,tegra20-sclk"; + clocks = <&tegra_car TEGRA20_CLK_SCLK>; + power-domains = <&pd_core>; + operating-points-v2 = <&sclk_dvfs_opp_table>; + }; }; flow-controller@60007000 { @@ -319,6 +349,8 @@ clocks = <&tegra_car TEGRA20_CLK_VDE>; reset-names = "vde", "mc"; resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>; + power-domains = <&pd_vde>; + operating-points-v2 = <&vde_dvfs_opp_table>; }; apbmisc@70000800 { @@ -460,6 +492,8 @@ reset-names = "nand"; assigned-clocks = <&tegra_car TEGRA20_CLK_NDFLASH>; assigned-clock-rates = <150000000>; + power-domains = <&pd_core>; + operating-points-v2 = <&ndflash_dvfs_opp_table>; status = "disabled"; }; @@ -473,6 +507,8 @@ clock-names = "gmi"; resets = <&tegra_car 42>; reset-names = "gmi"; + power-domains = <&pd_core>; + operating-points-v2 = <&nor_dvfs_opp_table>; status = "disabled"; }; @@ -643,6 +679,52 @@ clocks = <&tegra_car TEGRA20_CLK_PCLK>, <&clk32k_in>; clock-names = "pclk", "clk32k_in"; #clock-cells = <1>; + + pd_core: core-domain { + #power-domain-cells = <0>; + operating-points-v2 = <&core_opp_table>; + }; + + powergates { + pd_3d: td { + clocks = <&tegra_car TEGRA20_CLK_GR3D>; + resets = <&mc TEGRA20_MC_RESET_3D>, + <&tegra_car TEGRA20_CLK_GR3D>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_venc: venc { + clocks = <&tegra_car TEGRA20_CLK_ISP>, + <&tegra_car TEGRA20_CLK_VI>, + <&tegra_car TEGRA20_CLK_CSI>; + resets = <&mc TEGRA20_MC_RESET_ISP>, + <&mc TEGRA20_MC_RESET_VI>, + <&tegra_car TEGRA20_CLK_ISP>, + <&tegra_car 20 /* VI */>, + <&tegra_car TEGRA20_CLK_CSI>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_vde: vdec { + clocks = <&tegra_car TEGRA20_CLK_VDE>; + resets = <&mc TEGRA20_MC_RESET_VDE>, + <&tegra_car TEGRA20_CLK_VDE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_mpe: mpe { + clocks = <&tegra_car TEGRA20_CLK_MPE>; + resets = <&mc TEGRA20_MC_RESET_MPEA>, + <&mc TEGRA20_MC_RESET_MPEB>, + <&mc TEGRA20_MC_RESET_MPEC>, + <&tegra_car TEGRA20_CLK_MPE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + }; }; mc: memory-controller@7000f000 { @@ -662,12 +744,13 @@ reg = <0x7000f400 0x400>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_EMC>; + power-domains = <&pd_core>; #address-cells = <1>; #size-cells = <0>; #interconnect-cells = <0>; - operating-points-v2 = <&emc_icc_dvfs_opp_table>; nvidia,memory-controller = <&mc>; + operating-points-v2 = <&emc_icc_dvfs_opp_table>; }; fuse@7000f800 { @@ -712,6 +795,9 @@ <&tegra_car 72>, <&tegra_car 74>; reset-names = "pex", "afi", "pcie_x"; + power-domains = <&pd_core>; + operating-points-v2 = <&pcie_dvfs_opp_table>; + status = "disabled"; pci@1,0 { @@ -753,6 +839,8 @@ reset-names = "usb"; nvidia,needs-double-reset; nvidia,phy = <&phy1>; + power-domains = <&pd_core>; + operating-points-v2 = <&usbd_dvfs_opp_table>; 
 	status = "disabled";
 };
@@ -792,6 +880,8 @@
 	resets = <&tegra_car 58>;
 	reset-names = "usb";
 	nvidia,phy = <&phy2>;
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&usb2_dvfs_opp_table>;
 	status = "disabled";
 };
@@ -820,6 +910,8 @@
 	resets = <&tegra_car 59>;
 	reset-names = "usb";
 	nvidia,phy = <&phy3>;
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&usb3_dvfs_opp_table>;
 	status = "disabled";
 };
@@ -856,6 +948,8 @@
 	clock-names = "sdhci";
 	resets = <&tegra_car 14>;
 	reset-names = "sdhci";
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&sdmmc1_dvfs_opp_table>;
 	status = "disabled";
 };
@@ -867,6 +961,8 @@
 	clock-names = "sdhci";
 	resets = <&tegra_car 9>;
 	reset-names = "sdhci";
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&sdmmc2_dvfs_opp_table>;
 	status = "disabled";
 };
@@ -878,6 +974,8 @@
 	clock-names = "sdhci";
 	resets = <&tegra_car 69>;
 	reset-names = "sdhci";
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&sdmmc3_dvfs_opp_table>;
 	status = "disabled";
 };
@@ -889,6 +987,8 @@
 	clock-names = "sdhci";
 	resets = <&tegra_car 15>;
 	reset-names = "sdhci";
+	power-domains = <&pd_core>;
+	operating-points-v2 = <&sdmmc4_dvfs_opp_table>;
 	status = "disabled";
 };

From e4249b3dc39e266d026dbfd4e061f63a220a3459 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 24 Nov 2020 00:52:00 +0300
Subject: [PATCH 1062/1202] ARM: tegra: Add OPP tables and power domains to Tegra30 device-trees

Add OPP tables and power domains to all peripheral devices that support
power management on the Tegra30 SoC.

Tested-by: Peter Geis # Ouya T30
Tested-by: Matt Merhar # Ouya T30
Signed-off-by: Dmitry Osipenko
---
 arch/arm/boot/dts/tegra20-trimslice.dts       |    2 +-
 .../tegra30-asus-nexus7-grouper-common.dtsi   |    1 +
 arch/arm/boot/dts/tegra30-beaver.dts          |    1 +
 arch/arm/boot/dts/tegra30-cardhu.dtsi         |    1 +
 arch/arm/boot/dts/tegra30-colibri.dtsi        |   17 +-
 arch/arm/boot/dts/tegra30-ouya.dts            |    1 +
 .../arm/boot/dts/tegra30-peripherals-opp.dtsi | 1227 ++++++++++++++++-
 arch/arm/boot/dts/tegra30.dtsi                |  153 ++
 8 files changed, 1398 insertions(+), 5 deletions(-)

diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts
index 582dc7910ff8c..d6c99e17f28be 100644
--- a/arch/arm/boot/dts/tegra20-trimslice.dts
+++ b/arch/arm/boot/dts/tegra20-trimslice.dts
@@ -445,7 +445,7 @@
 		regulator-always-on;
 	};
 
-	vdd_core: regulator@5 {
+	vdd_core: regulator-core {
 		compatible = "regulator-fixed";
 		regulator-name = "vdd_core";
 		regulator-min-microvolt = <1300000>;
diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi
index 07d4ea130964f..6f88fb1ed8737 100644
--- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi
+++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi
@@ -980,6 +980,7 @@
 		nvidia,core-pwr-off-time = <0>;
 		nvidia,core-power-req-active-high;
 		nvidia,sys-clock-req-active-high;
+		core-supply = <&vdd_core>;
 	};
 
 	ahub@70080000 {
diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts
index e159feeedef72..b54cbb24c4d33 100644
--- a/arch/arm/boot/dts/tegra30-beaver.dts
+++ b/arch/arm/boot/dts/tegra30-beaver.dts
@@ -1915,6 +1915,7 @@
 		nvidia,core-pwr-off-time = <0>;
 		nvidia,core-power-req-active-high;
 		nvidia,sys-clock-req-active-high;
+		core-supply = <&core_vdd_reg>;
 	};
 
 	ahub@70080000 {
diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi
index 448f1397e64a9..b2bba923eb93b 100644
--- a/arch/arm/boot/dts/tegra30-cardhu.dtsi
+++ 
b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -391,6 +391,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; ahub@70080000 { diff --git a/arch/arm/boot/dts/tegra30-colibri.dtsi b/arch/arm/boot/dts/tegra30-colibri.dtsi index 413e35215804b..0627b64f044d1 100644 --- a/arch/arm/boot/dts/tegra30-colibri.dtsi +++ b/arch/arm/boot/dts/tegra30-colibri.dtsi @@ -765,9 +765,14 @@ vddctrl_reg: vddctrl { regulator-name = "+V1.0_VDD_CPU"; - regulator-min-microvolt = <1150000>; - regulator-max-microvolt = <1150000>; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1250000>; + regulator-coupled-with = <&vdd_core>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; regulator-always-on; + + nvidia,tegra-cpu-regulator; }; reg_1v8_vio: vio { @@ -890,18 +895,23 @@ }; /* SW: +V1.2_VDD_CORE */ - regulator@60 { + vdd_core: regulator@60 { compatible = "ti,tps62362"; reg = <0x60>; regulator-name = "tps62362-vout"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1400000>; + regulator-coupled-with = <&vddctrl_reg>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; regulator-boot-on; regulator-always-on; ti,vsel0-state-low; /* VSEL1: EN_CORE_DVFS_N low for DVFS */ ti,vsel1-state-low; + + nvidia,tegra-core-regulator; }; }; @@ -914,6 +924,7 @@ nvidia,core-pwr-off-time = <0>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; /* Set DEV_OFF bit in DCDC control register of TPS65911 PMIC */ i2c-thermtrip { diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts index e63f82f619e91..a93bc452d3157 100644 --- a/arch/arm/boot/dts/tegra30-ouya.dts +++ b/arch/arm/boot/dts/tegra30-ouya.dts @@ -277,6 +277,7 @@ nvidia,core-pwr-off-time = <458>; nvidia,core-power-req-active-high; nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; }; mc_timings: memory-controller@7000f000 { diff --git a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi index af96404014024..44c0a8c105ecb 100644 --- a/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi +++ b/arch/arm/boot/dts/tegra30-peripherals-opp.dtsi @@ -1,6 +1,56 @@ // SPDX-License-Identifier: GPL-2.0 / { + core_opp_table: opp-table-core { + compatible = "operating-points-v2"; + opp-shared; + + core_opp_950: opp-950000 { + opp-microvolt = <950000 950000 1350000>; + opp-level = <950000>; + }; + + core_opp_1000: opp-1000000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-level = <1000000>; + }; + + core_opp_1050: opp-1050000 { + opp-microvolt = <1050000 1050000 1350000>; + opp-level = <1050000>; + }; + + core_opp_1100: opp-1100000 { + opp-microvolt = <1100000 1100000 1350000>; + opp-level = <1100000>; + }; + + core_opp_1150: opp-1150000 { + opp-microvolt = <1150000 1150000 1350000>; + opp-level = <1150000>; + }; + + core_opp_1200: opp-1200000 { + opp-microvolt = <1200000 1200000 1350000>; + opp-level = <1200000>; + }; + + core_opp_1250: opp-1250000 { + opp-microvolt = <1250000 1250000 1350000>; + opp-level = <1250000>; + }; + + core_opp_1300: opp-1300000 { + opp-microvolt = <1300000 1300000 1350000>; + opp-level = <1300000>; + }; + + core_opp_1350: opp-1350000 { + opp-microvolt = <1350000 1350000 1350000>; + opp-level = <1350000>; + }; + }; + emc_icc_dvfs_opp_table: opp-table-emc { compatible = "operating-points-v2"; @@ -8,126 +58,147 @@ opp-microvolt = <950000 950000 1350000>; opp-hz 
= /bits/ 64 <12750000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-12750000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-12750000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <12750000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-25500000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-25500000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-25500000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <25500000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-27000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-27000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-27000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <27000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-51000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-51000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-51000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <51000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-54000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-54000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-54000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <54000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-102000000-950 { opp-microvolt = <950000 950000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; }; opp-102000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; }; opp-102000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <102000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-108000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; }; opp-108000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <108000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-204000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; opp-suspend; }; @@ -135,6 +206,7 @@ opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <204000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; opp-suspend; }; @@ -142,138 +214,161 @@ opp-microvolt = 
<1000000 1000000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp-333500000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp-333500000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <333500000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-375000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp-375000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp-375000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <375000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-400000000-1000 { opp-microvolt = <1000000 1000000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; }; opp-400000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1200>; }; opp-400000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <400000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-416000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp-416000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <416000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-450000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp-450000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <450000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-500000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <500000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp-500000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <500000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-533000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; }; opp-533000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <533000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-625000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; }; opp-625000000-1250 { opp-microvolt = <1250000 1250000 1350000>; opp-hz = /bits/ 64 <625000000>; opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; }; opp-667000000-1200 { opp-microvolt = <1200000 1200000 1350000>; opp-hz = /bits/ 64 <667000000>; opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; }; opp-750000000-1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <750000000>; opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; }; opp-800000000-1300 { opp-microvolt = <1300000 1300000 1350000>; opp-hz = /bits/ 64 <800000000>; opp-supported-hw = <0x0004>; + required-opps = 
<&core_opp_1300>; }; opp-900000000-1350 { opp-microvolt = <1350000 1350000 1350000>; opp-hz = /bits/ 64 <900000000>; opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; }; }; @@ -401,4 +496,1134 @@ opp-peak-kBps = <7200000>; }; }; -}; + + pcie_dvfs_opp_table: opp-table-pcie { + compatible = "operating-points-v2"; + + opp-250000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <250000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + host1x_dvfs_opp_table: opp-table-host1x { + compatible = "operating-points-v2"; + + opp-152000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <152000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp-188000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <188000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1050>; + }; + + opp-222000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <222000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1100>; + }; + + opp-242000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <242000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + + opp-254000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <254000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1150>; + }; + + opp-267000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1200>; + }; + + opp-300000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + mpe_dvfs_opp_table: opp-table-mpe { + compatible = "operating-points-v2"; + + opp-234000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <234000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp-247000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-285000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp-304000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp-332000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp-361000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-380000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp-408000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp-416000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp-446000000-1200 { + opp-microvolt = <1200000 1200000 
1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-484000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp-520000000-1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp-600000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + vi_dvfs_opp_table: opp-table-vi { + compatible = "operating-points-v2"; + + opp-216000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp-219000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <219000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-267000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + + opp-285000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1050>; + }; + + opp-300000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <300000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1100>; + }; + + opp-371000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <371000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1150>; + }; + + opp-409000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <409000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp-425000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <425000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1250>; + }; + + opp-470000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <470000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + }; + + epp_dvfs_opp_table: opp-table-epp { + compatible = "operating-points-v2"; + + opp-267000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp-285000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp-304000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp-332000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp-361000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-380000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp-408000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + 
opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp-416000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp-446000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-484000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp-520000000-1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp-600000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + gr2d_dvfs_opp_table: opp-table-gr2d { + compatible = "operating-points-v2"; + + opp-267000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp-285000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp-304000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp-332000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp-361000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-380000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp-408000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp-416000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp-446000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-484000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp-520000000-1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp-600000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + gr3d_dvfs_opp_table: opp-table-gr3d { + compatible = "operating-points-v2"; + + opp-234000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <234000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>, <&core_opp_1000>; + }; + + opp-247000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = 
<0x0004>; + required-opps = <&core_opp_1000>, <&core_opp_1000>; + }; + + opp-285000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <285000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>, <&core_opp_1050>; + }; + + opp-304000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>, <&core_opp_1050>; + }; + + opp-332000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>, <&core_opp_1100>; + }; + + opp-361000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <361000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>, <&core_opp_1100>; + }; + + opp-380000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>, <&core_opp_1150>; + }; + + opp-408000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>, <&core_opp_1150>; + }; + + opp-416000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>, <&core_opp_1200>; + }; + + opp-446000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <446000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>, <&core_opp_1200>; + }; + + opp-484000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>, <&core_opp_1250>; + }; + + opp-520000000-1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>, <&core_opp_1300>; + }; + + opp-600000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>, <&core_opp_1350>; + }; + }; + + disp1_dvfs_opp_table: opp-table-disp1 { + compatible = "operating-points-v2"; + + opp-120000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <120000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1000>; + }; + + opp-155000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <155000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp-190000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <190000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1200>; + }; + + opp-268000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <268000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + }; + + disp2_dvfs_opp_table: opp-table-disp2 { + compatible = "operating-points-v2"; + + opp-120000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <120000000>; + opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1000>; + }; + + opp-155000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <155000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp-190000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <190000000>; + 
opp-supported-hw = <0x0009>; + required-opps = <&core_opp_1200>; + }; + + opp-268000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <268000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1050>; + }; + }; + + hdmi_dvfs_opp_table: opp-table-hdmi { + compatible = "operating-points-v2"; + + opp-148500000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <148500000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + tvo_dvfs_opp_table: opp-table-tvo { + compatible = "operating-points-v2"; + + opp-297000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <297000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + }; + + dsia_dvfs_opp_table: opp-table-dsia { + compatible = "operating-points-v2"; + + opp-275000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + dsib_dvfs_opp_table: opp-table-dsib { + compatible = "operating-points-v2"; + + opp-275000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sclk_dvfs_opp_table: opp-table-sclk { + compatible = "operating-points-v2"; + + opp-51000000-950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <51000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_950>; + }; + + opp-136000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <136000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1000>; + }; + + opp-164000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <164000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1050>; + }; + + opp-191000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <191000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1100>; + }; + + opp-205000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <205000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1000>; + }; + + opp-216000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <216000000>; + opp-supported-hw = <0x0001>; + required-opps = <&core_opp_1150>; + }; + + opp-227000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <227000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1100>; + }; + + opp-267000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <267000000>; + opp-supported-hw = <0x0006>; + required-opps = <&core_opp_1200>; + }; + + opp-334000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <334000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1250>; + }; + + opp-378000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <378000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + }; + + pll_c_dvfs_opp_table: opp-table-pllc { + compatible = "operating-points-v2"; + + opp-533000000-950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <533000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-667000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <667000000>; + opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1000>; + }; + + opp-800000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp-1066000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <1066000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + + opp-1200000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <1200000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1350>; + }; + }; + + pll_e_dvfs_opp_table: opp-table-plle { + compatible = "operating-points-v2"; + + opp-100000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + pll_m_dvfs_opp_table: opp-table-pllm { + compatible = "operating-points-v2"; + + opp-533000000-950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <533000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-667000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <667000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-800000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <800000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1100>; + }; + + opp-1066000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <1066000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + vde_dvfs_opp_table: opp-table-vde { + compatible = "operating-points-v2"; + + opp-228000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <228000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1000>; + }; + + opp-247000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <247000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1000>; + }; + + opp-275000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <275000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1050>; + }; + + opp-304000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <304000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1050>; + }; + + opp-332000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <332000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1100>; + }; + + opp-352000000-1100 { + opp-microvolt = <1100000 1100000 1350000>; + opp-hz = /bits/ 64 <352000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1100>; + }; + + opp-380000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <380000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1150>; + }; + + opp-400000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <400000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1150>; + }; + + opp-416000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <416000000>; + opp-supported-hw = <0x0003>; + required-opps = <&core_opp_1200>; + }; + + opp-437000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <437000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1200>; + }; + + opp-484000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <484000000>; + 
opp-supported-hw = <0x000C>; + required-opps = <&core_opp_1250>; + }; + + opp-520000000-1300 { + opp-microvolt = <1300000 1300000 1350000>; + opp-hz = /bits/ 64 <520000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1300>; + }; + + opp-600000000-1350 { + opp-microvolt = <1350000 1350000 1350000>; + opp-hz = /bits/ 64 <600000000>; + opp-supported-hw = <0x0004>; + required-opps = <&core_opp_1350>; + }; + }; + + fuse_burn_dvfs_opp_table: opp-table-fuseburn { + compatible = "operating-points-v2"; + + opp-26000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <26000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1150>; + }; + }; + + nor_dvfs_opp_table: opp-table-nor { + compatible = "operating-points-v2"; + + opp-108000000-1250 { + opp-microvolt = <1250000 1250000 1350000>; + opp-hz = /bits/ 64 <108000000>; + opp-supported-hw = <0x0008>; + required-opps = <&core_opp_1250>; + }; + + opp-115000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <115000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1000>; + }; + + opp-130000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <130000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1050>; + }; + + opp-133000000-1150 { + opp-microvolt = <1150000 1150000 1350000>; + opp-hz = /bits/ 64 <133000000>; + opp-supported-hw = <0x0007>; + required-opps = <&core_opp_1150>; + }; + }; + + pwm_dvfs_opp_table: opp-table-pwm { + compatible = "operating-points-v2"; + + opp-408000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <408000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + sbc1_dvfs_opp_table: opp-table-sbc1 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc2_dvfs_opp_table: opp-table-sbc2 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc3_dvfs_opp_table: opp-table-sbc3 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = 
<&core_opp_1200>; + }; + }; + + sbc4_dvfs_opp_table: opp-table-sbc4 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc5_dvfs_opp_table: opp-table-sbc5 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sbc6_dvfs_opp_table: opp-table-sbc6 { + compatible = "operating-points-v2"; + + opp-52000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <52000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + + opp-60000000-1050 { + opp-microvolt = <1050000 1050000 1350000>; + opp-hz = /bits/ 64 <60000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1050>; + }; + + opp-100000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <100000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sdmmc1_dvfs_opp_table: opp-table-sdmmc1 { + compatible = "operating-points-v2"; + + opp-104000000-950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <104000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-208000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <208000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + sdmmc3_dvfs_opp_table: opp-table-sdmmc3 { + compatible = "operating-points-v2"; + + opp-104000000-950 { + opp-microvolt = <950000 950000 1350000>; + opp-hz = /bits/ 64 <104000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_950>; + }; + + opp-208000000-1200 { + opp-microvolt = <1200000 1200000 1350000>; + opp-hz = /bits/ 64 <208000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1200>; + }; + }; + + usbd_dvfs_opp_table: opp-table-usbd { + compatible = "operating-points-v2"; + + opp-480000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + usb2_dvfs_opp_table: opp-table-usb2 { + compatible = "operating-points-v2"; + + opp-480000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; + + usb3_dvfs_opp_table: opp-table-usb3 { + compatible = "operating-points-v2"; + + opp-480000000-1000 { + opp-microvolt = <1000000 1000000 1350000>; + opp-hz = /bits/ 64 <480000000>; + opp-supported-hw = <0x000F>; + required-opps = <&core_opp_1000>; + }; + }; +}; + diff --git 
a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index ae3df73c20a73..e40d5563778bd 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -55,6 +55,8 @@ <&tegra_car 72>, <&tegra_car 74>; reset-names = "pex", "afi", "pcie_x"; + power-domains = <&pd_core>; + operating-points-v2 = <&pcie_dvfs_opp_table>; status = "disabled"; pci@1,0 { @@ -124,6 +126,8 @@ resets = <&tegra_car 28>; reset-names = "host1x"; iommus = <&mc TEGRA_SWGROUP_HC>; + power-domains = <&pd_heg>; + operating-points-v2 = <&host1x_dvfs_opp_table>; #address-cells = <1>; #size-cells = <1>; @@ -137,6 +141,8 @@ clocks = <&tegra_car TEGRA30_CLK_MPE>; resets = <&tegra_car 60>; reset-names = "mpe"; + power-domains = <&pd_mpe>; + operating-points-v2 = <&mpe_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_MPE>; }; @@ -148,6 +154,8 @@ clocks = <&tegra_car TEGRA30_CLK_VI>; resets = <&tegra_car 20>; reset-names = "vi"; + power-domains = <&pd_venc>; + operating-points-v2 = <&vi_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_VI>; }; @@ -159,6 +167,8 @@ clocks = <&tegra_car TEGRA30_CLK_EPP>; resets = <&tegra_car 19>; reset-names = "epp"; + power-domains = <&pd_heg>; + operating-points-v2 = <&epp_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_EPP>; }; @@ -170,6 +180,7 @@ clocks = <&tegra_car TEGRA30_CLK_ISP>; resets = <&tegra_car 23>; reset-names = "isp"; + power-domains = <&pd_venc>; iommus = <&mc TEGRA_SWGROUP_ISP>; }; @@ -181,6 +192,8 @@ clocks = <&tegra_car TEGRA30_CLK_GR2D>; resets = <&tegra_car 21>; reset-names = "2d"; + power-domains = <&pd_heg>; + operating-points-v2 = <&gr2d_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_G2>; }; @@ -194,6 +207,9 @@ resets = <&tegra_car 24>, <&tegra_car 98>; reset-names = "3d", "3d2"; + power-domains = <&pd_3d0>, <&pd_3d1>; + power-domain-names = "3d0", "3d1"; + operating-points-v2 = <&gr3d_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_NV>, <&mc TEGRA_SWGROUP_NV2>; @@ -208,6 +224,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 27>; reset-names = "dc"; + power-domains = <&pd_core>; + operating-points-v2 = <&disp1_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_DC>; @@ -238,6 +256,8 @@ clock-names = "dc", "parent"; resets = <&tegra_car 26>; reset-names = "dc"; + power-domains = <&pd_core>; + operating-points-v2 = <&disp2_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_DCB>; @@ -268,6 +288,8 @@ clock-names = "hdmi", "parent"; resets = <&tegra_car 51>; reset-names = "hdmi"; + power-domains = <&pd_core>; + operating-points-v2 = <&hdmi_dvfs_opp_table>; status = "disabled"; }; @@ -276,6 +298,8 @@ reg = <0x542c0000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_TVO>; + power-domains = <&pd_core>; + operating-points-v2 = <&tvo_dvfs_opp_table>; status = "disabled"; }; @@ -287,6 +311,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 48>; reset-names = "dsi"; + power-domains = <&pd_core>; + operating-points-v2 = <&dsia_dvfs_opp_table>; status = "disabled"; }; @@ -298,6 +324,8 @@ clock-names = "dsi", "parent"; resets = <&tegra_car 84>; reset-names = "dsi"; + power-domains = <&pd_core>; + operating-points-v2 = <&dsib_dvfs_opp_table>; status = "disabled"; }; }; @@ -358,6 +386,34 @@ reg = <0x60006000 0x1000>; #clock-cells = <1>; #reset-cells = <1>; + + sclk { + compatible = "nvidia,tegra30-sclk"; + clocks = <&tegra_car TEGRA30_CLK_SCLK>; + power-domains = <&pd_core>; + operating-points-v2 = <&sclk_dvfs_opp_table>; + }; + + pll-c { + compatible = "nvidia,tegra30-pllc"; + clocks = <&tegra_car TEGRA30_CLK_PLL_C>; + power-domains = <&pd_core>; + 
operating-points-v2 = <&pll_c_dvfs_opp_table>; + }; + + pll-e { + compatible = "nvidia,tegra30-plle"; + clocks = <&tegra_car TEGRA30_CLK_PLL_E>; + power-domains = <&pd_core>; + operating-points-v2 = <&pll_e_dvfs_opp_table>; + }; + + pll-m { + compatible = "nvidia,tegra30-pllm"; + clocks = <&tegra_car TEGRA30_CLK_PLL_M>; + power-domains = <&pd_core>; + operating-points-v2 = <&pll_m_dvfs_opp_table>; + }; }; flow-controller@60007000 { @@ -468,6 +524,8 @@ reset-names = "vde", "mc"; resets = <&tegra_car 61>, <&mc TEGRA30_MC_RESET_VDE>; iommus = <&mc TEGRA_SWGROUP_VDE>; + power-domains = <&pd_vde>; + operating-points-v2 = <&vde_dvfs_opp_table>; }; apbmisc@70000800 { @@ -565,6 +623,8 @@ clock-names = "gmi"; resets = <&tegra_car 42>; reset-names = "gmi"; + power-domains = <&pd_core>; + operating-points-v2 = <&nor_dvfs_opp_table>; status = "disabled"; }; @@ -575,6 +635,8 @@ clocks = <&tegra_car TEGRA30_CLK_PWM>; resets = <&tegra_car 17>; reset-names = "pwm"; + power-domains = <&pd_core>; + operating-points-v2 = <&pwm_dvfs_opp_table>; status = "disabled"; }; @@ -676,6 +738,8 @@ reset-names = "spi"; dmas = <&apbdma 15>, <&apbdma 15>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc1_dvfs_opp_table>; status = "disabled"; }; @@ -690,6 +754,8 @@ reset-names = "spi"; dmas = <&apbdma 16>, <&apbdma 16>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc2_dvfs_opp_table>; status = "disabled"; }; @@ -704,6 +770,8 @@ reset-names = "spi"; dmas = <&apbdma 17>, <&apbdma 17>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc3_dvfs_opp_table>; status = "disabled"; }; @@ -718,6 +786,8 @@ reset-names = "spi"; dmas = <&apbdma 18>, <&apbdma 18>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc4_dvfs_opp_table>; status = "disabled"; }; @@ -732,6 +802,8 @@ reset-names = "spi"; dmas = <&apbdma 27>, <&apbdma 27>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc5_dvfs_opp_table>; status = "disabled"; }; @@ -746,6 +818,8 @@ reset-names = "spi"; dmas = <&apbdma 28>, <&apbdma 28>; dma-names = "rx", "tx"; + power-domains = <&pd_core>; + operating-points-v2 = <&sbc6_dvfs_opp_table>; status = "disabled"; }; @@ -765,6 +839,72 @@ clocks = <&tegra_car TEGRA30_CLK_PCLK>, <&clk32k_in>; clock-names = "pclk", "clk32k_in"; #clock-cells = <1>; + + pd_core: core-domain { + #power-domain-cells = <0>; + operating-points-v2 = <&core_opp_table>; + }; + + powergates { + pd_3d0: td { + clocks = <&tegra_car TEGRA30_CLK_GR3D>; + resets = <&mc TEGRA30_MC_RESET_3D>, + <&tegra_car TEGRA30_CLK_GR3D>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_3d1: td2 { + clocks = <&tegra_car TEGRA30_CLK_GR3D2>; + resets = <&mc TEGRA30_MC_RESET_3D2>, + <&tegra_car TEGRA30_CLK_GR3D2>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_venc: venc { + clocks = <&tegra_car TEGRA30_CLK_ISP>, + <&tegra_car TEGRA30_CLK_VI>, + <&tegra_car TEGRA30_CLK_CSI>; + resets = <&mc TEGRA30_MC_RESET_ISP>, + <&mc TEGRA30_MC_RESET_VI>, + <&tegra_car TEGRA30_CLK_ISP>, + <&tegra_car 20 /* VI */>, + <&tegra_car TEGRA30_CLK_CSI>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_vde: vdec { + clocks = <&tegra_car TEGRA30_CLK_VDE>; + resets = <&mc TEGRA30_MC_RESET_VDE>, + <&tegra_car TEGRA30_CLK_VDE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_mpe: mpe { + clocks = <&tegra_car TEGRA30_CLK_MPE>; + resets = <&mc 
TEGRA30_MC_RESET_MPE>, + <&tegra_car TEGRA30_CLK_MPE>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + + pd_heg: heg { + clocks = <&tegra_car TEGRA30_CLK_GR2D>, + <&tegra_car TEGRA30_CLK_EPP>, + <&tegra_car TEGRA30_CLK_HOST1X>; + resets = <&mc TEGRA30_MC_RESET_2D>, + <&mc TEGRA30_MC_RESET_EPP>, + <&mc TEGRA30_MC_RESET_HC>, + <&tegra_car TEGRA30_CLK_GR2D>, + <&tegra_car TEGRA30_CLK_EPP>, + <&tegra_car TEGRA30_CLK_HOST1X>; + power-domains = <&pd_core>; + #power-domain-cells = <0>; + }; + }; }; mc: memory-controller@7000f000 { @@ -785,6 +925,7 @@ reg = <0x7000f400 0x400>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_EMC>; + power-domains = <&pd_core>; nvidia,memory-controller = <&mc>; operating-points-v2 = <&emc_icc_dvfs_opp_table>; @@ -799,6 +940,8 @@ clock-names = "fuse"; resets = <&tegra_car 39>; reset-names = "fuse"; + power-domains = <&pd_core>; + operating-points-v2 = <&fuse_burn_dvfs_opp_table>; }; tsensor: tsensor@70014000 { @@ -921,6 +1064,8 @@ clock-names = "sdhci"; resets = <&tegra_car 14>; reset-names = "sdhci"; + power-domains = <&pd_core>; + operating-points-v2 = <&sdmmc1_dvfs_opp_table>; status = "disabled"; }; @@ -943,6 +1088,8 @@ clock-names = "sdhci"; resets = <&tegra_car 69>; reset-names = "sdhci"; + power-domains = <&pd_core>; + operating-points-v2 = <&sdmmc3_dvfs_opp_table>; status = "disabled"; }; @@ -967,6 +1114,8 @@ reset-names = "usb"; nvidia,needs-double-reset; nvidia,phy = <&phy1>; + power-domains = <&pd_core>; + operating-points-v2 = <&usbd_dvfs_opp_table>; status = "disabled"; }; @@ -1008,6 +1157,8 @@ resets = <&tegra_car 58>; reset-names = "usb"; nvidia,phy = <&phy2>; + power-domains = <&pd_core>; + operating-points-v2 = <&usb2_dvfs_opp_table>; status = "disabled"; }; @@ -1048,6 +1199,8 @@ resets = <&tegra_car 59>; reset-names = "usb"; nvidia,phy = <&phy3>; + power-domains = <&pd_core>; + operating-points-v2 = <&usb3_dvfs_opp_table>; status = "disabled"; }; From cb77fc4ed4fdfd64710c577da12596f3a17039b6 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 20 Feb 2018 17:07:24 +0300 Subject: [PATCH 1063/1202] ARM: tegra: Add Memory Client resets to Tegra20 GR2D, GR3D and Host1x Memory access must be blocked before hardware reset is asserted and before power is gated, otherwise a serious hardware fault is inevitable. Add reset for memory clients to the GR2D, GR3D and Host1x nodes. 
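To make the required ordering concrete, a consumer-side sketch follows; it is illustrative only, not the in-kernel implementation. The reset names match the gr2d node above, the reset handles are assumed to have been obtained with devm_reset_control_get(), and the final power-gating call is a hypothetical placeholder:

#include <linux/device.h>
#include <linux/reset.h>

/* Illustrative power-down ordering; not the actual in-kernel code. */
static int gr2d_power_off(struct device *dev,
			  struct reset_control *rst_mc,
			  struct reset_control *rst_2d)
{
	int err;

	/* 1. Block further memory accesses from the client... */
	err = reset_control_assert(rst_mc);
	if (err)
		return err;

	/* 2. ...then put the module itself into reset... */
	err = reset_control_assert(rst_2d);
	if (err)
		return err;

	/* 3. ...and only now gate the power partition. */
	return gate_partition_power(dev); /* hypothetical placeholder */
}
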
Tested-by: Paul Fertser # PAZ00 T20 Tested-by: Nicolas Chauvet # PAZ00 T20 Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 1898351a099fc..7b69ffc57abe7 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -40,8 +40,8 @@ interrupt-names = "syncpt", "host1x"; clocks = <&tegra_car TEGRA20_CLK_HOST1X>; clock-names = "host1x"; - resets = <&tegra_car 28>; - reset-names = "host1x"; + resets = <&tegra_car 28>, <&mc TEGRA20_MC_RESET_HC>; + reset-names = "host1x", "mc"; power-domains = <&pd_core>; operating-points-v2 = <&host1x_dvfs_opp_table>; @@ -98,8 +98,8 @@ reg = <0x54140000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA20_CLK_GR2D>; - resets = <&tegra_car 21>; - reset-names = "2d"; + resets = <&tegra_car 21>, <&mc TEGRA20_MC_RESET_2D>; + reset-names = "2d", "mc"; power-domains = <&pd_core>; operating-points-v2 = <&gr2d_dvfs_opp_table>; }; @@ -108,8 +108,8 @@ compatible = "nvidia,tegra20-gr3d"; reg = <0x54180000 0x00040000>; clocks = <&tegra_car TEGRA20_CLK_GR3D>; - resets = <&tegra_car 24>; - reset-names = "3d"; + resets = <&tegra_car 24>, <&mc TEGRA20_MC_RESET_3D>; + reset-names = "3d", "mc"; power-domains = <&pd_3d>; operating-points-v2 = <&gr3d_dvfs_opp_table>; }; From adfd443543ac8d63e2d83971e9710bda9863f8ae Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 20 Feb 2018 17:10:52 +0300 Subject: [PATCH 1064/1202] ARM: tegra: Add Memory Client resets to Tegra30 GR2D, GR3D and Host1x Memory access must be blocked before hardware reset is asserted and before power is gated, otherwise a serious hardware fault is inevitable. Add reset for memory clients to the GR2D, GR3D and Host1x nodes. 
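On the driver side, the new "mc" entry can be looked up as an optional reset so that older device-trees lacking it keep probing; a minimal sketch under that assumption (not the actual driver change):

/* Sketch: tolerate device-trees that predate the "mc" reset entry. */
struct reset_control *rst_mc;

rst_mc = devm_reset_control_get_optional_exclusive(dev, "mc");
if (IS_ERR(rst_mc))
	return PTR_ERR(rst_mc);

/* rst_mc is NULL on old DTBs; reset_control_assert(NULL) is a no-op. */
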
Tested-by: Peter Geis # Ouya T30 Tested-by: Matt Merhar # Ouya T30 Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra30.dtsi | 14 ++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index e40d5563778bd..c1be136aac7dc 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -123,8 +123,8 @@ interrupt-names = "syncpt", "host1x"; clocks = <&tegra_car TEGRA30_CLK_HOST1X>; clock-names = "host1x"; - resets = <&tegra_car 28>; - reset-names = "host1x"; + resets = <&tegra_car 28>, <&mc TEGRA30_MC_RESET_HC>; + reset-names = "host1x", "mc"; iommus = <&mc TEGRA_SWGROUP_HC>; power-domains = <&pd_heg>; operating-points-v2 = <&host1x_dvfs_opp_table>; @@ -190,8 +190,8 @@ reg = <0x54140000 0x00040000>; interrupts = ; clocks = <&tegra_car TEGRA30_CLK_GR2D>; - resets = <&tegra_car 21>; - reset-names = "2d"; + resets = <&tegra_car 21>, <&mc TEGRA30_MC_RESET_2D>; + reset-names = "2d", "mc"; power-domains = <&pd_heg>; operating-points-v2 = <&gr2d_dvfs_opp_table>; @@ -205,8 +205,10 @@ <&tegra_car TEGRA30_CLK_GR3D2>; clock-names = "3d", "3d2"; resets = <&tegra_car 24>, - <&tegra_car 98>; - reset-names = "3d", "3d2"; + <&tegra_car 98>, + <&mc TEGRA30_MC_RESET_3D>, + <&mc TEGRA30_MC_RESET_3D2>; + reset-names = "3d", "3d2", "mc", "mc2"; power-domains = <&pd_3d0>, <&pd_3d1>; power-domain-names = "3d0", "3d1"; operating-points-v2 = <&gr3d_dvfs_opp_table>; From 062148ef6c3a716533c03582d736d95e78c1c1c5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 27 Jul 2021 00:07:49 +0300 Subject: [PATCH 1065/1202] ARM: tegra20/30: Disable unused host1x hardware MPE, VI, EPP and ISP were never used and we don't have drivers for them. Since these modules are enabled by default in the device-trees, devices are created for them, but there is no driver to bind, so the state of the PMC driver is never synced and voltage scaling is blocked. Disable them.
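A board that does use one of these engines can still opt back in from its own dts file. A hypothetical board-level override, with the node path taken from tegra30.dtsi:

/* Hypothetical board snippet re-enabling the VI module. */
&{/host1x@50000000/vi@54080000} {
	status = "okay";
};
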
Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20.dtsi | 4 ++++ arch/arm/boot/dts/tegra30.dtsi | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 7b69ffc57abe7..8010b40d7377f 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -59,6 +59,7 @@ reset-names = "mpe"; power-domains = <&pd_mpe>; operating-points-v2 = <&mpe_dvfs_opp_table>; + status = "disabled"; }; vi@54080000 { @@ -70,6 +71,7 @@ reset-names = "vi"; power-domains = <&pd_venc>; operating-points-v2 = <&vi_dvfs_opp_table>; + status = "disabled"; }; epp@540c0000 { @@ -81,6 +83,7 @@ reset-names = "epp"; power-domains = <&pd_core>; operating-points-v2 = <&epp_dvfs_opp_table>; + status = "disabled"; }; isp@54100000 { @@ -91,6 +94,7 @@ resets = <&tegra_car 23>; reset-names = "isp"; power-domains = <&pd_venc>; + status = "disabled"; }; gr2d@54140000 { diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index c1be136aac7dc..d961ce3761e67 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -145,6 +145,8 @@ operating-points-v2 = <&mpe_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_MPE>; + + status = "disabled"; }; vi@54080000 { @@ -158,6 +160,8 @@ operating-points-v2 = <&vi_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_VI>; + + status = "disabled"; }; epp@540c0000 { @@ -171,6 +175,8 @@ operating-points-v2 = <&epp_dvfs_opp_table>; iommus = <&mc TEGRA_SWGROUP_EPP>; + + status = "disabled"; }; isp@54100000 { @@ -183,6 +189,8 @@ power-domains = <&pd_venc>; iommus = <&mc TEGRA_SWGROUP_ISP>; + + status = "disabled"; }; gr2d@54140000 { From 9599b7d0b1156316d6559e75072706c33f10e35e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 24 Oct 2021 18:47:05 +0300 Subject: [PATCH 1066/1202] ASoC: tegra: Restore AC97 support The device-tree of AC97 codecs needs to be parsed differently from that of I2S codecs, and a codec device may need to be created. This was missed by the patch that unified the machine drivers into a single driver; fix it. This should restore audio on the Toradex Colibri board.
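The binding difference being handled: an AC97 card references only the AC97 controller, since the codec sits on the AC97 link and is registered by the machine driver itself, while an I2S card references both the I2S controller and the codec. A rough sketch of the two node shapes, trimmed to the phandles the driver parses (codec and controller labels are illustrative):

/* AC97 machine: only the controller is referenced. */
sound {
	compatible = "nvidia,tegra-audio-wm9712";
	nvidia,ac97-controller = <&ac97>;
};

/* I2S machine: both the controller and the codec are referenced. */
sound {
	compatible = "nvidia,tegra-audio-wm8903";
	nvidia,i2s-controller = <&tegra_i2s1>;
	nvidia,audio-codec = <&wm8903>;
};
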
Cc: Fixes: cc8f70f56039 ("ASoC: tegra: Unify ASoC machine drivers") Signed-off-by: Dmitry Osipenko --- sound/soc/tegra/tegra_asoc_machine.c | 59 +++++++++++++++++++++++----- sound/soc/tegra/tegra_asoc_machine.h | 1 + 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index 3cbe6ef1cf9f2..d92fabff08bc8 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -341,9 +341,34 @@ tegra_machine_parse_phandle(struct device *dev, const char *name) return np; } +static void tegra_machine_unregister_codec(void *pdev) +{ + platform_device_unregister(pdev); +} + +static int tegra_machine_register_codec(struct device *dev, const char *name) +{ + struct platform_device *pdev; + int err; + + if (!name) + return 0; + + pdev = platform_device_register_simple(name, -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + err = devm_add_action_or_reset(dev, tegra_machine_unregister_codec, + pdev); + if (err) + return err; + + return 0; +} + int tegra_asoc_machine_probe(struct platform_device *pdev) { - struct device_node *np_codec, *np_i2s; + struct device_node *np_codec, *np_i2s, *np_ac97; const struct tegra_asoc_data *asoc; struct device *dev = &pdev->dev; struct tegra_machine *machine; @@ -404,17 +429,30 @@ int tegra_asoc_machine_probe(struct platform_device *pdev) return err; } - np_codec = tegra_machine_parse_phandle(dev, "nvidia,audio-codec"); - if (IS_ERR(np_codec)) - return PTR_ERR(np_codec); + if (asoc->set_ac97) { + err = tegra_machine_register_codec(dev, asoc->codec_dev_name); + if (err) + return err; + + np_ac97 = tegra_machine_parse_phandle(dev, "nvidia,ac97-controller"); + if (IS_ERR(np_ac97)) + return PTR_ERR(np_ac97); - np_i2s = tegra_machine_parse_phandle(dev, "nvidia,i2s-controller"); - if (IS_ERR(np_i2s)) - return PTR_ERR(np_i2s); + card->dai_link->cpus->of_node = np_ac97; + card->dai_link->platforms->of_node = np_ac97; + } else { + np_codec = tegra_machine_parse_phandle(dev, "nvidia,audio-codec"); + if (IS_ERR(np_codec)) + return PTR_ERR(np_codec); - card->dai_link->cpus->of_node = np_i2s; - card->dai_link->codecs->of_node = np_codec; - card->dai_link->platforms->of_node = np_i2s; + np_i2s = tegra_machine_parse_phandle(dev, "nvidia,i2s-controller"); + if (IS_ERR(np_i2s)) + return PTR_ERR(np_i2s); + + card->dai_link->cpus->of_node = np_i2s; + card->dai_link->codecs->of_node = np_codec; + card->dai_link->platforms->of_node = np_i2s; + } if (asoc->add_common_controls) { card->controls = tegra_machine_controls; @@ -589,6 +627,7 @@ static struct snd_soc_card snd_soc_tegra_wm9712 = { static const struct tegra_asoc_data tegra_wm9712_data = { .card = &snd_soc_tegra_wm9712, .add_common_dapm_widgets = true, + .codec_dev_name = "wm9712-codec", .set_ac97 = true, }; diff --git a/sound/soc/tegra/tegra_asoc_machine.h b/sound/soc/tegra/tegra_asoc_machine.h index 8ee0ec814f67c..d6a8d13205516 100644 --- a/sound/soc/tegra/tegra_asoc_machine.h +++ b/sound/soc/tegra/tegra_asoc_machine.h @@ -13,6 +13,7 @@ struct snd_soc_pcm_runtime; struct tegra_asoc_data { unsigned int (*mclk_rate)(unsigned int srate); + const char *codec_dev_name; struct snd_soc_card *card; unsigned int mclk_id; bool hp_jack_gpio_active_low; From 5af8aee61140f91699ec209d760049c32b8819e9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 24 Oct 2021 21:13:24 +0300 Subject: [PATCH 1067/1202] ASoC: tegra: Set default card name for Trimslice The default card name for Trimslice device should be 
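The default only takes effect when the board does not provide its own name; assuming the unified driver still parses the usual property, as the pre-unification Tegra machine drivers did, a dts-provided model string overrides it:

/* Assumption: "nvidia,model" from the dts overrides the default name. */
err = snd_soc_of_parse_card_name(card, "nvidia,model");
if (err)
	return err;
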
"tegra-trimslice". It got lost by accident during unification of machine sound drivers, fix it. Cc: Fixes: cc8f70f56039 ("ASoC: tegra: Unify ASoC machine drivers") Signed-off-by: Dmitry Osipenko --- sound/soc/tegra/tegra_asoc_machine.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index d92fabff08bc8..b95438c3dbf7e 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -725,6 +725,7 @@ static struct snd_soc_dai_link tegra_tlv320aic23_dai = { }; static struct snd_soc_card snd_soc_tegra_trimslice = { + .name = "tegra-trimslice", .components = "codec:tlv320aic23", .dai_link = &tegra_tlv320aic23_dai, .num_links = 1, From 5aea88c13009a2f35d95eac614b2024a1ca24e1f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 18 Oct 2021 21:34:59 +0300 Subject: [PATCH 1068/1202] mfd: tps80031: Remove driver Driver was upstreamed in 2013 and never got a user, remove it. Signed-off-by: Dmitry Osipenko --- drivers/mfd/Kconfig | 14 - drivers/mfd/Makefile | 1 - drivers/mfd/tps80031.c | 526 ----------------------------- include/linux/mfd/tps80031.h | 637 ----------------------------------- 4 files changed, 1178 deletions(-) delete mode 100644 drivers/mfd/tps80031.c delete mode 100644 include/linux/mfd/tps80031.h diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index f99334fa0ae67..bc75156dd96f0 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1624,20 +1624,6 @@ config MFD_TPS65912_SPI If you say yes here you get support for the TPS65912 series of PM chips with SPI interface. -config MFD_TPS80031 - bool "TI TPS80031/TPS80032 Power Management chips" - depends on I2C=y - select MFD_CORE - select REGMAP_I2C - select REGMAP_IRQ - help - If you say yes here you get support for the Texas Instruments - TPS80031/ TPS80032 Fully Integrated Power Management with Power - Path and Battery Charger. The device provides five configurable - step-down converters, 11 general purpose LDOs, USB OTG Module, - ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with - Power Path from USB, 32K clock generator. - config TWL4030_CORE bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 2ba6646e874cd..0b1b629aef3e4 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -105,7 +105,6 @@ obj-$(CONFIG_MFD_TPS65910) += tps65910.o obj-$(CONFIG_MFD_TPS65912) += tps65912-core.o obj-$(CONFIG_MFD_TPS65912_I2C) += tps65912-i2c.o obj-$(CONFIG_MFD_TPS65912_SPI) += tps65912-spi.o -obj-$(CONFIG_MFD_TPS80031) += tps80031.o obj-$(CONFIG_MENELAUS) += menelaus.o obj-$(CONFIG_TWL4030_CORE) += twl-core.o twl4030-irq.o twl6030-irq.o diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c deleted file mode 100644 index 3c4e62c3406ab..0000000000000 --- a/drivers/mfd/tps80031.c +++ /dev/null @@ -1,526 +0,0 @@ -/* - * tps80031.c -- TI TPS80031/TPS80032 mfd core driver. - * - * MFD core driver for TI TPS80031/TPS80032 Fully Integrated - * Power Management with Power Path and Battery Charger - * - * Copyright (c) 2012, NVIDIA Corporation. - * - * Author: Laxman Dewangan - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. 
- * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, - * whether express or implied; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307, USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct resource tps80031_rtc_resources[] = { - DEFINE_RES_IRQ(TPS80031_INT_RTC_ALARM), -}; - -/* TPS80031 sub mfd devices */ -static const struct mfd_cell tps80031_cell[] = { - { - .name = "tps80031-pmic", - }, - { - .name = "tps80031-clock", - }, - { - .name = "tps80031-rtc", - .num_resources = ARRAY_SIZE(tps80031_rtc_resources), - .resources = tps80031_rtc_resources, - }, - { - .name = "tps80031-gpadc", - }, - { - .name = "tps80031-fuel-gauge", - }, - { - .name = "tps80031-charger", - }, -}; - -static int tps80031_slave_address[TPS80031_NUM_SLAVES] = { - TPS80031_I2C_ID0_ADDR, - TPS80031_I2C_ID1_ADDR, - TPS80031_I2C_ID2_ADDR, - TPS80031_I2C_ID3_ADDR, -}; - -struct tps80031_pupd_data { - u8 reg; - u8 pullup_bit; - u8 pulldown_bit; -}; - -#define TPS80031_IRQ(_reg, _mask) \ - { \ - .reg_offset = (TPS80031_INT_MSK_LINE_##_reg) - \ - TPS80031_INT_MSK_LINE_A, \ - .mask = BIT(_mask), \ - } - -static const struct regmap_irq tps80031_main_irqs[] = { - [TPS80031_INT_PWRON] = TPS80031_IRQ(A, 0), - [TPS80031_INT_RPWRON] = TPS80031_IRQ(A, 1), - [TPS80031_INT_SYS_VLOW] = TPS80031_IRQ(A, 2), - [TPS80031_INT_RTC_ALARM] = TPS80031_IRQ(A, 3), - [TPS80031_INT_RTC_PERIOD] = TPS80031_IRQ(A, 4), - [TPS80031_INT_HOT_DIE] = TPS80031_IRQ(A, 5), - [TPS80031_INT_VXX_SHORT] = TPS80031_IRQ(A, 6), - [TPS80031_INT_SPDURATION] = TPS80031_IRQ(A, 7), - [TPS80031_INT_WATCHDOG] = TPS80031_IRQ(B, 0), - [TPS80031_INT_BAT] = TPS80031_IRQ(B, 1), - [TPS80031_INT_SIM] = TPS80031_IRQ(B, 2), - [TPS80031_INT_MMC] = TPS80031_IRQ(B, 3), - [TPS80031_INT_RES] = TPS80031_IRQ(B, 4), - [TPS80031_INT_GPADC_RT] = TPS80031_IRQ(B, 5), - [TPS80031_INT_GPADC_SW2_EOC] = TPS80031_IRQ(B, 6), - [TPS80031_INT_CC_AUTOCAL] = TPS80031_IRQ(B, 7), - [TPS80031_INT_ID_WKUP] = TPS80031_IRQ(C, 0), - [TPS80031_INT_VBUSS_WKUP] = TPS80031_IRQ(C, 1), - [TPS80031_INT_ID] = TPS80031_IRQ(C, 2), - [TPS80031_INT_VBUS] = TPS80031_IRQ(C, 3), - [TPS80031_INT_CHRG_CTRL] = TPS80031_IRQ(C, 4), - [TPS80031_INT_EXT_CHRG] = TPS80031_IRQ(C, 5), - [TPS80031_INT_INT_CHRG] = TPS80031_IRQ(C, 6), - [TPS80031_INT_RES2] = TPS80031_IRQ(C, 7), -}; - -static struct regmap_irq_chip tps80031_irq_chip = { - .name = "tps80031", - .irqs = tps80031_main_irqs, - .num_irqs = ARRAY_SIZE(tps80031_main_irqs), - .num_regs = 3, - .status_base = TPS80031_INT_STS_A, - .mask_base = TPS80031_INT_MSK_LINE_A, -}; - -#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit) \ - { \ - .reg = TPS80031_CFG_INPUT_PUPD##_reg, \ - .pulldown_bit = _pulldown_bit, \ - .pullup_bit = _pullup_bit, \ - } - -static const struct tps80031_pupd_data tps80031_pupds[] = { - [TPS80031_PREQ1] = PUPD_DATA(1, BIT(0), BIT(1)), - [TPS80031_PREQ2A] = PUPD_DATA(1, BIT(2), BIT(3)), - [TPS80031_PREQ2B] = PUPD_DATA(1, BIT(4), BIT(5)), - [TPS80031_PREQ2C] = PUPD_DATA(1, BIT(6), BIT(7)), - [TPS80031_PREQ3] = PUPD_DATA(2, BIT(0), BIT(1)), - [TPS80031_NRES_WARM] = PUPD_DATA(2, 0, BIT(2)), - [TPS80031_PWM_FORCE] = PUPD_DATA(2, BIT(5), 0), - 
[TPS80031_CHRG_EXT_CHRG_STATZ] = PUPD_DATA(2, 0, BIT(6)), - [TPS80031_SIM] = PUPD_DATA(3, BIT(0), BIT(1)), - [TPS80031_MMC] = PUPD_DATA(3, BIT(2), BIT(3)), - [TPS80031_GPADC_START] = PUPD_DATA(3, BIT(4), 0), - [TPS80031_DVSI2C_SCL] = PUPD_DATA(4, 0, BIT(0)), - [TPS80031_DVSI2C_SDA] = PUPD_DATA(4, 0, BIT(1)), - [TPS80031_CTLI2C_SCL] = PUPD_DATA(4, 0, BIT(2)), - [TPS80031_CTLI2C_SDA] = PUPD_DATA(4, 0, BIT(3)), -}; -static struct tps80031 *tps80031_power_off_dev; - -int tps80031_ext_power_req_config(struct device *dev, - unsigned long ext_ctrl_flag, int preq_bit, - int state_reg_add, int trans_reg_add) -{ - u8 res_ass_reg = 0; - int preq_mask_bit = 0; - int ret; - - if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ)) - return 0; - - if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) { - res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3); - preq_mask_bit = 5; - } else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) { - res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3); - preq_mask_bit = 6; - } else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) { - res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3); - preq_mask_bit = 7; - } - - /* Configure REQ_ASS registers */ - ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg, - BIT(preq_bit & 0x7)); - if (ret < 0) { - dev_err(dev, "reg 0x%02x setbit failed, err = %d\n", - res_ass_reg, ret); - return ret; - } - - /* Unmask the PREQ */ - ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1, - TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit)); - if (ret < 0) { - dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n", - TPS80031_PHOENIX_MSK_TRANSITION, ret); - return ret; - } - - /* Switch regulator control to resource now */ - if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 | - TPS80031_PWR_REQ_INPUT_PREQ3)) { - ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add, - 0x0, TPS80031_STATE_MASK); - if (ret < 0) - dev_err(dev, "reg 0x%02x update failed, err = %d\n", - state_reg_add, ret); - } else { - ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add, - TPS80031_TRANS_SLEEP_OFF, - TPS80031_TRANS_SLEEP_MASK); - if (ret < 0) - dev_err(dev, "reg 0x%02x update failed, err = %d\n", - trans_reg_add, ret); - } - return ret; -} -EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config); - -static void tps80031_power_off(void) -{ - dev_info(tps80031_power_off_dev->dev, "switching off PMU\n"); - tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1, - TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF); -} - -static void tps80031_pupd_init(struct tps80031 *tps80031, - struct tps80031_platform_data *pdata) -{ - struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data; - int data_size = pdata->pupd_init_data_size; - int i; - - for (i = 0; i < data_size; ++i) { - struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i]; - const struct tps80031_pupd_data *pupd = - &tps80031_pupds[pupd_init->input_pin]; - u8 update_value = 0; - u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit; - - if (pupd_init->setting == TPS80031_PUPD_PULLDOWN) - update_value = pupd->pulldown_bit; - else if (pupd_init->setting == TPS80031_PUPD_PULLUP) - update_value = pupd->pullup_bit; - - tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg, - update_value, update_mask); - } -} - -static int tps80031_init_ext_control(struct tps80031 *tps80031, - struct tps80031_platform_data *pdata) -{ - struct device *dev = tps80031->dev; - int ret; - int i; - - /* Clear all external control for this rail */ - for (i = 0; i < 9; ++i) { - ret = 
tps80031_write(dev, TPS80031_SLAVE_ID1, - TPS80031_PREQ1_RES_ASS_A + i, 0); - if (ret < 0) { - dev_err(dev, "reg 0x%02x write failed, err = %d\n", - TPS80031_PREQ1_RES_ASS_A + i, ret); - return ret; - } - } - - /* Mask the PREQ */ - ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, - TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5); - if (ret < 0) { - dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n", - TPS80031_PHOENIX_MSK_TRANSITION, ret); - return ret; - } - return ret; -} - -static int tps80031_irq_init(struct tps80031 *tps80031, int irq, int irq_base) -{ - struct device *dev = tps80031->dev; - int i, ret; - - /* - * The MASK register used for updating status register when - * interrupt occurs and LINE register used to pass the status - * to actual interrupt line. As per datasheet: - * When INT_MSK_LINE [i] is set to 1, the associated interrupt - * number i is INT line masked, which means that no interrupt is - * generated on the INT line. - * When INT_MSK_LINE [i] is set to 0, the associated interrupt - * number i is line enabled: An interrupt is generated on the - * INT line. - * In any case, the INT_STS [i] status bit may or may not be updated, - * only linked to the INT_MSK_STS [i] configuration register bit. - * - * When INT_MSK_STS [i] is set to 1, the associated interrupt number - * i is status masked, which means that no interrupt is stored in - * the INT_STS[i] status bit. Note that no interrupt number i is - * generated on the INT line, even if the INT_MSK_LINE [i] register - * bit is set to 0. - * When INT_MSK_STS [i] is set to 0, the associated interrupt number i - * is status enabled: An interrupt status is updated in the INT_STS [i] - * register. The interrupt may or may not be generated on the INT line, - * depending on the INT_MSK_LINE [i] configuration register bit. - */ - for (i = 0; i < 3; i++) - tps80031_write(dev, TPS80031_SLAVE_ID2, - TPS80031_INT_MSK_STS_A + i, 0x00); - - ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq, - IRQF_ONESHOT, irq_base, - &tps80031_irq_chip, &tps80031->irq_data); - if (ret < 0) { - dev_err(dev, "add irq failed, err = %d\n", ret); - return ret; - } - return ret; -} - -static bool rd_wr_reg_id0(struct device *dev, unsigned int reg) -{ - switch (reg) { - case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE: - return true; - default: - return false; - } -} - -static bool rd_wr_reg_id1(struct device *dev, unsigned int reg) -{ - switch (reg) { - case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG: - case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7: - case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG: - case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE: - case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST: - case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE: - case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE: - case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C: - case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING: - case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD: - case TPS80031_BACKUP_REG: - return true; - default: - return false; - } -} - -static bool is_volatile_reg_id1(struct device *dev, unsigned int reg) -{ - switch (reg) { - case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE: - case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE: - case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE: - case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C: - case TPS80031_SMPS_OFFSET ... 
TPS80031_BATDEBOUNCING: - case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD: - return true; - default: - return false; - } -} - -static bool rd_wr_reg_id2(struct device *dev, unsigned int reg) -{ - switch (reg) { - case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION: - case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1: - case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB: - case TPS80031_TOGGLE1 ... TPS80031_VIBMODE: - case TPS80031_PWM1ON ... TPS80031_PWM2OFF: - case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11: - case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C: - case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2: - return true; - default: - return false; - } -} - -static bool rd_wr_reg_id3(struct device *dev, unsigned int reg) -{ - switch (reg) { - case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18: - return true; - default: - return false; - } -} - -static const struct regmap_config tps80031_regmap_configs[] = { - { - .reg_bits = 8, - .val_bits = 8, - .writeable_reg = rd_wr_reg_id0, - .readable_reg = rd_wr_reg_id0, - .max_register = TPS80031_MAX_REGISTER, - }, - { - .reg_bits = 8, - .val_bits = 8, - .writeable_reg = rd_wr_reg_id1, - .readable_reg = rd_wr_reg_id1, - .volatile_reg = is_volatile_reg_id1, - .max_register = TPS80031_MAX_REGISTER, - }, - { - .reg_bits = 8, - .val_bits = 8, - .writeable_reg = rd_wr_reg_id2, - .readable_reg = rd_wr_reg_id2, - .max_register = TPS80031_MAX_REGISTER, - }, - { - .reg_bits = 8, - .val_bits = 8, - .writeable_reg = rd_wr_reg_id3, - .readable_reg = rd_wr_reg_id3, - .max_register = TPS80031_MAX_REGISTER, - }, -}; - -static int tps80031_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct tps80031_platform_data *pdata = dev_get_platdata(&client->dev); - struct tps80031 *tps80031; - int ret; - uint8_t es_version; - uint8_t ep_ver; - int i; - - if (!pdata) { - dev_err(&client->dev, "tps80031 requires platform data\n"); - return -EINVAL; - } - - tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL); - if (!tps80031) - return -ENOMEM; - - for (i = 0; i < TPS80031_NUM_SLAVES; i++) { - if (tps80031_slave_address[i] == client->addr) - tps80031->clients[i] = client; - else - tps80031->clients[i] = devm_i2c_new_dummy_device(&client->dev, - client->adapter, tps80031_slave_address[i]); - if (IS_ERR(tps80031->clients[i])) { - dev_err(&client->dev, "can't attach client %d\n", i); - return PTR_ERR(tps80031->clients[i]); - } - - i2c_set_clientdata(tps80031->clients[i], tps80031); - tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i], - &tps80031_regmap_configs[i]); - if (IS_ERR(tps80031->regmap[i])) { - ret = PTR_ERR(tps80031->regmap[i]); - dev_err(&client->dev, - "regmap %d init failed, err %d\n", i, ret); - return ret; - } - } - - ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3, - TPS80031_JTAGVERNUM, &es_version); - if (ret < 0) { - dev_err(&client->dev, - "Silicon version number read failed: %d\n", ret); - return ret; - } - - ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3, - TPS80031_EPROM_REV, &ep_ver); - if (ret < 0) { - dev_err(&client->dev, - "Silicon eeprom version read failed: %d\n", ret); - return ret; - } - - dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n", - es_version, ep_ver); - tps80031->es_version = es_version; - tps80031->dev = &client->dev; - i2c_set_clientdata(client, tps80031); - tps80031->chip_info = id->driver_data; - - ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base); - if (ret) { - dev_err(&client->dev, "IRQ init failed: %d\n", 
ret); - return ret; - } - - tps80031_pupd_init(tps80031, pdata); - - tps80031_init_ext_control(tps80031, pdata); - - ret = mfd_add_devices(tps80031->dev, -1, - tps80031_cell, ARRAY_SIZE(tps80031_cell), - NULL, 0, - regmap_irq_get_domain(tps80031->irq_data)); - if (ret < 0) { - dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret); - goto fail_mfd_add; - } - - if (pdata->use_power_off && !pm_power_off) { - tps80031_power_off_dev = tps80031; - pm_power_off = tps80031_power_off; - } - return 0; - -fail_mfd_add: - regmap_del_irq_chip(client->irq, tps80031->irq_data); - return ret; -} - -static const struct i2c_device_id tps80031_id_table[] = { - { "tps80031", TPS80031 }, - { "tps80032", TPS80032 }, - { } -}; - -static struct i2c_driver tps80031_driver = { - .driver = { - .name = "tps80031", - .suppress_bind_attrs = true, - }, - .probe = tps80031_probe, - .id_table = tps80031_id_table, -}; - -static int __init tps80031_init(void) -{ - return i2c_add_driver(&tps80031_driver); -} -subsys_initcall(tps80031_init); diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h deleted file mode 100644 index 2c75c9c9318f4..0000000000000 --- a/include/linux/mfd/tps80031.h +++ /dev/null @@ -1,637 +0,0 @@ -/* - * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver. - * - * Copyright (c) 2012, NVIDIA Corporation. - * - * Author: Laxman Dewangan - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, - * whether express or implied; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307, USA - */ - -#ifndef __LINUX_MFD_TPS80031_H -#define __LINUX_MFD_TPS80031_H - -#include -#include - -/* Pull-ups/Pull-downs */ -#define TPS80031_CFG_INPUT_PUPD1 0xF0 -#define TPS80031_CFG_INPUT_PUPD2 0xF1 -#define TPS80031_CFG_INPUT_PUPD3 0xF2 -#define TPS80031_CFG_INPUT_PUPD4 0xF3 -#define TPS80031_CFG_LDO_PD1 0xF4 -#define TPS80031_CFG_LDO_PD2 0xF5 -#define TPS80031_CFG_SMPS_PD 0xF6 - -/* Real Time Clock */ -#define TPS80031_SECONDS_REG 0x00 -#define TPS80031_MINUTES_REG 0x01 -#define TPS80031_HOURS_REG 0x02 -#define TPS80031_DAYS_REG 0x03 -#define TPS80031_MONTHS_REG 0x04 -#define TPS80031_YEARS_REG 0x05 -#define TPS80031_WEEKS_REG 0x06 -#define TPS80031_ALARM_SECONDS_REG 0x08 -#define TPS80031_ALARM_MINUTES_REG 0x09 -#define TPS80031_ALARM_HOURS_REG 0x0A -#define TPS80031_ALARM_DAYS_REG 0x0B -#define TPS80031_ALARM_MONTHS_REG 0x0C -#define TPS80031_ALARM_YEARS_REG 0x0D -#define TPS80031_RTC_CTRL_REG 0x10 -#define TPS80031_RTC_STATUS_REG 0x11 -#define TPS80031_RTC_INTERRUPTS_REG 0x12 -#define TPS80031_RTC_COMP_LSB_REG 0x13 -#define TPS80031_RTC_COMP_MSB_REG 0x14 -#define TPS80031_RTC_RESET_STATUS_REG 0x16 - -/*PMC Master Module */ -#define TPS80031_PHOENIX_START_CONDITION 0x1F -#define TPS80031_PHOENIX_MSK_TRANSITION 0x20 -#define TPS80031_STS_HW_CONDITIONS 0x21 -#define TPS80031_PHOENIX_LAST_TURNOFF_STS 0x22 -#define TPS80031_VSYSMIN_LO_THRESHOLD 0x23 -#define TPS80031_VSYSMIN_HI_THRESHOLD 0x24 -#define TPS80031_PHOENIX_DEV_ON 0x25 -#define TPS80031_STS_PWR_GRP_STATE 0x27 -#define TPS80031_PH_CFG_VSYSLOW 0x28 -#define TPS80031_PH_STS_BOOT 0x29 -#define TPS80031_PHOENIX_SENS_TRANSITION 0x2A -#define TPS80031_PHOENIX_SEQ_CFG 0x2B -#define TPS80031_PRIMARY_WATCHDOG_CFG 0X2C -#define TPS80031_KEY_PRESS_DUR_CFG 0X2D -#define TPS80031_SMPS_LDO_SHORT_STS 0x2E - -/* PMC Slave Module - Broadcast */ -#define TPS80031_BROADCAST_ADDR_ALL 0x31 -#define TPS80031_BROADCAST_ADDR_REF 0x32 -#define TPS80031_BROADCAST_ADDR_PROV 0x33 -#define TPS80031_BROADCAST_ADDR_CLK_RST 0x34 - -/* PMC Slave Module SMPS Regulators */ -#define TPS80031_SMPS4_CFG_TRANS 0x41 -#define TPS80031_SMPS4_CFG_STATE 0x42 -#define TPS80031_SMPS4_CFG_VOLTAGE 0x44 -#define TPS80031_VIO_CFG_TRANS 0x47 -#define TPS80031_VIO_CFG_STATE 0x48 -#define TPS80031_VIO_CFG_FORCE 0x49 -#define TPS80031_VIO_CFG_VOLTAGE 0x4A -#define TPS80031_VIO_CFG_STEP 0x48 -#define TPS80031_SMPS1_CFG_TRANS 0x53 -#define TPS80031_SMPS1_CFG_STATE 0x54 -#define TPS80031_SMPS1_CFG_FORCE 0x55 -#define TPS80031_SMPS1_CFG_VOLTAGE 0x56 -#define TPS80031_SMPS1_CFG_STEP 0x57 -#define TPS80031_SMPS2_CFG_TRANS 0x59 -#define TPS80031_SMPS2_CFG_STATE 0x5A -#define TPS80031_SMPS2_CFG_FORCE 0x5B -#define TPS80031_SMPS2_CFG_VOLTAGE 0x5C -#define TPS80031_SMPS2_CFG_STEP 0x5D -#define TPS80031_SMPS3_CFG_TRANS 0x65 -#define TPS80031_SMPS3_CFG_STATE 0x66 -#define TPS80031_SMPS3_CFG_VOLTAGE 0x68 - -/* PMC Slave Module LDO Regulators */ -#define TPS80031_VANA_CFG_TRANS 0x81 -#define TPS80031_VANA_CFG_STATE 0x82 -#define TPS80031_VANA_CFG_VOLTAGE 0x83 -#define TPS80031_LDO2_CFG_TRANS 0x85 -#define TPS80031_LDO2_CFG_STATE 0x86 -#define TPS80031_LDO2_CFG_VOLTAGE 0x87 -#define TPS80031_LDO4_CFG_TRANS 0x89 -#define TPS80031_LDO4_CFG_STATE 0x8A -#define TPS80031_LDO4_CFG_VOLTAGE 0x8B -#define TPS80031_LDO3_CFG_TRANS 0x8D -#define TPS80031_LDO3_CFG_STATE 0x8E -#define 
TPS80031_LDO3_CFG_VOLTAGE 0x8F -#define TPS80031_LDO6_CFG_TRANS 0x91 -#define TPS80031_LDO6_CFG_STATE 0x92 -#define TPS80031_LDO6_CFG_VOLTAGE 0x93 -#define TPS80031_LDOLN_CFG_TRANS 0x95 -#define TPS80031_LDOLN_CFG_STATE 0x96 -#define TPS80031_LDOLN_CFG_VOLTAGE 0x97 -#define TPS80031_LDO5_CFG_TRANS 0x99 -#define TPS80031_LDO5_CFG_STATE 0x9A -#define TPS80031_LDO5_CFG_VOLTAGE 0x9B -#define TPS80031_LDO1_CFG_TRANS 0x9D -#define TPS80031_LDO1_CFG_STATE 0x9E -#define TPS80031_LDO1_CFG_VOLTAGE 0x9F -#define TPS80031_LDOUSB_CFG_TRANS 0xA1 -#define TPS80031_LDOUSB_CFG_STATE 0xA2 -#define TPS80031_LDOUSB_CFG_VOLTAGE 0xA3 -#define TPS80031_LDO7_CFG_TRANS 0xA5 -#define TPS80031_LDO7_CFG_STATE 0xA6 -#define TPS80031_LDO7_CFG_VOLTAGE 0xA7 - -/* PMC Slave Module External Control */ -#define TPS80031_REGEN1_CFG_TRANS 0xAE -#define TPS80031_REGEN1_CFG_STATE 0xAF -#define TPS80031_REGEN2_CFG_TRANS 0xB1 -#define TPS80031_REGEN2_CFG_STATE 0xB2 -#define TPS80031_SYSEN_CFG_TRANS 0xB4 -#define TPS80031_SYSEN_CFG_STATE 0xB5 - -/* PMC Slave Module Internal Control */ -#define TPS80031_NRESPWRON_CFG_TRANS 0xB7 -#define TPS80031_NRESPWRON_CFG_STATE 0xB8 -#define TPS80031_CLK32KAO_CFG_TRANS 0xBA -#define TPS80031_CLK32KAO_CFG_STATE 0xBB -#define TPS80031_CLK32KG_CFG_TRANS 0xBD -#define TPS80031_CLK32KG_CFG_STATE 0xBE -#define TPS80031_CLK32KAUDIO_CFG_TRANS 0xC0 -#define TPS80031_CLK32KAUDIO_CFG_STATE 0xC1 -#define TPS80031_VRTC_CFG_TRANS 0xC3 -#define TPS80031_VRTC_CFG_STATE 0xC4 -#define TPS80031_BIAS_CFG_TRANS 0xC6 -#define TPS80031_BIAS_CFG_STATE 0xC7 -#define TPS80031_VSYSMIN_HI_CFG_TRANS 0xC9 -#define TPS80031_VSYSMIN_HI_CFG_STATE 0xCA -#define TPS80031_RC6MHZ_CFG_TRANS 0xCC -#define TPS80031_RC6MHZ_CFG_STATE 0xCD -#define TPS80031_TMP_CFG_TRANS 0xCF -#define TPS80031_TMP_CFG_STATE 0xD0 - -/* PMC Slave Module resources assignment */ -#define TPS80031_PREQ1_RES_ASS_A 0xD7 -#define TPS80031_PREQ1_RES_ASS_B 0xD8 -#define TPS80031_PREQ1_RES_ASS_C 0xD9 -#define TPS80031_PREQ2_RES_ASS_A 0xDA -#define TPS80031_PREQ2_RES_ASS_B 0xDB -#define TPS80031_PREQ2_RES_ASS_C 0xDC -#define TPS80031_PREQ3_RES_ASS_A 0xDD -#define TPS80031_PREQ3_RES_ASS_B 0xDE -#define TPS80031_PREQ3_RES_ASS_C 0xDF - -/* PMC Slave Module Miscellaneous */ -#define TPS80031_SMPS_OFFSET 0xE0 -#define TPS80031_SMPS_MULT 0xE3 -#define TPS80031_MISC1 0xE4 -#define TPS80031_MISC2 0xE5 -#define TPS80031_BBSPOR_CFG 0xE6 -#define TPS80031_TMP_CFG 0xE7 - -/* Battery Charging Controller and Indicator LED */ -#define TPS80031_CONTROLLER_CTRL2 0xDA -#define TPS80031_CONTROLLER_VSEL_COMP 0xDB -#define TPS80031_CHARGERUSB_VSYSREG 0xDC -#define TPS80031_CHARGERUSB_VICHRG_PC 0xDD -#define TPS80031_LINEAR_CHRG_STS 0xDE -#define TPS80031_CONTROLLER_INT_MASK 0xE0 -#define TPS80031_CONTROLLER_CTRL1 0xE1 -#define TPS80031_CONTROLLER_WDG 0xE2 -#define TPS80031_CONTROLLER_STAT1 0xE3 -#define TPS80031_CHARGERUSB_INT_STATUS 0xE4 -#define TPS80031_CHARGERUSB_INT_MASK 0xE5 -#define TPS80031_CHARGERUSB_STATUS_INT1 0xE6 -#define TPS80031_CHARGERUSB_STATUS_INT2 0xE7 -#define TPS80031_CHARGERUSB_CTRL1 0xE8 -#define TPS80031_CHARGERUSB_CTRL2 0xE9 -#define TPS80031_CHARGERUSB_CTRL3 0xEA -#define TPS80031_CHARGERUSB_STAT1 0xEB -#define TPS80031_CHARGERUSB_VOREG 0xEC -#define TPS80031_CHARGERUSB_VICHRG 0xED -#define TPS80031_CHARGERUSB_CINLIMIT 0xEE -#define TPS80031_CHARGERUSB_CTRLLIMIT1 0xEF -#define TPS80031_CHARGERUSB_CTRLLIMIT2 0xF0 -#define TPS80031_LED_PWM_CTRL1 0xF4 -#define TPS80031_LED_PWM_CTRL2 0xF5 - -/* USB On-The-Go */ -#define TPS80031_BACKUP_REG 0xFA -#define 
TPS80031_USB_VENDOR_ID_LSB 0x00 -#define TPS80031_USB_VENDOR_ID_MSB 0x01 -#define TPS80031_USB_PRODUCT_ID_LSB 0x02 -#define TPS80031_USB_PRODUCT_ID_MSB 0x03 -#define TPS80031_USB_VBUS_CTRL_SET 0x04 -#define TPS80031_USB_VBUS_CTRL_CLR 0x05 -#define TPS80031_USB_ID_CTRL_SET 0x06 -#define TPS80031_USB_ID_CTRL_CLR 0x07 -#define TPS80031_USB_VBUS_INT_SRC 0x08 -#define TPS80031_USB_VBUS_INT_LATCH_SET 0x09 -#define TPS80031_USB_VBUS_INT_LATCH_CLR 0x0A -#define TPS80031_USB_VBUS_INT_EN_LO_SET 0x0B -#define TPS80031_USB_VBUS_INT_EN_LO_CLR 0x0C -#define TPS80031_USB_VBUS_INT_EN_HI_SET 0x0D -#define TPS80031_USB_VBUS_INT_EN_HI_CLR 0x0E -#define TPS80031_USB_ID_INT_SRC 0x0F -#define TPS80031_USB_ID_INT_LATCH_SET 0x10 -#define TPS80031_USB_ID_INT_LATCH_CLR 0x11 -#define TPS80031_USB_ID_INT_EN_LO_SET 0x12 -#define TPS80031_USB_ID_INT_EN_LO_CLR 0x13 -#define TPS80031_USB_ID_INT_EN_HI_SET 0x14 -#define TPS80031_USB_ID_INT_EN_HI_CLR 0x15 -#define TPS80031_USB_OTG_ADP_CTRL 0x16 -#define TPS80031_USB_OTG_ADP_HIGH 0x17 -#define TPS80031_USB_OTG_ADP_LOW 0x18 -#define TPS80031_USB_OTG_ADP_RISE 0x19 -#define TPS80031_USB_OTG_REVISION 0x1A - -/* Gas Gauge */ -#define TPS80031_FG_REG_00 0xC0 -#define TPS80031_FG_REG_01 0xC1 -#define TPS80031_FG_REG_02 0xC2 -#define TPS80031_FG_REG_03 0xC3 -#define TPS80031_FG_REG_04 0xC4 -#define TPS80031_FG_REG_05 0xC5 -#define TPS80031_FG_REG_06 0xC6 -#define TPS80031_FG_REG_07 0xC7 -#define TPS80031_FG_REG_08 0xC8 -#define TPS80031_FG_REG_09 0xC9 -#define TPS80031_FG_REG_10 0xCA -#define TPS80031_FG_REG_11 0xCB - -/* General Purpose ADC */ -#define TPS80031_GPADC_CTRL 0x2E -#define TPS80031_GPADC_CTRL2 0x2F -#define TPS80031_RTSELECT_LSB 0x32 -#define TPS80031_RTSELECT_ISB 0x33 -#define TPS80031_RTSELECT_MSB 0x34 -#define TPS80031_GPSELECT_ISB 0x35 -#define TPS80031_CTRL_P1 0x36 -#define TPS80031_RTCH0_LSB 0x37 -#define TPS80031_RTCH0_MSB 0x38 -#define TPS80031_RTCH1_LSB 0x39 -#define TPS80031_RTCH1_MSB 0x3A -#define TPS80031_GPCH0_LSB 0x3B -#define TPS80031_GPCH0_MSB 0x3C - -/* SIM, MMC and Battery Detection */ -#define TPS80031_SIMDEBOUNCING 0xEB -#define TPS80031_SIMCTRL 0xEC -#define TPS80031_MMCDEBOUNCING 0xED -#define TPS80031_MMCCTRL 0xEE -#define TPS80031_BATDEBOUNCING 0xEF - -/* Vibrator Driver and PWMs */ -#define TPS80031_VIBCTRL 0x9B -#define TPS80031_VIBMODE 0x9C -#define TPS80031_PWM1ON 0xBA -#define TPS80031_PWM1OFF 0xBB -#define TPS80031_PWM2ON 0xBD -#define TPS80031_PWM2OFF 0xBE - -/* Control Interface */ -#define TPS80031_INT_STS_A 0xD0 -#define TPS80031_INT_STS_B 0xD1 -#define TPS80031_INT_STS_C 0xD2 -#define TPS80031_INT_MSK_LINE_A 0xD3 -#define TPS80031_INT_MSK_LINE_B 0xD4 -#define TPS80031_INT_MSK_LINE_C 0xD5 -#define TPS80031_INT_MSK_STS_A 0xD6 -#define TPS80031_INT_MSK_STS_B 0xD7 -#define TPS80031_INT_MSK_STS_C 0xD8 -#define TPS80031_TOGGLE1 0x90 -#define TPS80031_TOGGLE2 0x91 -#define TPS80031_TOGGLE3 0x92 -#define TPS80031_PWDNSTATUS1 0x93 -#define TPS80031_PWDNSTATUS2 0x94 -#define TPS80031_VALIDITY0 0x17 -#define TPS80031_VALIDITY1 0x18 -#define TPS80031_VALIDITY2 0x19 -#define TPS80031_VALIDITY3 0x1A -#define TPS80031_VALIDITY4 0x1B -#define TPS80031_VALIDITY5 0x1C -#define TPS80031_VALIDITY6 0x1D -#define TPS80031_VALIDITY7 0x1E - -/* Version number related register */ -#define TPS80031_JTAGVERNUM 0x87 -#define TPS80031_EPROM_REV 0xDF - -/* GPADC Trimming Bits. 
*/ -#define TPS80031_GPADC_TRIM0 0xCC -#define TPS80031_GPADC_TRIM1 0xCD -#define TPS80031_GPADC_TRIM2 0xCE -#define TPS80031_GPADC_TRIM3 0xCF -#define TPS80031_GPADC_TRIM4 0xD0 -#define TPS80031_GPADC_TRIM5 0xD1 -#define TPS80031_GPADC_TRIM6 0xD2 -#define TPS80031_GPADC_TRIM7 0xD3 -#define TPS80031_GPADC_TRIM8 0xD4 -#define TPS80031_GPADC_TRIM9 0xD5 -#define TPS80031_GPADC_TRIM10 0xD6 -#define TPS80031_GPADC_TRIM11 0xD7 -#define TPS80031_GPADC_TRIM12 0xD8 -#define TPS80031_GPADC_TRIM13 0xD9 -#define TPS80031_GPADC_TRIM14 0xDA -#define TPS80031_GPADC_TRIM15 0xDB -#define TPS80031_GPADC_TRIM16 0xDC -#define TPS80031_GPADC_TRIM17 0xDD -#define TPS80031_GPADC_TRIM18 0xDE - -/* TPS80031_CONTROLLER_STAT1 bit fields */ -#define TPS80031_CONTROLLER_STAT1_BAT_TEMP 0 -#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED 1 -#define TPS80031_CONTROLLER_STAT1_VBUS_DET 2 -#define TPS80031_CONTROLLER_STAT1_VAC_DET 3 -#define TPS80031_CONTROLLER_STAT1_FAULT_WDG 4 -#define TPS80031_CONTROLLER_STAT1_LINCH_GATED 6 -/* TPS80031_CONTROLLER_INT_MASK bit filed */ -#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET 0 -#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET 1 -#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP 2 -#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG 3 -#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED 4 -#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED 5 - -#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK 0x3F - -/* TPS80031_PHOENIX_DEV_ON bit field */ -#define TPS80031_DEVOFF 0x1 - -#define TPS80031_EXT_CONTROL_CFG_TRANS 0 -#define TPS80031_EXT_CONTROL_CFG_STATE 1 - -/* State register field */ -#define TPS80031_STATE_OFF 0x00 -#define TPS80031_STATE_ON 0x01 -#define TPS80031_STATE_MASK 0x03 - -/* Trans register field */ -#define TPS80031_TRANS_ACTIVE_OFF 0x00 -#define TPS80031_TRANS_ACTIVE_ON 0x01 -#define TPS80031_TRANS_ACTIVE_MASK 0x03 -#define TPS80031_TRANS_SLEEP_OFF 0x00 -#define TPS80031_TRANS_SLEEP_ON 0x04 -#define TPS80031_TRANS_SLEEP_MASK 0x0C -#define TPS80031_TRANS_OFF_OFF 0x00 -#define TPS80031_TRANS_OFF_ACTIVE 0x10 -#define TPS80031_TRANS_OFF_MASK 0x30 - -#define TPS80031_EXT_PWR_REQ (TPS80031_PWR_REQ_INPUT_PREQ1 | \ - TPS80031_PWR_REQ_INPUT_PREQ2 | \ - TPS80031_PWR_REQ_INPUT_PREQ3) - -/* TPS80031_BBSPOR_CFG bit field */ -#define TPS80031_BBSPOR_CHG_EN 0x8 -#define TPS80031_MAX_REGISTER 0xFF - -struct i2c_client; - -/* Supported chips */ -enum chips { - TPS80031 = 0x00000001, - TPS80032 = 0x00000002, -}; - -enum { - TPS80031_INT_PWRON, - TPS80031_INT_RPWRON, - TPS80031_INT_SYS_VLOW, - TPS80031_INT_RTC_ALARM, - TPS80031_INT_RTC_PERIOD, - TPS80031_INT_HOT_DIE, - TPS80031_INT_VXX_SHORT, - TPS80031_INT_SPDURATION, - TPS80031_INT_WATCHDOG, - TPS80031_INT_BAT, - TPS80031_INT_SIM, - TPS80031_INT_MMC, - TPS80031_INT_RES, - TPS80031_INT_GPADC_RT, - TPS80031_INT_GPADC_SW2_EOC, - TPS80031_INT_CC_AUTOCAL, - TPS80031_INT_ID_WKUP, - TPS80031_INT_VBUSS_WKUP, - TPS80031_INT_ID, - TPS80031_INT_VBUS, - TPS80031_INT_CHRG_CTRL, - TPS80031_INT_EXT_CHRG, - TPS80031_INT_INT_CHRG, - TPS80031_INT_RES2, - TPS80031_INT_BAT_TEMP_OVRANGE, - TPS80031_INT_BAT_REMOVED, - TPS80031_INT_VBUS_DET, - TPS80031_INT_VAC_DET, - TPS80031_INT_FAULT_WDG, - TPS80031_INT_LINCH_GATED, - - /* Last interrupt id to get the end number */ - TPS80031_INT_NR, -}; - -/* TPS80031 Slave IDs */ -#define TPS80031_NUM_SLAVES 4 -#define TPS80031_SLAVE_ID0 0 -#define TPS80031_SLAVE_ID1 1 -#define TPS80031_SLAVE_ID2 2 -#define TPS80031_SLAVE_ID3 3 - -/* TPS80031 I2C addresses */ -#define TPS80031_I2C_ID0_ADDR 0x12 -#define TPS80031_I2C_ID1_ADDR 0x48 -#define 
TPS80031_I2C_ID2_ADDR 0x49 -#define TPS80031_I2C_ID3_ADDR 0x4A - -enum { - TPS80031_REGULATOR_VIO, - TPS80031_REGULATOR_SMPS1, - TPS80031_REGULATOR_SMPS2, - TPS80031_REGULATOR_SMPS3, - TPS80031_REGULATOR_SMPS4, - TPS80031_REGULATOR_VANA, - TPS80031_REGULATOR_LDO1, - TPS80031_REGULATOR_LDO2, - TPS80031_REGULATOR_LDO3, - TPS80031_REGULATOR_LDO4, - TPS80031_REGULATOR_LDO5, - TPS80031_REGULATOR_LDO6, - TPS80031_REGULATOR_LDO7, - TPS80031_REGULATOR_LDOLN, - TPS80031_REGULATOR_LDOUSB, - TPS80031_REGULATOR_VBUS, - TPS80031_REGULATOR_REGEN1, - TPS80031_REGULATOR_REGEN2, - TPS80031_REGULATOR_SYSEN, - TPS80031_REGULATOR_MAX, -}; - -/* Different configurations for the rails */ -enum { - /* USBLDO input selection */ - TPS80031_USBLDO_INPUT_VSYS = 0x00000001, - TPS80031_USBLDO_INPUT_PMID = 0x00000002, - - /* LDO3 output mode */ - TPS80031_LDO3_OUTPUT_VIB = 0x00000004, - - /* VBUS configuration */ - TPS80031_VBUS_DISCHRG_EN_PDN = 0x00000004, - TPS80031_VBUS_SW_ONLY = 0x00000008, - TPS80031_VBUS_SW_N_ID = 0x00000010, -}; - -/* External controls requests */ -enum tps80031_ext_control { - TPS80031_PWR_REQ_INPUT_NONE = 0x00000000, - TPS80031_PWR_REQ_INPUT_PREQ1 = 0x00000001, - TPS80031_PWR_REQ_INPUT_PREQ2 = 0x00000002, - TPS80031_PWR_REQ_INPUT_PREQ3 = 0x00000004, - TPS80031_PWR_OFF_ON_SLEEP = 0x00000008, - TPS80031_PWR_ON_ON_SLEEP = 0x00000010, -}; - -enum tps80031_pupd_pins { - TPS80031_PREQ1 = 0, - TPS80031_PREQ2A, - TPS80031_PREQ2B, - TPS80031_PREQ2C, - TPS80031_PREQ3, - TPS80031_NRES_WARM, - TPS80031_PWM_FORCE, - TPS80031_CHRG_EXT_CHRG_STATZ, - TPS80031_SIM, - TPS80031_MMC, - TPS80031_GPADC_START, - TPS80031_DVSI2C_SCL, - TPS80031_DVSI2C_SDA, - TPS80031_CTLI2C_SCL, - TPS80031_CTLI2C_SDA, -}; - -enum tps80031_pupd_settings { - TPS80031_PUPD_NORMAL, - TPS80031_PUPD_PULLDOWN, - TPS80031_PUPD_PULLUP, -}; - -struct tps80031 { - struct device *dev; - unsigned long chip_info; - int es_version; - struct i2c_client *clients[TPS80031_NUM_SLAVES]; - struct regmap *regmap[TPS80031_NUM_SLAVES]; - struct regmap_irq_chip_data *irq_data; -}; - -struct tps80031_pupd_init_data { - int input_pin; - int setting; -}; - -/* - * struct tps80031_regulator_platform_data - tps80031 regulator platform data. - * - * @reg_init_data: The regulator init data. - * @ext_ctrl_flag: External control flag for sleep/power request control. - * @config_flags: Configuration flag to configure the rails. - * It should be ORed of config enums. 
- */ - -struct tps80031_regulator_platform_data { - struct regulator_init_data *reg_init_data; - unsigned int ext_ctrl_flag; - unsigned int config_flags; -}; - -struct tps80031_platform_data { - int irq_base; - bool use_power_off; - struct tps80031_pupd_init_data *pupd_init_data; - int pupd_init_data_size; - struct tps80031_regulator_platform_data - *regulator_pdata[TPS80031_REGULATOR_MAX]; -}; - -static inline int tps80031_write(struct device *dev, int sid, - int reg, uint8_t val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_write(tps80031->regmap[sid], reg, val); -} - -static inline int tps80031_writes(struct device *dev, int sid, int reg, - int len, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_bulk_write(tps80031->regmap[sid], reg, val, len); -} - -static inline int tps80031_read(struct device *dev, int sid, - int reg, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - unsigned int ival; - int ret; - - ret = regmap_read(tps80031->regmap[sid], reg, &ival); - if (ret < 0) { - dev_err(dev, "failed reading from reg 0x%02x\n", reg); - return ret; - } - - *val = ival; - return ret; -} - -static inline int tps80031_reads(struct device *dev, int sid, - int reg, int len, uint8_t *val) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_bulk_read(tps80031->regmap[sid], reg, val, len); -} - -static inline int tps80031_set_bits(struct device *dev, int sid, - int reg, uint8_t bit_mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, - bit_mask, bit_mask); -} - -static inline int tps80031_clr_bits(struct device *dev, int sid, - int reg, uint8_t bit_mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0); -} - -static inline int tps80031_update(struct device *dev, int sid, - int reg, uint8_t val, uint8_t mask) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_update_bits(tps80031->regmap[sid], reg, mask, val); -} - -static inline unsigned long tps80031_get_chip_info(struct device *dev) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return tps80031->chip_info; -} - -static inline int tps80031_get_pmu_version(struct device *dev) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return tps80031->es_version; -} - -static inline int tps80031_irq_get_virq(struct device *dev, int irq) -{ - struct tps80031 *tps80031 = dev_get_drvdata(dev); - - return regmap_irq_get_virq(tps80031->irq_data, irq); -} - -extern int tps80031_ext_power_req_config(struct device *dev, - unsigned long ext_ctrl_flag, int preq_bit, - int state_reg_add, int trans_reg_add); -#endif /*__LINUX_MFD_TPS80031_H */ From 290512627f5bd15f4d1cee78a97edbff5f3916f8 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Fri, 17 Sep 2021 10:44:31 +0200 Subject: [PATCH 1069/1202] ARM: tegra: Name clock and regulator nodes according to DT-schema Name clocks and regulators according to DT-schema to fix warnings such as: arch/arm/boot/dts/tegra20-acer-a500-picasso.dt.yaml: /: clock@0: 'anyOf' conditional failed, one must be fixed: 'reg' is a required property 'ranges' is a required property From schema: /home/runner/.local/lib/python3.8/site-packages/dtschema/schemas/root-node.yaml Signed-off-by: David Heidelberg --- arch/arm/boot/dts/tegra114-dalmore.dts | 16 +++++------ arch/arm/boot/dts/tegra114-roth.dts | 14 +++++----- arch/arm/boot/dts/tegra114-tn7.dts | 8 
+++--- arch/arm/boot/dts/tegra124-jetson-tk1.dts | 26 ++++++++--------- arch/arm/boot/dts/tegra124-nyan.dtsi | 28 +++++++++---------- arch/arm/boot/dts/tegra124-venice2.dts | 28 +++++++++---------- .../boot/dts/tegra20-acer-a500-picasso.dts | 12 ++++---- arch/arm/boot/dts/tegra20-harmony.dts | 16 +++++------ arch/arm/boot/dts/tegra20-medcom-wide.dts | 8 +++--- arch/arm/boot/dts/tegra20-paz00.dts | 6 ++-- arch/arm/boot/dts/tegra20-plutux.dts | 8 +++--- arch/arm/boot/dts/tegra20-seaboard.dts | 16 +++++------ arch/arm/boot/dts/tegra20-tamonten.dtsi | 4 +-- arch/arm/boot/dts/tegra20-tec.dts | 8 +++--- arch/arm/boot/dts/tegra20-trimslice.dts | 12 ++++---- arch/arm/boot/dts/tegra20-ventana.dts | 12 ++++---- .../tegra30-asus-nexus7-grouper-common.dtsi | 10 +++---- ...egra30-asus-nexus7-grouper-maxim-pmic.dtsi | 4 +-- .../tegra30-asus-nexus7-grouper-ti-pmic.dtsi | 2 +- arch/arm/boot/dts/tegra30-beaver.dts | 20 ++++++------- arch/arm/boot/dts/tegra30-cardhu-a02.dts | 12 ++++---- arch/arm/boot/dts/tegra30-cardhu-a04.dts | 14 +++++----- arch/arm/boot/dts/tegra30-cardhu.dtsi | 28 +++++++++---------- 23 files changed, 156 insertions(+), 156 deletions(-) diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts index 7fd901f8d39ab..d23050bfaba6d 100644 --- a/arch/arm/boot/dts/tegra114-dalmore.dts +++ b/arch/arm/boot/dts/tegra114-dalmore.dts @@ -1151,7 +1151,7 @@ default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -1186,7 +1186,7 @@ }; }; - vdd_ac_bat_reg: regulator@0 { + vdd_ac_bat_reg: regulator-acbat { compatible = "regulator-fixed"; regulator-name = "vdd_ac_bat"; regulator-min-microvolt = <5000000>; @@ -1194,7 +1194,7 @@ regulator-always-on; }; - dvdd_ts_reg: regulator@1 { + dvdd_ts_reg: regulator-ts { compatible = "regulator-fixed"; regulator-name = "dvdd_ts"; regulator-min-microvolt = <1800000>; @@ -1203,7 +1203,7 @@ gpio = <&gpio TEGRA_GPIO(H, 5) GPIO_ACTIVE_HIGH>; }; - usb1_vbus_reg: regulator@3 { + usb1_vbus_reg: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "usb1_vbus"; regulator-min-microvolt = <5000000>; @@ -1214,7 +1214,7 @@ vin-supply = <&tps65090_dcdc1_reg>; }; - usb3_vbus_reg: regulator@4 { + usb3_vbus_reg: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "usb2_vbus"; regulator-min-microvolt = <5000000>; @@ -1225,7 +1225,7 @@ vin-supply = <&tps65090_dcdc1_reg>; }; - vdd_hdmi_reg: regulator@5 { + vdd_hdmi_reg: regulator-hdmi { compatible = "regulator-fixed"; regulator-name = "vdd_hdmi_5v0"; regulator-min-microvolt = <5000000>; @@ -1233,7 +1233,7 @@ vin-supply = <&tps65090_dcdc1_reg>; }; - vdd_cam_1v8_reg: regulator@6 { + vdd_cam_1v8_reg: regulator-cam { compatible = "regulator-fixed"; regulator-name = "vdd_cam_1v8_reg"; regulator-min-microvolt = <1800000>; @@ -1242,7 +1242,7 @@ gpio = <&palmas_gpio 6 0>; }; - vdd_5v0_hdmi: regulator@7 { + vdd_5v0_hdmi: regulator-hdmicon { compatible = "regulator-fixed"; regulator-name = "VDD_5V0_HDMI_CON"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts index 07960171fabee..18521707ff5b4 100644 --- a/arch/arm/boot/dts/tegra114-roth.dts +++ b/arch/arm/boot/dts/tegra114-roth.dts @@ -1016,7 +1016,7 @@ enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -1045,7 +1045,7 @@ }; 
}; - lcd_bl_en: regulator@0 { + lcd_bl_en: regulator-lcden { compatible = "regulator-fixed"; regulator-name = "lcd_bl_en"; regulator-min-microvolt = <5000000>; @@ -1053,7 +1053,7 @@ regulator-boot-on; }; - vdd_lcd: regulator@1 { + vdd_lcd: regulator-lcd { compatible = "regulator-fixed"; regulator-name = "vdd_lcd_1v8"; regulator-min-microvolt = <1800000>; @@ -1064,7 +1064,7 @@ regulator-boot-on; }; - regulator@2 { + regulator-1v8ts { compatible = "regulator-fixed"; regulator-name = "vdd_1v8_ts"; regulator-min-microvolt = <1800000>; @@ -1073,7 +1073,7 @@ regulator-boot-on; }; - regulator@3 { + regulator-3v3ts { compatible = "regulator-fixed"; regulator-name = "vdd_3v3_ts"; regulator-min-microvolt = <3300000>; @@ -1083,7 +1083,7 @@ regulator-boot-on; }; - regulator@4 { + regulator-1v8com { compatible = "regulator-fixed"; regulator-name = "vdd_1v8_com"; regulator-min-microvolt = <1800000>; @@ -1094,7 +1094,7 @@ regulator-boot-on; }; - regulator@5 { + regulator-3v3com { compatible = "regulator-fixed"; regulator-name = "vdd_3v3_com"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts index 745d234b105ba..0534247458f01 100644 --- a/arch/arm/boot/dts/tegra114-tn7.dts +++ b/arch/arm/boot/dts/tegra114-tn7.dts @@ -273,7 +273,7 @@ power-supply = <&lcd_bl_en>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -303,7 +303,7 @@ }; /* FIXME: output of BQ24192 */ - vs_sys: regulator@0 { + vs_sys: regulator-vs { compatible = "regulator-fixed"; regulator-name = "VS_SYS"; regulator-min-microvolt = <4200000>; @@ -312,7 +312,7 @@ regulator-boot-on; }; - lcd_bl_en: regulator@1 { + lcd_bl_en: regulator-lcden { compatible = "regulator-fixed"; regulator-name = "VDD_LCD_BL"; regulator-min-microvolt = <16500000>; @@ -323,7 +323,7 @@ regulator-boot-on; }; - vdd_lcd: regulator@2 { + vdd_lcd: regulator-lcd { compatible = "regulator-fixed"; regulator-name = "VD_LCD_1V8"; regulator-min-microvolt = <1800000>; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index 35ab296408e10..094f94a627b20 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -1868,7 +1868,7 @@ vbus-supply = <&vdd_usb3_vbus>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -1892,7 +1892,7 @@ }; }; - vdd_mux: regulator@0 { + vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; regulator-min-microvolt = <12000000>; @@ -1901,7 +1901,7 @@ regulator-boot-on; }; - vdd_5v0_sys: regulator@1 { + vdd_5v0_sys: regulator-5v0sys { compatible = "regulator-fixed"; regulator-name = "+5V_SYS"; regulator-min-microvolt = <5000000>; @@ -1911,7 +1911,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_sys: regulator@2 { + vdd_3v3_sys: regulator-3v3sys { compatible = "regulator-fixed"; regulator-name = "+3.3V_SYS"; regulator-min-microvolt = <3300000>; @@ -1921,7 +1921,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_run: regulator@3 { + vdd_3v3_run: regulator-3v3run { compatible = "regulator-fixed"; regulator-name = "+3.3V_RUN"; regulator-min-microvolt = <3300000>; @@ -1933,7 +1933,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_3v3_hdmi: regulator@4 { + vdd_3v3_hdmi: regulator-3v3hdmi { compatible = "regulator-fixed"; regulator-name = "+3.3V_AVDD_HDMI_AP_GATED"; regulator-min-microvolt = <3300000>; @@ -1941,7 +1941,7 @@ vin-supply = 
<&vdd_3v3_run>; }; - vdd_usb1_vbus: regulator@5 { + vdd_usb1_vbus: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "+USB0_VBUS_SW"; regulator-min-microvolt = <5000000>; @@ -1952,7 +1952,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb3_vbus: regulator@6 { + vdd_usb3_vbus: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "+5V_USB_HS"; regulator-min-microvolt = <5000000>; @@ -1963,7 +1963,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_3v3_lp0: regulator@7 { + vdd_3v3_lp0: regulator-lp0 { compatible = "regulator-fixed"; regulator-name = "+3.3V_LP0"; regulator-min-microvolt = <3300000>; @@ -1975,7 +1975,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_hdmi_pll: regulator@8 { + vdd_hdmi_pll: regulator-hdmipll { compatible = "regulator-fixed"; regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL"; regulator-min-microvolt = <1050000>; @@ -1984,7 +1984,7 @@ vin-supply = <&vdd_1v05_run>; }; - vdd_5v0_hdmi: regulator@9 { + vdd_5v0_hdmi: regulator-hdmicon { compatible = "regulator-fixed"; regulator-name = "+5V_HDMI_CON"; regulator-min-microvolt = <5000000>; @@ -1995,7 +1995,7 @@ }; /* Molex power connector */ - vdd_5v0_sata: regulator@10 { + vdd_5v0_sata: regulator-5v0sata { compatible = "regulator-fixed"; regulator-name = "+5V_SATA"; regulator-min-microvolt = <5000000>; @@ -2005,7 +2005,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_12v0_sata: regulator@11 { + vdd_12v0_sata: regulator-12v0sata { compatible = "regulator-fixed"; regulator-name = "+12V_SATA"; regulator-min-microvolt = <12000000>; diff --git a/arch/arm/boot/dts/tegra124-nyan.dtsi b/arch/arm/boot/dts/tegra124-nyan.dtsi index 63a81270300a6..2d045d1a9bcff 100644 --- a/arch/arm/boot/dts/tegra124-nyan.dtsi +++ b/arch/arm/boot/dts/tegra124-nyan.dtsi @@ -582,7 +582,7 @@ 256>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -615,7 +615,7 @@ }; }; - vdd_mux: regulator@0 { + vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; regulator-min-microvolt = <12000000>; @@ -624,7 +624,7 @@ regulator-boot-on; }; - vdd_5v0_sys: regulator@1 { + vdd_5v0_sys: regulator-5v0sys { compatible = "regulator-fixed"; regulator-name = "+5V_SYS"; regulator-min-microvolt = <5000000>; @@ -634,7 +634,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_sys: regulator@2 { + vdd_3v3_sys: regulator-3v3sys { compatible = "regulator-fixed"; regulator-name = "+3.3V_SYS"; regulator-min-microvolt = <3300000>; @@ -644,7 +644,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_run: regulator@3 { + vdd_3v3_run: regulator-3v3run { compatible = "regulator-fixed"; regulator-name = "+3.3V_RUN"; regulator-min-microvolt = <3300000>; @@ -656,7 +656,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_3v3_hdmi: regulator@4 { + vdd_3v3_hdmi: regulator-3v3hdmi { compatible = "regulator-fixed"; regulator-name = "+3.3V_AVDD_HDMI_AP_GATED"; regulator-min-microvolt = <3300000>; @@ -664,7 +664,7 @@ vin-supply = <&vdd_3v3_run>; }; - vdd_led: regulator@5 { + vdd_led: regulator-led { compatible = "regulator-fixed"; regulator-name = "+VDD_LED"; gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>; @@ -672,7 +672,7 @@ vin-supply = <&vdd_mux>; }; - vdd_5v0_ts: regulator@6 { + vdd_5v0_ts: regulator-ts { compatible = "regulator-fixed"; regulator-name = "+5V_VDD_TS_SW"; regulator-min-microvolt = <5000000>; @@ -683,7 +683,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb1_vbus: regulator@7 { + vdd_usb1_vbus: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "+5V_USB_HS"; regulator-min-microvolt = 
<5000000>; @@ -694,7 +694,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb3_vbus: regulator@8 { + vdd_usb3_vbus: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "+5V_USB_SS"; regulator-min-microvolt = <5000000>; @@ -705,7 +705,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_3v3_panel: regulator@9 { + vdd_3v3_panel: regulator-panel { compatible = "regulator-fixed"; regulator-name = "+3.3V_PANEL"; regulator-min-microvolt = <3300000>; @@ -715,7 +715,7 @@ vin-supply = <&vdd_3v3_run>; }; - vdd_3v3_lp0: regulator@10 { + vdd_3v3_lp0: regulator-lp0 { compatible = "regulator-fixed"; regulator-name = "+3.3V_LP0"; regulator-min-microvolt = <3300000>; @@ -730,7 +730,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_hdmi_pll: regulator@11 { + vdd_hdmi_pll: regulator-hdmipll { compatible = "regulator-fixed"; regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL"; regulator-min-microvolt = <1050000>; @@ -739,7 +739,7 @@ vin-supply = <&vdd_1v05_run>; }; - vdd_5v0_hdmi: regulator@12 { + vdd_5v0_hdmi: regulator-hdmicon { compatible = "regulator-fixed"; regulator-name = "+5V_HDMI_CON"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index e6b54ac1ebd1a..457b8aada1f82 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -1061,7 +1061,7 @@ default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -1086,7 +1086,7 @@ ddc-i2c-bus = <&dpaux>; }; - vdd_mux: regulator@0 { + vdd_mux: regulator-mux { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; regulator-min-microvolt = <12000000>; @@ -1095,7 +1095,7 @@ regulator-boot-on; }; - vdd_5v0_sys: regulator@1 { + vdd_5v0_sys: regulator-5v0sys { compatible = "regulator-fixed"; regulator-name = "+5V_SYS"; regulator-min-microvolt = <5000000>; @@ -1105,7 +1105,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_sys: regulator@2 { + vdd_3v3_sys: regulator-3v3sys { compatible = "regulator-fixed"; regulator-name = "+3.3V_SYS"; regulator-min-microvolt = <3300000>; @@ -1115,7 +1115,7 @@ vin-supply = <&vdd_mux>; }; - vdd_3v3_run: regulator@3 { + vdd_3v3_run: regulator-3v3run { compatible = "regulator-fixed"; regulator-name = "+3.3V_RUN"; regulator-min-microvolt = <3300000>; @@ -1127,7 +1127,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_3v3_hdmi: regulator@4 { + vdd_3v3_hdmi: regulator-hdmi { compatible = "regulator-fixed"; regulator-name = "+3.3V_AVDD_HDMI_AP_GATED"; regulator-min-microvolt = <3300000>; @@ -1135,7 +1135,7 @@ vin-supply = <&vdd_3v3_run>; }; - vdd_led: regulator@5 { + vdd_led: regulator-led { compatible = "regulator-fixed"; regulator-name = "+VDD_LED"; regulator-min-microvolt = <3300000>; @@ -1145,7 +1145,7 @@ vin-supply = <&vdd_mux>; }; - vdd_5v0_ts: regulator@6 { + vdd_5v0_ts: regulator-ts { compatible = "regulator-fixed"; regulator-name = "+5V_VDD_TS_SW"; regulator-min-microvolt = <5000000>; @@ -1156,7 +1156,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb1_vbus: regulator@7 { + vdd_usb1_vbus: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "+5V_USB_HS"; regulator-min-microvolt = <5000000>; @@ -1167,7 +1167,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb3_vbus: regulator@8 { + vdd_usb3_vbus: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "+5V_USB_SS"; regulator-min-microvolt = <5000000>; @@ -1178,7 +1178,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_3v3_panel: regulator@9 { + vdd_3v3_panel: regulator-panel { 
compatible = "regulator-fixed"; regulator-name = "+3.3V_PANEL"; regulator-min-microvolt = <3300000>; @@ -1188,7 +1188,7 @@ vin-supply = <&vdd_3v3_run>; }; - vdd_3v3_lp0: regulator@10 { + vdd_3v3_lp0: regulator-lp0 { compatible = "regulator-fixed"; regulator-name = "+3.3V_LP0"; regulator-min-microvolt = <3300000>; @@ -1203,7 +1203,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vdd_hdmi_pll: regulator@11 { + vdd_hdmi_pll: regulator-hdmipll { compatible = "regulator-fixed"; regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL"; regulator-min-microvolt = <1050000>; @@ -1212,7 +1212,7 @@ vin-supply = <&vdd_1v05_run>; }; - vdd_5v0_hdmi: regulator@12 { + vdd_5v0_hdmi: regulator-hdmicon { compatible = "regulator-fixed"; regulator-name = "+5V_HDMI_CON"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts index 777b5a14de55b..db388ddd062f4 100644 --- a/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts +++ b/arch/arm/boot/dts/tegra20-acer-a500-picasso.dts @@ -828,7 +828,7 @@ }; /* PMIC has a built-in 32KHz oscillator which is used by PMC */ - clk32k_in: clock@0 { + clk32k_in: clock-32k-in { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <32768>; @@ -840,7 +840,7 @@ * oscillator is used as a reference clock-source by the * Azurewave WiFi/BT module. */ - rtc_32k_wifi: clock@1 { + rtc_32k_wifi: clock-32k-wifi { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <32768>; @@ -965,7 +965,7 @@ }; }; - vdd_5v0_sys: regulator@0 { + vdd_5v0_sys: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; regulator-min-microvolt = <5000000>; @@ -973,7 +973,7 @@ regulator-always-on; }; - vdd_3v3_sys: regulator@1 { + vdd_3v3_sys: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3_vs"; regulator-min-microvolt = <3300000>; @@ -982,7 +982,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_1v8_sys: regulator@2 { + vdd_1v8_sys: regulator-1v8 { compatible = "regulator-fixed"; regulator-name = "vdd_1v8_vs"; regulator-min-microvolt = <1800000>; @@ -991,7 +991,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_pnl: regulator@3 { + vdd_pnl: regulator-panel { compatible = "regulator-fixed"; regulator-name = "vdd_panel"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/tegra20-harmony.dts b/arch/arm/boot/dts/tegra20-harmony.dts index b21bab437ebd9..b4cc88b0f130b 100644 --- a/arch/arm/boot/dts/tegra20-harmony.dts +++ b/arch/arm/boot/dts/tegra20-harmony.dts @@ -641,7 +641,7 @@ default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -667,7 +667,7 @@ backlight = <&backlight>; }; - vdd_5v0_reg: regulator@0 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; regulator-min-microvolt = <5000000>; @@ -675,7 +675,7 @@ regulator-always-on; }; - regulator@1 { + regulator-1v5 { compatible = "regulator-fixed"; regulator-name = "vdd_1v5"; regulator-min-microvolt = <1500000>; @@ -683,7 +683,7 @@ gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; }; - regulator@2 { + regulator-1v2 { compatible = "regulator-fixed"; regulator-name = "vdd_1v2"; regulator-min-microvolt = <1200000>; @@ -692,7 +692,7 @@ enable-active-high; }; - pci_vdd_reg: regulator@3 { + pci_vdd_reg: regulator-1v05 { compatible = "regulator-fixed"; regulator-name = "vdd_1v05"; regulator-min-microvolt = <1050000>; @@ -701,7 +701,7 @@ enable-active-high; }; - vdd_pnl_reg: regulator@4 { + vdd_pnl_reg: 
regulator-pnl { compatible = "regulator-fixed"; regulator-name = "vdd_pnl"; regulator-min-microvolt = <2800000>; @@ -710,7 +710,7 @@ enable-active-high; }; - vdd_bl_reg: regulator@5 { + vdd_bl_reg: regulator-bl { compatible = "regulator-fixed"; regulator-name = "vdd_bl"; regulator-min-microvolt = <2800000>; @@ -719,7 +719,7 @@ enable-active-high; }; - vdd_5v0_hdmi: regulator@6 { + vdd_5v0_hdmi: regulator-hdmi { compatible = "regulator-fixed"; regulator-name = "VDDIO_HDMI"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra20-medcom-wide.dts b/arch/arm/boot/dts/tegra20-medcom-wide.dts index b31c9bca16e6a..cb6199d7d2249 100644 --- a/arch/arm/boot/dts/tegra20-medcom-wide.dts +++ b/arch/arm/boot/dts/tegra20-medcom-wide.dts @@ -92,7 +92,7 @@ clock-names = "pll_a", "pll_a_out0", "mclk"; }; - vcc_24v_reg: regulator@100 { + vcc_24v_reg: regulator-24v0 { compatible = "regulator-fixed"; regulator-name = "vcc_24v"; regulator-min-microvolt = <24000000>; @@ -100,7 +100,7 @@ regulator-always-on; }; - vdd_5v0_reg: regulator@101 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; vin-supply = <&vcc_24v_reg>; @@ -109,7 +109,7 @@ regulator-always-on; }; - vdd_3v3_reg: regulator@102 { + vdd_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; vin-supply = <&vcc_24v_reg>; @@ -118,7 +118,7 @@ regulator-always-on; }; - vdd_1v8_reg: regulator@103 { + vdd_1v8_reg: regulator-1v8 { compatible = "regulator-fixed"; regulator-name = "vdd_1v8"; vin-supply = <&vdd_3v3_reg>; diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 21884d9d58cc3..f68d37ae6c645 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -576,7 +576,7 @@ default-brightness-level = <10>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -613,7 +613,7 @@ backlight = <&backlight>; }; - p5valw_reg: regulator@0 { + p5valw_reg: regulator-5v0alw { compatible = "regulator-fixed"; regulator-name = "+5valw"; regulator-min-microvolt = <5000000>; @@ -621,7 +621,7 @@ regulator-always-on; }; - vdd_pnl_reg: regulator@1 { + vdd_pnl_reg: regulator-3v0 { compatible = "regulator-fixed"; regulator-name = "+3VS,vdd_pnl"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/tegra20-plutux.dts b/arch/arm/boot/dts/tegra20-plutux.dts index 5811b7006a9bf..71a8236491dff 100644 --- a/arch/arm/boot/dts/tegra20-plutux.dts +++ b/arch/arm/boot/dts/tegra20-plutux.dts @@ -60,7 +60,7 @@ clock-names = "pll_a", "pll_a_out0", "mclk"; }; - vcc_24v_reg: regulator@100 { + vcc_24v_reg: regulator-24v0 { compatible = "regulator-fixed"; regulator-name = "vcc_24v"; regulator-min-microvolt = <24000000>; @@ -68,7 +68,7 @@ regulator-always-on; }; - vdd_5v0_reg: regulator@101 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; vin-supply = <&vcc_24v_reg>; @@ -77,7 +77,7 @@ regulator-always-on; }; - vdd_3v3_reg: regulator@102 { + vdd_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; vin-supply = <&vcc_24v_reg>; @@ -86,7 +86,7 @@ regulator-always-on; }; - vdd_1v8_reg: regulator@103 { + vdd_1v8_reg: regulator-1v8 { compatible = "regulator-fixed"; regulator-name = "vdd_1v8"; vin-supply = <&vdd_3v3_reg>; diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts index 5aeb7bb6c4151..e270d6d3b8174 ---
a/arch/arm/boot/dts/tegra20-seaboard.dts +++ b/arch/arm/boot/dts/tegra20-seaboard.dts @@ -793,7 +793,7 @@ default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -829,7 +829,7 @@ ddc-i2c-bus = <&lvds_ddc>; }; - vdd_5v0_reg: regulator@0 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; regulator-min-microvolt = <5000000>; @@ -837,7 +837,7 @@ regulator-always-on; }; - regulator@1 { + regulator-1v5 { compatible = "regulator-fixed"; regulator-name = "vdd_1v5"; regulator-min-microvolt = <1500000>; @@ -845,7 +845,7 @@ gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; }; - regulator@2 { + regulator-1v2 { compatible = "regulator-fixed"; regulator-name = "vdd_1v2"; regulator-min-microvolt = <1200000>; @@ -854,7 +854,7 @@ enable-active-high; }; - vbus_reg: regulator@3 { + vbus_reg: regulator-vbus { compatible = "regulator-fixed"; regulator-name = "vdd_vbus_wup1"; regulator-min-microvolt = <5000000>; @@ -865,7 +865,7 @@ regulator-boot-on; }; - vdd_pnl_reg: regulator@4 { + vdd_pnl_reg: regulator-pnl { compatible = "regulator-fixed"; regulator-name = "vdd_pnl"; regulator-min-microvolt = <2800000>; @@ -874,7 +874,7 @@ enable-active-high; }; - vdd_bl_reg: regulator@5 { + vdd_bl_reg: regulator-bl { compatible = "regulator-fixed"; regulator-name = "vdd_bl"; regulator-min-microvolt = <2800000>; @@ -883,7 +883,7 @@ enable-active-high; }; - vdd_hdmi: regulator@6 { + vdd_hdmi: regulator-hdmi { compatible = "regulator-fixed"; regulator-name = "VDDIO_HDMI"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index d5888d9581755..de39c5465c0a9 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -503,13 +503,13 @@ status = "okay"; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; }; - pci_vdd_reg: regulator@1 { + pci_vdd_reg: regulator-1v05 { compatible = "regulator-fixed"; regulator-name = "vdd_1v05"; regulator-min-microvolt = <1050000>; diff --git a/arch/arm/boot/dts/tegra20-tec.dts b/arch/arm/boot/dts/tegra20-tec.dts index 10ff09d86efa7..4f41c74384b2c 100644 --- a/arch/arm/boot/dts/tegra20-tec.dts +++ b/arch/arm/boot/dts/tegra20-tec.dts @@ -69,7 +69,7 @@ clock-names = "pll_a", "pll_a_out0", "mclk"; }; - vcc_24v_reg: regulator@100 { + vcc_24v_reg: regulator-24v { compatible = "regulator-fixed"; regulator-name = "vcc_24v"; regulator-min-microvolt = <24000000>; @@ -77,7 +77,7 @@ regulator-always-on; }; - vdd_5v0_reg: regulator@101 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; vin-supply = <&vcc_24v_reg>; @@ -86,7 +86,7 @@ regulator-always-on; }; - vdd_3v3_reg: regulator@102 { + vdd_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; vin-supply = <&vcc_24v_reg>; @@ -95,7 +95,7 @@ regulator-always-on; }; - vdd_1v8_reg: regulator@103 { + vdd_1v8_reg: regulator-1v8 { compatible = "regulator-fixed"; regulator-name = "vdd_1v8"; vin-supply = <&vdd_3v3_reg>; diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts index d6c99e17f28be..7646a4a1f2be6 100644 --- a/arch/arm/boot/dts/tegra20-trimslice.dts +++ b/arch/arm/boot/dts/tegra20-trimslice.dts @@ -380,7 +380,7 @@ bus-width = <4>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; 
clock-frequency = <32768>; #clock-cells = <0>; @@ -402,7 +402,7 @@ gpios = <&gpio TEGRA_GPIO(X, 7) GPIO_ACTIVE_LOW>; }; - hdmi_vdd_reg: regulator@0 { + hdmi_vdd_reg: regulator-hdmi { compatible = "regulator-fixed"; regulator-name = "avdd_hdmi"; regulator-min-microvolt = <3300000>; @@ -410,7 +410,7 @@ regulator-always-on; }; - hdmi_pll_reg: regulator@1 { + hdmi_pll_reg: regulator-hdmipll { compatible = "regulator-fixed"; regulator-name = "avdd_hdmi_pll"; regulator-min-microvolt = <1800000>; @@ -418,7 +418,7 @@ regulator-always-on; }; - vbus_reg: regulator@2 { + vbus_reg: regulator-vbus { compatible = "regulator-fixed"; regulator-name = "usb1_vbus"; regulator-min-microvolt = <5000000>; @@ -429,7 +429,7 @@ regulator-boot-on; }; - pci_clk_reg: regulator@3 { + pci_clk_reg: regulator-pciclk { compatible = "regulator-fixed"; regulator-name = "pci_clk"; regulator-min-microvolt = <3300000>; @@ -437,7 +437,7 @@ regulator-always-on; }; - pci_vdd_reg: regulator@4 { + pci_vdd_reg: regulator-pcivdd { compatible = "regulator-fixed"; regulator-name = "pci_vdd"; regulator-min-microvolt = <1050000>; diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts index e41ce1b1ec3ff..3d3a7f23792a4 100644 --- a/arch/arm/boot/dts/tegra20-ventana.dts +++ b/arch/arm/boot/dts/tegra20-ventana.dts @@ -607,7 +607,7 @@ default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -648,7 +648,7 @@ ddc-i2c-bus = <&lvds_ddc>; }; - vdd_5v0_reg: regulator@0 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; regulator-min-microvolt = <5000000>; @@ -656,7 +656,7 @@ regulator-always-on; }; - regulator@1 { + regulator-1v5 { compatible = "regulator-fixed"; regulator-name = "vdd_1v5"; regulator-min-microvolt = <1500000>; @@ -664,7 +664,7 @@ gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; }; - regulator@2 { + regulator-1v2 { compatible = "regulator-fixed"; regulator-name = "vdd_1v2"; regulator-min-microvolt = <1200000>; @@ -673,7 +673,7 @@ enable-active-high; }; - vdd_pnl_reg: regulator@3 { + vdd_pnl_reg: regulator-pnl { compatible = "regulator-fixed"; regulator-name = "vdd_pnl"; regulator-min-microvolt = <2800000>; @@ -682,7 +682,7 @@ enable-active-high; }; - vdd_bl_reg: regulator@4 { + vdd_bl_reg: regulator-bl { compatible = "regulator-fixed"; regulator-name = "vdd_bl"; regulator-min-microvolt = <2800000>; diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi index 6f88fb1ed8737..e7a3327e603ca 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-common.dtsi @@ -1070,7 +1070,7 @@ }; /* PMIC has a built-in 32KHz oscillator which is used by PMC */ - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <32768>; @@ -1208,7 +1208,7 @@ }; }; - vdd_5v0_sys: regulator@0 { + vdd_5v0_sys: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v0"; regulator-min-microvolt = <5000000>; @@ -1217,7 +1217,7 @@ regulator-boot-on; }; - vdd_3v3_sys: regulator@1 { + vdd_3v3_sys: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; regulator-min-microvolt = <3300000>; @@ -1227,7 +1227,7 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_pnl: regulator@2 { + vdd_pnl: regulator-panel { compatible = "regulator-fixed"; regulator-name = "vdd_panel"; 
regulator-min-microvolt = <3300000>; @@ -1238,7 +1238,7 @@ vin-supply = <&vdd_3v3_sys>; }; - vcc_3v3_ts: regulator@3 { + vcc_3v3_ts: regulator-ts { compatible = "regulator-fixed"; regulator-name = "ldo_s-1167_3v3"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-maxim-pmic.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-maxim-pmic.dtsi index 53966fa4eef25..cd28e8782f7db 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-maxim-pmic.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-maxim-pmic.dtsi @@ -166,12 +166,12 @@ }; }; - vdd_3v3_sys: regulator@1 { + vdd_3v3_sys: regulator-3v3 { gpio = <&pmic 3 GPIO_ACTIVE_HIGH>; enable-active-high; }; - regulator@4 { + regulator-usb { compatible = "regulator-fixed"; regulator-name = "avdd_usb"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-ti-pmic.dtsi b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-ti-pmic.dtsi index 9365ae607239f..ee4a3f4827690 100644 --- a/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-ti-pmic.dtsi +++ b/arch/arm/boot/dts/tegra30-asus-nexus7-grouper-ti-pmic.dtsi @@ -143,7 +143,7 @@ }; }; - vdd_3v3_sys: regulator@1 { + vdd_3v3_sys: regulator-3v3 { gpio = <&pmic 6 GPIO_ACTIVE_HIGH>; enable-active-high; }; diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index b54cbb24c4d33..24de27a8594e3 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1967,7 +1967,7 @@ status = "okay"; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -1986,7 +1986,7 @@ }; }; - vdd_5v_in_reg: regulator@0 { + vdd_5v_in_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "vdd_5v_in"; regulator-min-microvolt = <5000000>; @@ -1994,7 +1994,7 @@ regulator-always-on; }; - chargepump_5v_reg: regulator@1 { + chargepump_5v_reg: regulator-chargepump { compatible = "regulator-fixed"; regulator-name = "chargepump_5v"; regulator-min-microvolt = <5000000>; @@ -2005,7 +2005,7 @@ gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; }; - ddr_reg: regulator@2 { + ddr_reg: regulator-ddr { compatible = "regulator-fixed"; regulator-name = "vdd_ddr"; regulator-min-microvolt = <1500000>; @@ -2017,7 +2017,7 @@ vin-supply = <&vdd_5v_in_reg>; }; - vdd_5v_sata_reg: regulator@3 { + vdd_5v_sata_reg: regulator-sata { compatible = "regulator-fixed"; regulator-name = "vdd_5v_sata"; regulator-min-microvolt = <5000000>; @@ -2029,7 +2029,7 @@ vin-supply = <&vdd_5v_in_reg>; }; - usb1_vbus_reg: regulator@4 { + usb1_vbus_reg: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "usb1_vbus"; regulator-min-microvolt = <5000000>; @@ -2040,7 +2040,7 @@ vin-supply = <&vdd_5v_in_reg>; }; - usb3_vbus_reg: regulator@5 { + usb3_vbus_reg: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "usb3_vbus"; regulator-min-microvolt = <5000000>; @@ -2051,7 +2051,7 @@ vin-supply = <&vdd_5v_in_reg>; }; - sys_3v3_reg: regulator@6 { + sys_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "sys_3v3,vdd_3v3_alw"; regulator-min-microvolt = <3300000>; @@ -2063,7 +2063,7 @@ vin-supply = <&vdd_5v_in_reg>; }; - sys_3v3_pexs_reg: regulator@7 { + sys_3v3_pexs_reg: regulator-pexs { compatible = "regulator-fixed"; regulator-name = "sys_3v3_pexs"; regulator-min-microvolt = <3300000>; @@ -2075,7 +2075,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_5v0_hdmi: regulator@8 { + vdd_5v0_hdmi: regulator-hdmi { compatible 
= "regulator-fixed"; regulator-name = "+VDD_5V_HDMI"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra30-cardhu-a02.dts b/arch/arm/boot/dts/tegra30-cardhu-a02.dts index 4899e05a0d9c4..247185314f469 100644 --- a/arch/arm/boot/dts/tegra30-cardhu-a02.dts +++ b/arch/arm/boot/dts/tegra30-cardhu-a02.dts @@ -16,7 +16,7 @@ keep-power-in-suspend; }; - ddr_reg: regulator@100 { + ddr_reg: regulator-ddr { compatible = "regulator-fixed"; regulator-name = "vdd_ddr"; regulator-min-microvolt = <1500000>; @@ -27,7 +27,7 @@ gpio = <&pmic 6 GPIO_ACTIVE_HIGH>; }; - sys_3v3_reg: regulator@101 { + sys_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "sys_3v3"; regulator-min-microvolt = <3300000>; @@ -38,7 +38,7 @@ gpio = <&pmic 7 GPIO_ACTIVE_HIGH>; }; - usb1_vbus_reg: regulator@102 { + usb1_vbus_reg: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "usb1_vbus"; regulator-min-microvolt = <5000000>; @@ -49,7 +49,7 @@ vin-supply = <&vdd_5v0_reg>; }; - usb3_vbus_reg: regulator@103 { + usb3_vbus_reg: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "usb3_vbus"; regulator-min-microvolt = <5000000>; @@ -60,7 +60,7 @@ vin-supply = <&vdd_5v0_reg>; }; - vdd_5v0_reg: regulator@104 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "5v0"; regulator-min-microvolt = <5000000>; @@ -69,7 +69,7 @@ gpio = <&pmic 2 GPIO_ACTIVE_HIGH>; }; - vdd_bl_reg: regulator@105 { + vdd_bl_reg: regulator-bl { compatible = "regulator-fixed"; regulator-name = "vdd_bl"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra30-cardhu-a04.dts b/arch/arm/boot/dts/tegra30-cardhu-a04.dts index a11028b8b67bc..2911f08863a06 100644 --- a/arch/arm/boot/dts/tegra30-cardhu-a04.dts +++ b/arch/arm/boot/dts/tegra30-cardhu-a04.dts @@ -16,7 +16,7 @@ keep-power-in-suspend; }; - ddr_reg: regulator@100 { + ddr_reg: regulator-ddr { compatible = "regulator-fixed"; regulator-name = "ddr"; regulator-min-microvolt = <1500000>; @@ -27,7 +27,7 @@ gpio = <&pmic 7 GPIO_ACTIVE_HIGH>; }; - sys_3v3_reg: regulator@101 { + sys_3v3_reg: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "sys_3v3"; regulator-min-microvolt = <3300000>; @@ -38,7 +38,7 @@ gpio = <&pmic 6 GPIO_ACTIVE_HIGH>; }; - usb1_vbus_reg: regulator@102 { + usb1_vbus_reg: regulator-usb1 { compatible = "regulator-fixed"; regulator-name = "usb1_vbus"; regulator-min-microvolt = <5000000>; @@ -49,7 +49,7 @@ vin-supply = <&vdd_5v0_reg>; }; - usb3_vbus_reg: regulator@103 { + usb3_vbus_reg: regulator-usb3 { compatible = "regulator-fixed"; regulator-name = "usb3_vbus"; regulator-min-microvolt = <5000000>; @@ -60,7 +60,7 @@ vin-supply = <&vdd_5v0_reg>; }; - vdd_5v0_reg: regulator@104 { + vdd_5v0_reg: regulator-5v0 { compatible = "regulator-fixed"; regulator-name = "5v0"; regulator-min-microvolt = <5000000>; @@ -69,7 +69,7 @@ gpio = <&pmic 8 GPIO_ACTIVE_HIGH>; }; - vdd_bl_reg: regulator@105 { + vdd_bl_reg: regulator-bl { compatible = "regulator-fixed"; regulator-name = "vdd_bl"; regulator-min-microvolt = <5000000>; @@ -80,7 +80,7 @@ gpio = <&gpio TEGRA_GPIO(DD, 2) GPIO_ACTIVE_HIGH>; }; - vdd_bl2_reg: regulator@106 { + vdd_bl2_reg: regulator-bl2 { compatible = "regulator-fixed"; regulator-name = "vdd_bl2"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/tegra30-cardhu.dtsi b/arch/arm/boot/dts/tegra30-cardhu.dtsi index b2bba923eb93b..506cb747b2db4 100644 --- a/arch/arm/boot/dts/tegra30-cardhu.dtsi +++ b/arch/arm/boot/dts/tegra30-cardhu.dtsi @@ -434,7 +434,7 @@ 
default-brightness-level = <6>; }; - clk32k_in: clock@0 { + clk32k_in: clock-32k { compatible = "fixed-clock"; clock-frequency = <32768>; #clock-cells = <0>; @@ -476,7 +476,7 @@ backlight = <&backlight>; }; - vdd_ac_bat_reg: regulator@0 { + vdd_ac_bat_reg: regulator-acbat { compatible = "regulator-fixed"; regulator-name = "vdd_ac_bat"; regulator-min-microvolt = <5000000>; @@ -484,7 +484,7 @@ regulator-always-on; }; - cam_1v8_reg: regulator@1 { + cam_1v8_reg: regulator-cam { compatible = "regulator-fixed"; regulator-name = "cam_1v8"; regulator-min-microvolt = <1800000>; @@ -494,7 +494,7 @@ vin-supply = <&vio_reg>; }; - cp_5v_reg: regulator@2 { + cp_5v_reg: regulator-5v0cp { compatible = "regulator-fixed"; regulator-name = "cp_5v"; regulator-min-microvolt = <5000000>; @@ -505,7 +505,7 @@ gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; }; - emmc_3v3_reg: regulator@3 { + emmc_3v3_reg: regulator-emmc { compatible = "regulator-fixed"; regulator-name = "emmc_3v3"; regulator-min-microvolt = <3300000>; @@ -517,7 +517,7 @@ vin-supply = <&sys_3v3_reg>; }; - modem_3v3_reg: regulator@4 { + modem_3v3_reg: regulator-modem { compatible = "regulator-fixed"; regulator-name = "modem_3v3"; regulator-min-microvolt = <3300000>; @@ -526,7 +526,7 @@ gpio = <&gpio TEGRA_GPIO(D, 6) GPIO_ACTIVE_HIGH>; }; - pex_hvdd_3v3_reg: regulator@5 { + pex_hvdd_3v3_reg: regulator-pex { compatible = "regulator-fixed"; regulator-name = "pex_hvdd_3v3"; regulator-min-microvolt = <3300000>; @@ -536,7 +536,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_cam1_ldo_reg: regulator@6 { + vdd_cam1_ldo_reg: regulator-cam1 { compatible = "regulator-fixed"; regulator-name = "vdd_cam1_ldo"; regulator-min-microvolt = <2800000>; @@ -546,7 +546,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_cam2_ldo_reg: regulator@7 { + vdd_cam2_ldo_reg: regulator-cam2 { compatible = "regulator-fixed"; regulator-name = "vdd_cam2_ldo"; regulator-min-microvolt = <2800000>; @@ -556,7 +556,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_cam3_ldo_reg: regulator@8 { + vdd_cam3_ldo_reg: regulator-cam3 { compatible = "regulator-fixed"; regulator-name = "vdd_cam3_ldo"; regulator-min-microvolt = <3300000>; @@ -566,7 +566,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_com_reg: regulator@9 { + vdd_com_reg: regulator-com { compatible = "regulator-fixed"; regulator-name = "vdd_com"; regulator-min-microvolt = <3300000>; @@ -578,7 +578,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_fuse_3v3_reg: regulator@10 { + vdd_fuse_3v3_reg: regulator-fuse { compatible = "regulator-fixed"; regulator-name = "vdd_fuse_3v3"; regulator-min-microvolt = <3300000>; @@ -588,7 +588,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_pnl1_reg: regulator@11 { + vdd_pnl1_reg: regulator-pnl1 { compatible = "regulator-fixed"; regulator-name = "vdd_pnl1"; regulator-min-microvolt = <3300000>; @@ -600,7 +600,7 @@ vin-supply = <&sys_3v3_reg>; }; - vdd_vid_reg: regulator@12 { + vdd_vid_reg: regulator-vid { compatible = "regulator-fixed"; regulator-name = "vddio_vid"; regulator-min-microvolt = <5000000>; From 5e30efb61bf1491581969ebe9ec3b69a4ad6e2f6 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Sun, 15 Aug 2021 08:39:03 +0300 Subject: [PATCH 1070/1202] dt-bindings: display: simple: Add HannStar HSD101PWW2 Add HannStar HSD101PWW2 10.1" WXGA (1280x800) TFT-LCD LVDS panel to the list of compatibles. 
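For reference, a board that uses this compatible would describe the panel with a node along the lines of the sketch below; the node label, regulator and backlight phandles are illustrative placeholders, not values taken from a real board:

	panel: display-panel {
		compatible = "hannstar,hsd101pww2";
		power-supply = <&vdd_3v3_panel>;	/* illustrative supply */
		backlight = <&backlight>;		/* illustrative phandle */

		port {
			panel_input: endpoint {
				remote-endpoint = <&lvds_encoder_output>;
			};
		};
	};

The TF201 device tree later in this series selects this panel the same way, by setting this compatible on its shared display-panel node.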
Signed-off-by: Svyatoslav Ryhel --- .../devicetree/bindings/display/panel/panel-simple.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index f3c9395d23b6a..685ee235ee8af 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -154,6 +154,8 @@ properties: - giantplus,gpm940b0 # HannStar Display Corp. HSD070PWW1 7.0" WXGA TFT LCD panel - hannstar,hsd070pww1 + # HannStar Display Corp. HSD101PWW2 10.1" WXGA (1280x800) LVDS panel + - hannstar,hsd101pww2 # HannStar Display Corp. HSD100PXN1 10.1" XGA LVDS panel - hannstar,hsd100pxn1 # Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel From d5b1de483c71eb47b35224105e826f89aa871660 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Fri, 13 Aug 2021 11:56:20 +0300 Subject: [PATCH 1071/1202] drm/panel: simple: Add support for HannStar HSD101PWW2 panel Add definition of the HannStar HSD101PWW2 Rev0-A00/A01 LCD SuperIPS+ HD panel. Signed-off-by: Svyatoslav Ryhel --- drivers/gpu/drm/panel/panel-simple.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index dde033066f3db..f821bd4ef31e1 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1902,6 +1902,31 @@ static const struct panel_desc hannstar_hsd070pww1 = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +static const struct display_timing hannstar_hsd101pww2_timing = { + .pixelclock = { 64300000, 71100000, 82000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 1, 1, 10 }, + .hback_porch = { 1, 1, 10 }, + .hsync_len = { 58, 158, 661 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 1, 1, 10 }, + .vback_porch = { 1, 1, 10 }, + .vsync_len = { 1, 21, 203 }, + .flags = DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc hannstar_hsd101pww2 = { + .timings = &hannstar_hsd101pww2_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 217, + .height = 136, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing hannstar_hsd100pxn1_timing = { .pixelclock = { 55000000, 65000000, 75000000 }, .hactive = { 1024, 1024, 1024 }, @@ -3772,6 +3797,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "hannstar,hsd070pww1", .data = &hannstar_hsd070pww1, + }, { + .compatible = "hannstar,hsd101pww2", + .data = &hannstar_hsd101pww2, }, { .compatible = "hannstar,hsd100pxn1", .data = &hannstar_hsd100pxn1, From a069d191fdf99191e84faacf065b72d17ae43d08 Mon Sep 17 00:00:00 2001 From: Svyatoslav Date: Mon, 1 Jun 2020 23:36:57 +0300 Subject: [PATCH 1072/1202] dt-bindings: ARM: tegra: Document ASUS Transformers Document Tegra20/30-based ASUS Transformer Series tablet devices. This group includes EeePad TF101, Prime TF201, Pad TF300T, TF300TG and Infinity TF700T. 
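As a minimal sketch, each of these boards then identifies itself through its root-node compatible, matching one of the newly documented entries (TF300T picked arbitrarily; the model string is illustrative):

	/ {
		model = "ASUS Transformer Pad TF300T";
		compatible = "asus,tf300t", "nvidia,tegra30";
	};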
Signed-off-by: David Heidelberg Signed-off-by: Svyatoslav Ryhel --- Documentation/devicetree/bindings/arm/tegra.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index d79d36ac0c44a..6db49792b0414 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -36,6 +36,9 @@ properties: - toradex,colibri_t20-iris - const: toradex,colibri_t20 - const: nvidia,tegra20 + - items: + - const: asus,tf101 + - const: nvidia,tegra20 - items: - const: acer,picasso - const: nvidia,tegra20 @@ -49,6 +52,17 @@ properties: - nvidia,cardhu-a04 - const: nvidia,cardhu - const: nvidia,tegra30 + - items: + - const: asus,tf201 + - const: nvidia,tegra30 + - items: + - enum: + - asus,tf300t + - asus,tf300tg + - const: nvidia,tegra30 + - items: + - const: asus,tf700t + - const: nvidia,tegra30 - items: - const: toradex,apalis_t30-eval - const: toradex,apalis_t30 From f7ae3dff2e0d06f9eaa2fc16f13d6b5afe6bc933 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Sat, 11 Sep 2021 22:28:11 +0200 Subject: [PATCH 1073/1202] dt-bindings: ARM: tegra: Document Pegatron Chagall Document Pegatron Chagall, which is a Tegra30-based tablet device. Signed-off-by: David Heidelberg --- Documentation/devicetree/bindings/arm/tegra.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index 6db49792b0414..93e334cffe065 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -88,6 +88,9 @@ properties: - items: - const: ouya,ouya - const: nvidia,tegra30 + - items: + - const: pegatron,chagall + - const: nvidia,tegra30 - items: - enum: - nvidia,dalmore From 87dba7073b5cd248dee1a02bb8ad564f899cfc68 Mon Sep 17 00:00:00 2001 From: Nikola Milosavljevic Date: Thu, 25 Mar 2021 21:44:44 +0000 Subject: [PATCH 1074/1202] ARM: tegra: Add device-tree for ASUS Transformer EeePad TF101 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device-tree for Tegra20-based ASUS Transformer EeePad TF101.
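As a reading aid for the large file below: its display path is a three-stage of-graph chain, condensed here with every other property omitted (the full nodes appear in the patch itself):

	dc@54200000 {
		rgb {
			port@0 {
				lcd_output: endpoint {
					remote-endpoint = <&lvds_encoder_input>;
				};
			};
		};
	};

	lvds-encoder {	/* TI SN75LVDS83 bridge */
		ports {
			port@0 {
				lvds_encoder_input: endpoint {
					remote-endpoint = <&lcd_output>;
				};
			};

			port@1 {
				lvds_encoder_output: endpoint {
					remote-endpoint = <&panel_input>;
				};
			};
		};
	};

	display-panel {	/* AUO B101EW05 LVDS panel */
		port {
			panel_input: endpoint {
				remote-endpoint = <&lvds_encoder_output>;
			};
		};
	};

Each remote-endpoint pair links two nodes into one display pipeline, which is how the DRM driver discovers the bridge and the panel.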
Co-developed-by: David Heidelberg Co-developed-by: Dmitry Osipenko Co-developed-by: Svyatoslav Ryhel Co-developed-by: Antoni Aloy Torrens Signed-off-by: David Heidelberg Signed-off-by: Svyatoslav Ryhel Signed-off-by: Antoni Aloy Torrens Signed-off-by: Nikola Milosavljević Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/tegra20-asus-tf101.dts | 1187 ++++++++++++++++++++++ 2 files changed, 1188 insertions(+) create mode 100644 arch/arm/boot/dts/tegra20-asus-tf101.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 52f4fe6214d53..3c33e469d7035 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1303,6 +1303,7 @@ dtb-$(CONFIG_MACH_SUNIV) += \ suniv-f1c100s-licheepi-nano.dtb dtb-$(CONFIG_ARCH_TEGRA_2x_SOC) += \ tegra20-acer-a500-picasso.dtb \ + tegra20-asus-tf101.dtb \ tegra20-harmony.dtb \ tegra20-colibri-eval-v3.dtb \ tegra20-colibri-iris.dtb \ diff --git a/arch/arm/boot/dts/tegra20-asus-tf101.dts b/arch/arm/boot/dts/tegra20-asus-tf101.dts new file mode 100644 index 0000000000000..223f74abede45 --- /dev/null +++ b/arch/arm/boot/dts/tegra20-asus-tf101.dts @@ -0,0 +1,1187 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include +#include +#include +#include + +#include "tegra20.dtsi" +#include "tegra20-cpu-opp.dtsi" +#include "tegra20-cpu-opp-microvolt.dtsi" + +/ { + model = "ASUS EeePad Transformer TF101"; + compatible = "asus,tf101", "nvidia,tegra20"; + + aliases { + mmc0 = &sdmmc4; /* eMMC */ + mmc1 = &sdmmc3; /* MicroSD */ + mmc2 = &sdmmc1; /* WiFi */ + + rtc0 = &pmic; + rtc1 = "/rtc@7000e000"; + + serial0 = &uartd; + serial1 = &uartc; /* Bluetooth */ + serial2 = &uartb; /* GPS */ + }; + + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. 
+ */ + chosen {}; + + memory@0 { + reg = <0x00000000 0x40000000>; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + ramoops@2ffe0000 { + compatible = "ramoops"; + reg = <0x2ffe0000 0x10000>; /* 64kB */ + console-size = <0x8000>; /* 32kB */ + record-size = <0x400>; /* 1kB */ + ecc-size = <16>; + }; + + linux,cma@30000000 { + compatible = "shared-dma-pool"; + alloc-ranges = <0x30000000 0x10000000>; + size = <0x10000000>; /* 256MiB */ + linux,cma-default; + reusable; + }; + }; + + host1x@50000000 { + dc@54200000 { + rgb { + status = "okay"; + + port@0 { + lcd_output: endpoint { + remote-endpoint = <&lvds_encoder_input>; + bus-width = <18>; + }; + }; + }; + }; + + hdmi@54280000 { + status = "okay"; + + vdd-supply = <&hdmi_vdd_reg>; + pll-supply = <&hdmi_pll_reg>; + hdmi-supply = <&vdd_hdmi_en>; + + nvidia,ddc-i2c-bus = <&hdmi_ddc>; + nvidia,hpd-gpio = <&gpio TEGRA_GPIO(N, 7) + GPIO_ACTIVE_HIGH>; + }; + }; + + gpio@6000d000 { + charging-enable-hog { + gpio-hog; + gpios = ; + output-low; + }; + }; + + pinmux@70000014 { + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: pinmux { + ata { + nvidia,pins = "ata"; + nvidia,function = "ide"; + }; + atb { + nvidia,pins = "atb", "gma", "gme"; + nvidia,function = "sdio4"; + }; + atc { + nvidia,pins = "atc"; + nvidia,function = "nand"; + }; + atd { + nvidia,pins = "atd", "ate", "gmb", "spia", + "spib", "spic"; + nvidia,function = "gmi"; + }; + cdev1 { + nvidia,pins = "cdev1"; + nvidia,function = "plla_out"; + }; + cdev2 { + nvidia,pins = "cdev2"; + nvidia,function = "pllp_out4"; + }; + crtp { + nvidia,pins = "crtp"; + nvidia,function = "crt"; + }; + lm1 { + nvidia,pins = "lm1"; + nvidia,function = "rsvd3"; + }; + csus { + nvidia,pins = "csus"; + nvidia,function = "vi_sensor_clk"; + }; + dap1 { + nvidia,pins = "dap1"; + nvidia,function = "dap1"; + }; + dap2 { + nvidia,pins = "dap2"; + nvidia,function = "dap2"; + }; + dap3 { + nvidia,pins = "dap3"; + nvidia,function = "dap3"; + }; + dap4 { + nvidia,pins = "dap4"; + nvidia,function = "dap4"; + }; + dta { + nvidia,pins = "dta", "dtb", "dtc", "dtd", "dte"; + nvidia,function = "vi"; + }; + dtf { + nvidia,pins = "dtf"; + nvidia,function = "i2c3"; + }; + gmc { + nvidia,pins = "gmc"; + nvidia,function = "uartd"; + }; + gmd { + nvidia,pins = "gmd"; + nvidia,function = "sflash"; + }; + gpu { + nvidia,pins = "gpu"; + nvidia,function = "pwm"; + }; + gpu7 { + nvidia,pins = "gpu7"; + nvidia,function = "rtck"; + }; + gpv { + nvidia,pins = "gpv", "slxa"; + nvidia,function = "pcie"; + }; + hdint { + nvidia,pins = "hdint"; + nvidia,function = "hdmi"; + }; + i2cp { + nvidia,pins = "i2cp"; + nvidia,function = "i2cp"; + }; + irrx { + nvidia,pins = "irrx", "irtx"; + nvidia,function = "uartb"; + }; + kbca { + nvidia,pins = "kbca", "kbcb", "kbcc", "kbcd", + "kbce", "kbcf"; + nvidia,function = "kbc"; + }; + lcsn { + nvidia,pins = "lcsn", "ldc", "lm0", "lpw1", + "lsdi", "lvp0"; + nvidia,function = "rsvd4"; + }; + ld0 { + nvidia,pins = "ld0", "ld1", "ld2", "ld3", "ld4", + "ld5", "ld6", "ld7", "ld8", "ld9", + "ld10", "ld11", "ld12", "ld13", "ld14", + "ld15", "ld16", "ld17", "ldi", "lhp0", + "lhp1", "lhp2", "lhs", "lpp", "lpw0", + "lpw2", "lsc0", "lsc1", "lsck", "lsda", + "lspi", "lvp1", "lvs"; + nvidia,function = "displaya"; + }; + owc { + nvidia,pins = "owc", "spdi", "spdo", "uac"; + nvidia,function = "rsvd2"; + }; + pmc { + nvidia,pins = "pmc"; + nvidia,function = "pwr_on"; + }; + rm { + nvidia,pins = "rm"; + nvidia,function = "i2c1"; + }; + sdb { + nvidia,pins = 
"sdb", "sdc", "sdd", "slxc", "slxk"; + nvidia,function = "sdio3"; + }; + sdio1 { + nvidia,pins = "sdio1"; + nvidia,function = "sdio1"; + }; + slxd { + nvidia,pins = "slxd"; + nvidia,function = "spdif"; + }; + spid { + nvidia,pins = "spid", "spie", "spif"; + nvidia,function = "spi1"; + }; + spig { + nvidia,pins = "spig", "spih"; + nvidia,function = "spi2_alt"; + }; + uaa { + nvidia,pins = "uaa", "uab", "uda"; + nvidia,function = "ulpi"; + }; + uad { + nvidia,pins = "uad"; + nvidia,function = "irda"; + }; + uca { + nvidia,pins = "uca", "ucb"; + nvidia,function = "uartc"; + }; + conf_ata { + nvidia,pins = "ata", "atb", "atc", "atd", + "cdev1", "cdev2", "dap1", "dap4", + "dte", "ddc", "dtf", "gma", "gmc", + "gme", "gpu", "gpu7", "gpv", "i2cp", + "irrx", "irtx", "pta", "rm", "sdc", + "sdd", "slxc", "slxd", "slxk", "spdi", + "spdo", "uac", "uad", + "uda", "csus"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_ate { + nvidia,pins = "ate", "dap2", "dap3", "gmb", "gmd", + "owc", "spia", "spib", "spic", + "spid", "spie", "spig", "slxa"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_ck32 { + nvidia,pins = "ck32", "ddrc", "pmca", "pmcb", + "pmcc", "pmcd", "pmce", "xm2c", "xm2d"; + nvidia,pull = ; + }; + conf_crtp { + nvidia,pins = "crtp", "spih"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_dta { + nvidia,pins = "dta", "dtb", "dtc", "dtd"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_spif { + nvidia,pins = "spif"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_hdint { + nvidia,pins = "hdint", "lcsn", "ldc", "lm1", + "lpw1", "lsck", "lsda", "lsdi", "lvp0"; + nvidia,tristate = ; + }; + conf_kbca { + nvidia,pins = "kbca", "kbcb", "kbcc", "kbcd", + "kbce", "kbcf", "sdio1", "uaa", "uab", + "uca", "ucb"; + nvidia,pull = ; + nvidia,tristate = ; + }; + conf_lc { + nvidia,pins = "lc", "ls"; + nvidia,pull = ; + }; + conf_ld0 { + nvidia,pins = "ld0", "ld1", "ld2", "ld3", "ld4", + "ld5", "ld6", "ld7", "ld8", "ld9", + "ld10", "ld11", "ld12", "ld13", "ld14", + "ld15", "ld16", "ld17", "ldi", "lhp0", + "lhp1", "lhp2", "lhs", "lm0", "lpp", + "lpw0", "lpw2", "lsc0", "lsc1", "lspi", + "lvp1", "lvs", "pmc", "sdb"; + nvidia,tristate = ; + }; + conf_ld17_0 { + nvidia,pins = "ld17_0", "ld19_18", "ld21_20", + "ld23_22"; + nvidia,pull = ; + }; + drive_sdio1 { + nvidia,pins = "drive_sdio1", "drive_ddc", "drive_vi1"; + nvidia,high-speed-mode = ; + nvidia,schmitt = ; + nvidia,low-power-mode = ; + nvidia,pull-down-strength = <31>; + nvidia,pull-up-strength = <31>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_csus { + nvidia,pins = "drive_csus"; + nvidia,high-speed-mode = ; + nvidia,schmitt = ; + nvidia,low-power-mode = ; + nvidia,pull-down-strength = <31>; + nvidia,pull-up-strength = <31>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + }; + + state_i2cmux_ddc: pinmux_i2cmux_ddc { + ddc { + nvidia,pins = "ddc"; + nvidia,function = "i2c2"; + }; + pta { + nvidia,pins = "pta"; + nvidia,function = "rsvd4"; + }; + }; + + state_i2cmux_pta: pinmux_i2cmux_pta { + ddc { + nvidia,pins = "ddc"; + nvidia,function = "rsvd4"; + }; + pta { + nvidia,pins = "pta"; + nvidia,function = "i2c2"; + }; + }; + + state_i2cmux_idle: pinmux_i2cmux_idle { + ddc { + nvidia,pins = "ddc"; + nvidia,function = "rsvd4"; + }; + pta { + nvidia,pins = "pta"; + nvidia,function = "rsvd4"; + }; + }; + }; + + i2s@70002800 { + status = "okay"; + }; + + uartb: serial@70006040 { + compatible = "nvidia,tegra20-hsuart"; + /* GPS BCM4751 */ + }; + + uartc: serial@70006200 { + compatible = 
"nvidia,tegra20-hsuart"; + status = "okay"; + + /* Azurewave AW-NH665 BCM4329B1 */ + bluetooth { + compatible = "brcm,bcm4329-bt"; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wakeup"; + + /* PLLP 216MHz / 16 / 4 */ + max-speed = <3375000>; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "txco"; + + vbat-supply = <&vdd_3v3_sys>; + vddio-supply = <&vdd_1v8_sys>; + + device-wakeup-gpios = <&gpio TEGRA_GPIO(U, 1) GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio TEGRA_GPIO(U, 0) GPIO_ACTIVE_HIGH>; + }; + }; + + serial@70006300 { + status = "okay"; + }; + + pwm: pwm@7000a000 { + status = "okay"; + }; + + i2c@7000c000 { + status = "okay"; + clock-frequency = <400000>; + + /* Aichi AMI306 digital compass */ + magnetometer@e { + compatible = "asahi-kasei,ak8974"; + reg = <0xe>; + + avdd-supply = <&vdd_3v3_sys>; + dvdd-supply = <&vdd_1v8_sys>; + + mount-matrix = "-1", "0", "0", + "0", "1", "0", + "0", "0", "-1"; + }; + + wm8903: audio-codec@1a { + compatible = "wlf,wm8903"; + reg = <0x1a>; + + interrupt-parent = <&gpio>; + interrupts = ; + + gpio-controller; + #gpio-cells = <2>; + + micdet-cfg = <0x83>; + micdet-delay = <100>; + + gpio-cfg = < + 0xffffffff /* don't touch */ + 0xffffffff /* don't touch */ + 0x00000000 /* Speaker-enable GPIO, output, low */ + 0x00000400 /* Mic bias current detect */ + 0xffffffff /* don't touch */ + >; + + AVDD-supply = <&vdd_1v8_sys>; + CPVDD-supply = <&vdd_1v8_sys>; + DBVDD-supply = <&vdd_1v8_sys>; + DCVDD-supply = <&vdd_1v8_sys>; + }; + + /* Atmel MXT1386 Touchscreen */ + touchscreen@5b { + compatible = "atmel,maxtouch"; + reg = <0x5b>; + + interrupt-parent = <&gpio>; + interrupts = ; + + reset-gpios = <&gpio TEGRA_GPIO(Q, 7) GPIO_ACTIVE_LOW>; + + vdda-supply = <&vdd_3v3_sys>; + vdd-supply = <&vdd_3v3_sys>; + + atmel,wakeup-method = ; + }; + + gyroscope@68 { + compatible = "invensense,mpu3050"; + reg = <0x68>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_3v3_sys>; + vlogic-supply = <&vdd_1v8_sys>; + + mount-matrix = "0", "1", "0", + "-1", "0", "0", + "0", "0", "1"; + + i2c-gate { + #address-cells = <1>; + #size-cells = <0>; + + accelerometer@f { + compatible = "kionix,kxtf9"; + reg = <0xf>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_1v8_sys>; + vddio-supply = <&vdd_1v8_sys>; + + mount-matrix = "1", "0", "0", + "0", "1", "0", + "0", "0", "1"; + }; + }; + }; + }; + + i2c2: i2c@7000c400 { + status = "okay"; + clock-frequency = <100000>; + }; + + i2cmux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + + i2c-parent = <&i2c2>; + + pinctrl-names = "ddc", "pta", "idle"; + pinctrl-0 = <&state_i2cmux_ddc>; + pinctrl-1 = <&state_i2cmux_pta>; + pinctrl-2 = <&state_i2cmux_idle>; + + hdmi_ddc: i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + }; + + lvds_ddc: i2c@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + smart-battery@b { + compatible = "ti,bq20z75", "sbs,sbs-battery"; + reg = <0xb>; + sbs,i2c-retry-count = <2>; + sbs,poll-retry-count = <10>; + power-supplies = <&mains>; + }; + }; + }; + + i2c@7000c500 { + status = "okay"; + clock-frequency = <400000>; + }; + + i2c@7000d000 { + status = "okay"; + clock-frequency = <400000>; + + pmic: pmic@34 { + compatible = "ti,tps6586x"; + reg = <0x34>; + interrupts = ; + + ti,system-power-controller; + + #gpio-cells = <2>; + gpio-controller; + + sys-supply = <&vdd_5v0_sys>; + vin-sm0-supply = <&sys_reg>; + vin-sm1-supply = <&sys_reg>; + vin-sm2-supply = <&sys_reg>; + 
vinldo01-supply = <&sm2_reg>; + vinldo23-supply = <&sm2_reg>; + vinldo4-supply = <&sm2_reg>; + vinldo678-supply = <&sm2_reg>; + vinldo9-supply = <&sm2_reg>; + + regulators { + sys_reg: sys { + regulator-name = "vdd_sys"; + regulator-always-on; + }; + + vdd_core: sm0 { + regulator-name = "vdd_sm0,vdd_core"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1300000>; + regulator-coupled-with = <&rtc_vdd &vdd_cpu>; + regulator-coupled-max-spread = <170000 550000>; + regulator-always-on; + regulator-boot-on; + + nvidia,tegra-core-regulator; + }; + + vdd_cpu: sm1 { + regulator-name = "vdd_sm1,vdd_cpu"; + regulator-min-microvolt = <750000>; + regulator-max-microvolt = <1125000>; + regulator-coupled-with = <&vdd_core &rtc_vdd>; + regulator-coupled-max-spread = <550000 550000>; + regulator-always-on; + regulator-boot-on; + + nvidia,tegra-cpu-regulator; + }; + + sm2_reg: sm2 { + regulator-name = "vdd_sm2,vin_ldo*"; + regulator-min-microvolt = <3700000>; + regulator-max-microvolt = <3700000>; + regulator-always-on; + }; + + /* LDO0 is not connected to anything */ + + ldo1 { + regulator-name = "vdd_ldo1,avdd_pll*"; + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1100000>; + regulator-always-on; + }; + + rtc_vdd: ldo2 { + regulator-name = "vdd_ldo2,vdd_rtc"; + regulator-min-microvolt = <950000>; + regulator-max-microvolt = <1300000>; + regulator-coupled-with = <&vdd_core &vdd_cpu>; + regulator-coupled-max-spread = <170000 550000>; + regulator-always-on; + regulator-boot-on; + + nvidia,tegra-rtc-regulator; + }; + + ldo3 { + regulator-name = "vdd_ldo3,avdd_usb*"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + ldo4 { + regulator-name = "vdd_ldo4,avdd_osc,vddio_sys"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + vcore_emmc: ldo5 { + regulator-name = "vdd_ldo5,vcore_mmc"; + regulator-min-microvolt = <2850000>; + regulator-max-microvolt = <2850000>; + regulator-always-on; + }; + + ldo6 { + regulator-name = "vdd_ldo6,avdd_vdac"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + hdmi_vdd_reg: ldo7 { + regulator-name = "vdd_ldo7,avdd_hdmi,vdd_fuse"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + + hdmi_pll_reg: ldo8 { + regulator-name = "vdd_ldo8,avdd_hdmi_pll"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + ldo9 { + regulator-name = "vdd_ldo9,avdd_2v85,vdd_ddr_rx"; + regulator-min-microvolt = <2850000>; + regulator-max-microvolt = <2850000>; + regulator-always-on; + }; + + ldo_rtc { + regulator-name = "vdd_rtc_out,vdd_cell"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + }; + }; + + nct1008: temperature-sensor@4c { + compatible = "onnn,nct1008"; + reg = <0x4c>; + vcc-supply = <&vdd_3v3_sys>; + + interrupt-parent = <&gpio>; + interrupts = ; + + #thermal-sensor-cells = <1>; + }; + }; + + pmc@7000e400 { + nvidia,invert-interrupt; + nvidia,suspend-mode = <1>; + nvidia,cpu-pwr-good-time = <2000>; + nvidia,cpu-pwr-off-time = <100>; + nvidia,core-pwr-good-time = <3845 3845>; + nvidia,core-pwr-off-time = <458>; + nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; + }; + + /* USB via ASUS connector */ + usb@c5000000 { + compatible = "nvidia,tegra20-udc"; + status = "okay"; + dr_mode = "peripheral"; + }; + + usb-phy@c5000000 { + status = "okay"; + dr_mode = "peripheral"; + 
nvidia,xcvr-setup-use-fuses; + nvidia,xcvr-lsfslew = <2>; + nvidia,xcvr-lsrslew = <2>; + vbus-supply = <&vdd_5v0_sys>; + }; + + /* Dock's USB port */ + usb@c5008000 { + status = "okay"; + }; + + usb-phy@c5008000 { + status = "okay"; + nvidia,xcvr-setup-use-fuses; + vbus-supply = <&vdd_5v0_sys>; + }; + + brcm_wifi_pwrseq: wifi-pwrseq { + compatible = "mmc-pwrseq-simple"; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "ext_clock"; + + reset-gpios = <&gpio TEGRA_GPIO(K, 6) GPIO_ACTIVE_LOW>; + post-power-on-delay-ms = <200>; + power-off-delay-us = <200>; + }; + + sdmmc1: mmc@c8000000 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + assigned-clocks = <&tegra_car TEGRA20_CLK_SDMMC1>; + assigned-clock-parents = <&tegra_car TEGRA20_CLK_PLL_C>; + assigned-clock-rates = <40000000>; + + max-frequency = <40000000>; + keep-power-in-suspend; + bus-width = <4>; + non-removable; + + mmc-pwrseq = <&brcm_wifi_pwrseq>; + vmmc-supply = <&vdd_3v3_sys>; + vqmmc-supply = <&vdd_3v3_sys>; + + wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wake"; + }; + }; + + sdmmc3: mmc@c8000400 { + status = "okay"; + bus-width = <4>; + cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio TEGRA_GPIO(H, 1) GPIO_ACTIVE_HIGH>; + power-gpios = <&gpio TEGRA_GPIO(I, 6) GPIO_ACTIVE_HIGH>; + vmmc-supply = <&vdd_3v3_sys>; + vqmmc-supply = <&vdd_3v3_sys>; + }; + + sdmmc4: mmc@c8000600 { + status = "okay"; + bus-width = <8>; + vmmc-supply = <&vcore_emmc>; + vqmmc-supply = <&vdd_3v3_sys>; + non-removable; + }; + + mains: ac-adapter-detect { + compatible = "gpio-charger"; + charger-type = "mains"; + gpios = <&gpio TEGRA_GPIO(V, 3) GPIO_ACTIVE_LOW>; + }; + + backlight: backlight { + compatible = "pwm-backlight"; + + enable-gpios = <&gpio TEGRA_GPIO(D, 4) GPIO_ACTIVE_HIGH>; + power-supply = <&vdd_3v3_sys>; + pwms = <&pwm 2 4000000>; + + brightness-levels = <7 255>; + num-interpolated-steps = <248>; + default-brightness-level = <20>; + }; + + /* PMIC has a built-in 32KHz oscillator which is used by PMC */ + clk32k_in: clock-32k-in { + compatible = "fixed-clock"; + clock-frequency = <32768>; + #clock-cells = <0>; + }; + + cpus { + cpu0: cpu@0 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + + cpu1: cpu@1 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + }; + + gpio-keys { + compatible = "gpio-keys"; + + dock-hall-sensor { + label = "Lid"; + gpios = <&gpio TEGRA_GPIO(S, 4) GPIO_ACTIVE_LOW>; + linux,input-type = ; + linux,code = ; + debounce-interval = <500>; + wakeup-event-action = ; + wakeup-source; + }; + + power { + label = "Power"; + gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-up { + label = "Volume Up"; + gpios = <&gpio TEGRA_GPIO(Q, 5) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-down { + label = "Volume Down"; + gpios = <&gpio TEGRA_GPIO(Q, 4) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + }; + + lvds-encoder { + compatible = "ti,sn75lvds83", "lvds-encoder"; + + powerdown-gpios = <&gpio TEGRA_GPIO(B, 2) GPIO_ACTIVE_LOW>; + power-supply = <&vdd_3v3_sys>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + 
lvds_encoder_input: endpoint { + remote-endpoint = <&lcd_output>; + }; + }; + + port@1 { + reg = <1>; + + lvds_encoder_output: endpoint { + remote-endpoint = <&panel_input>; + }; + }; + }; + }; + + display-panel { + compatible = "auo,b101ew05", "panel-lvds"; + + backlight = <&backlight>; + ddc-i2c-bus = <&lvds_ddc>; + power-supply = <&vdd_pnl_reg>; + + width-mm = <218>; + height-mm = <135>; + + data-mapping = "jeida-18"; + + panel-timing { + clock-frequency = <71200000>; + hactive = <1280>; + vactive = <800>; + hfront-porch = <8>; + hback-porch = <18>; + hsync-len = <184>; + vsync-len = <3>; + vfront-porch = <4>; + vback-porch = <8>; + }; + + port { + panel_input: endpoint { + remote-endpoint = <&lvds_encoder_output>; + }; + }; + }; + + vdd_5v0_sys: regulator-0 { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; + + vdd_3v3_sys: regulator-1 { + compatible = "regulator-fixed"; + regulator-name = "vdd_3v3_vs"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + vin-supply = <&vdd_5v0_sys>; + }; + + regulator-2 { + compatible = "regulator-fixed"; + regulator-name = "pcie_vdd"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <1500000>; + gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; + regulator-always-on; + }; + + vdd_pnl_reg: regulator-3 { + compatible = "regulator-fixed"; + regulator-name = "vdd_pnl"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + gpio = <&gpio TEGRA_GPIO(C, 6) GPIO_ACTIVE_HIGH>; + enable-active-high; + }; + + vdd_1v8_sys: regulator-4 { + compatible = "regulator-fixed"; + regulator-name = "vdd_1v8_vs"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + vin-supply = <&vdd_5v0_sys>; + }; + + vdd_hdmi_en: regulator-5 { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0_hdmi_en"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + vin-supply = <&vdd_5v0_sys>; + gpio = <&gpio TEGRA_GPIO(V, 5) GPIO_ACTIVE_HIGH>; + enable-active-high; + }; + + sound { + compatible = "asus,tegra-audio-wm8903-tf101", + "nvidia,tegra-audio-wm8903"; + nvidia,model = "ASUS EeePad Transformer TF101"; + + nvidia,audio-routing = + "Headphone Jack", "HPOUTR", + "Headphone Jack", "HPOUTL", + "Int Spk", "ROP", + "Int Spk", "RON", + "Int Spk", "LOP", + "Int Spk", "LON", + "Mic Jack", "MICBIAS", + "IN1L", "Mic Jack"; + + nvidia,i2s-controller = <&tegra_i2s1>; + nvidia,audio-codec = <&wm8903>; + + nvidia,spkr-en-gpios = <&wm8903 2 GPIO_ACTIVE_HIGH>; + nvidia,hp-det-gpios = <&gpio TEGRA_GPIO(W, 2) GPIO_ACTIVE_LOW>; + nvidia,headset; + + clocks = <&tegra_car TEGRA20_CLK_PLL_A>, + <&tegra_car TEGRA20_CLK_PLL_A_OUT0>, + <&tegra_car TEGRA20_CLK_CDEV1>; + clock-names = "pll_a", "pll_a_out0", "mclk"; + }; + + thermal-zones { + skin-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <0>; /* milliseconds */ + + thermal-sensors = <&nct1008 0>; + }; + + cpu-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + thermal-sensors = <&nct1008 1>; + + trips { + trip0: cpu-alert0 { + /* start throttling at 50C */ + temperature = <50000>; + hysteresis = <200>; + type = "passive"; + }; + + trip1: cpu-crit { + /* shut down at 60C */ + temperature = <60000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps 
{ + map0 { + trip = <&trip0>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; + }; + + memory-controller@7000f400 { + nvidia,use-ram-code; + + emc-tables@3 { + reg = <0x3>; + + #address-cells = <1>; + #size-cells = <0>; + + lpddr2 { + compatible = "elpida,B8132B2PB-6D-F", "jedec,lpddr2-s4"; + revision-id1 = <1>; + density = <2048>; + io-width = <16>; + }; + + emc-table@25000 { + reg = <25000>; + compatible = "nvidia,tegra20-emc-table"; + clock-frequency = <25000>; + nvidia,emc-registers = <0x00000002 0x00000006 + 0x00000003 0x00000003 0x00000006 0x00000004 + 0x00000002 0x00000009 0x00000003 0x00000003 + 0x00000002 0x00000002 0x00000002 0x00000004 + 0x00000003 0x00000008 0x0000000b 0x0000004d + 0x00000000 0x00000003 0x00000003 0x00000003 + 0x00000008 0x00000001 0x0000000a 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000006 + 0x00000002 0x00000068 0x00000000 0x00000003 + 0x00000000 0x00000000 0x00000282 0xa0ae04ae + 0x00070000 0x00000000 0x00000000 0x00000003 + 0x00000000 0x00000000 0x00000000 0x00000000>; + }; + + emc-table@50000 { + reg = <50000>; + compatible = "nvidia,tegra20-emc-table"; + clock-frequency = <50000>; + nvidia,emc-registers = <0x00000003 0x00000007 + 0x00000003 0x00000003 0x00000006 0x00000004 + 0x00000002 0x00000009 0x00000003 0x00000003 + 0x00000002 0x00000002 0x00000002 0x00000005 + 0x00000003 0x00000008 0x0000000b 0x0000009f + 0x00000000 0x00000003 0x00000003 0x00000003 + 0x00000008 0x00000001 0x0000000a 0x00000007 + 0x00000003 0x00000008 0x00000004 0x00000006 + 0x00000002 0x000000d0 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000282 0xa0ae04ae + 0x00070000 0x00000000 0x00000000 0x00000005 + 0x00000000 0x00000000 0x00000000 0x00000000>; + }; + + emc-table@75000 { + reg = <75000>; + compatible = "nvidia,tegra20-emc-table"; + clock-frequency = <75000>; + nvidia,emc-registers = <0x00000005 0x0000000a + 0x00000004 0x00000003 0x00000006 0x00000004 + 0x00000002 0x00000009 0x00000003 0x00000003 + 0x00000002 0x00000002 0x00000002 0x00000005 + 0x00000003 0x00000008 0x0000000b 0x000000ff + 0x00000000 0x00000003 0x00000003 0x00000003 + 0x00000008 0x00000001 0x0000000a 0x0000000b + 0x00000003 0x00000008 0x00000004 0x00000006 + 0x00000002 0x00000138 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000282 0xa0ae04ae + 0x00070000 0x00000000 0x00000000 0x00000007 + 0x00000000 0x00000000 0x00000000 0x00000000>; + }; + + emc-table@150000 { + reg = <150000>; + compatible = "nvidia,tegra20-emc-table"; + clock-frequency = <150000>; + nvidia,emc-registers = <0x00000009 0x00000014 + 0x00000007 0x00000003 0x00000006 0x00000004 + 0x00000002 0x00000009 0x00000003 0x00000003 + 0x00000002 0x00000002 0x00000002 0x00000005 + 0x00000003 0x00000008 0x0000000b 0x0000021f + 0x00000000 0x00000003 0x00000003 0x00000003 + 0x00000008 0x00000001 0x0000000a 0x00000015 + 0x00000003 0x00000008 0x00000004 0x00000006 + 0x00000002 0x00000270 0x00000000 0x00000001 + 0x00000000 0x00000000 0x00000282 0xa07c04ae + 0x007dc010 0x00000000 0x00000000 0x0000000e + 0x00000000 0x00000000 0x00000000 0x00000000>; + }; + + emc-table@300000 { + reg = <300000>; + compatible = "nvidia,tegra20-emc-table"; + clock-frequency = <300000>; + nvidia,emc-registers = <0x00000012 0x00000027 + 0x0000000d 0x00000006 0x00000007 0x00000005 + 0x00000003 0x00000009 0x00000006 0x00000006 + 0x00000003 0x00000003 0x00000002 0x00000006 + 0x00000003 0x00000009 0x0000000c 0x0000045f + 0x00000000 0x00000004 0x00000004 0x00000006 + 0x00000008 0x00000001 0x0000000e 
0x0000002a + 0x00000003 0x0000000f 0x00000007 0x00000005 + 0x00000002 0x000004e0 0x00000005 0x00000002 + 0x00000000 0x00000000 0x00000282 0xe059048b + 0x007e0010 0x00000000 0x00000000 0x0000001b + 0x00000000 0x00000000 0x00000000 0x00000000>; + }; + }; + }; +}; + +&emc_icc_dvfs_opp_table { + /delete-node/ opp-666000000; + /delete-node/ opp-760000000; +}; From ba60ed8c2254a35d684ac0ea067cdf81ff9c8f99 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Tue, 9 Mar 2021 22:22:42 +0200 Subject: [PATCH 1075/1202] ARM: tegra: Add device-tree for ASUS Transformer Prime TF201 Add device-tree for ASUS Transformer Prime TF201, which is an NVIDIA Tegra30-based tablet device. Co-developed-by: Ion Agorria Signed-off-by: Ion Agorria Signed-off-by: Svyatoslav Ryhel --- arch/arm/boot/dts/Makefile | 1 + .../boot/dts/tegra30-asus-lvds-display.dtsi | 45 + arch/arm/boot/dts/tegra30-asus-tf201.dts | 623 ++++++ .../dts/tegra30-asus-transformer-common.dtsi | 1757 +++++++++++++++++ 4 files changed, 2426 insertions(+) create mode 100644 arch/arm/boot/dts/tegra30-asus-lvds-display.dtsi create mode 100644 arch/arm/boot/dts/tegra30-asus-tf201.dts create mode 100644 arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 3c33e469d7035..7806a30a2d1b2 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1320,6 +1320,7 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \ tegra30-asus-nexus7-grouper-PM269.dtb \ tegra30-asus-nexus7-grouper-E1565.dtb \ tegra30-asus-nexus7-tilapia-E1565.dtb \ + tegra30-asus-tf201.dtb \ tegra30-beaver.dtb \ tegra30-cardhu-a02.dtb \ tegra30-cardhu-a04.dtb \ diff --git a/arch/arm/boot/dts/tegra30-asus-lvds-display.dtsi b/arch/arm/boot/dts/tegra30-asus-lvds-display.dtsi new file mode 100644 index 0000000000000..d35c712eeb49f --- /dev/null +++ b/arch/arm/boot/dts/tegra30-asus-lvds-display.dtsi @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This dtsi file describes parts common to Asus T30 devices with an LVDS panel.
*/ + +/ { + display-panel { + power-supply = <&vdd_pnl>; + backlight = <&backlight>; + + port { + panel_input: endpoint { + remote-endpoint = <&bridge_output>; + }; + }; + }; + + /* Texas Instruments SN75LVDS83B LVDS Transmitter */ + lvds-encoder { + compatible = "ti,sn75lvds83", "lvds-encoder"; + + powerdown-gpios = <&gpio TEGRA_GPIO(N, 6) GPIO_ACTIVE_LOW>; + power-supply = <&vdd_3v3_sys>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + bridge_input: endpoint { + remote-endpoint = <&dpi_output>; + }; + }; + + port@1 { + reg = <1>; + + bridge_output: endpoint { + remote-endpoint = <&panel_input>; + }; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/tegra30-asus-tf201.dts b/arch/arm/boot/dts/tegra30-asus-tf201.dts new file mode 100644 index 0000000000000..79ae5b22a96ac --- /dev/null +++ b/arch/arm/boot/dts/tegra30-asus-tf201.dts @@ -0,0 +1,623 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "tegra30-asus-transformer-common.dtsi" +#include "tegra30-asus-lvds-display.dtsi" + +/ { + model = "Asus Transformer Prime TF201"; + compatible = "asus,tf201", "nvidia,tegra30"; + + pinmux@70000868 { + state_default: pinmux { + lcd_pwr2_pc6 { + nvidia,pins = "lcd_pwr2_pc6", + "lcd_dc1_pd2"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb3 { + nvidia,pins = "pbb3"; + nvidia,function = "vgp3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb7 { + nvidia,pins = "pbb7"; + nvidia,function = "i2s4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_row7_pr7 { + nvidia,pins = "kb_row7_pr7"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs4_n_pk2 { + nvidia,pins = "gmi_cs4_n_pk2"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + }; + }; + + uartc: serial@70006200 { + /* Azurewave AW-NH615 BCM4329B1 */ + bluetooth { + compatible = "brcm,bcm4329-bt"; + }; + }; + + i2c@7000c400 { + /* Atmel MXT768E touchscreen */ + touchscreen@4d { + compatible = "atmel,maxtouch"; + reg = <0x4d>; + + interrupt-parent = <&gpio>; + interrupts = ; + reset-gpios = <&gpio TEGRA_GPIO(H, 6) GPIO_ACTIVE_LOW>; + + vdda-supply = <&vdd_3v3_sys>; + vdd-supply = <&vdd_3v3_sys>; + }; + }; + + i2c@7000c500 { + clock-frequency = <100000>; + + magnetometer@e { + mount-matrix = "-1", "0", "0", + "0", "-1", "0", + "0", "0", "-1"; + }; + + gyroscope@68 { + mount-matrix = "0", "-1", "0", + "-1", "0", "0", + "0", "0", "-1"; + + /* External I2C interface */ + i2c-gate { + accelerometer@f { + mount-matrix = "1", "0", "0", + "0", "-1", "0", + "0", "0", "1"; + }; + }; + }; + }; + + i2c@7000d000 { + /* Realtek ALC5631 audio codec */ + rt5631: audio-codec@1a { + compatible = "realtek,rt5631"; + reg = <0x1a>; + }; + }; + + usb-phy@7d000000 { + /delete-property/ nvidia,xcvr-setup-use-fuses; + nvidia,xcvr-setup = <5>; /* Based on TF201 fuse value - 48 */ + }; + + usb-phy@7d008000 { + /delete-property/ nvidia,xcvr-setup-use-fuses; + nvidia,xcvr-setup = <5>; /* Based on TF201 fuse value - 48 */ + }; + + display-panel { + compatible = "hannstar,hsd101pww2"; + }; + + haptic-feedback { + compatible = "gpio-vibrator"; + enable-gpios = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_HIGH>; + vcc-supply = <&vdd_3v3_sys>; + }; + + memory-controller@7000f000 { + emc-timings-0 { + /* Elpida 1GB EDB8132B2MA-8D-F lpDDR2 400MHz */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + 
nvidia,emem-configuration = < 0x00020001 0x80000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0x80000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0x80000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0x80000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emem-configuration = < 0x00000006 0x80000048 + 0x00000002 0x00000003 0x0000000c 0x00000007 + 0x00000009 0x00000001 0x00000002 0x00000006 + 0x00000001 0x00000000 0x00000004 0x00000004 + 0x04040001 0x000d090c 0x71c6120d 0x001f0000 >; + }; + }; + + emc-timings-1 { + /* TF201 Unknown 1GB lpDDR2 500MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0x80000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0x80000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0x80000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0x80000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-500000000 { + clock-frequency = <500000000>; + + nvidia,emem-configuration = < 0x00000007 0x8000005a + 0x00000003 0x00000004 0x0000000e 0x00000009 + 0x0000000c 0x00000002 0x00000002 0x00000008 + 0x00000001 0x00000000 0x00000004 0x00000005 + 0x05040001 0x00100a0e 0x71c8170f 0x001f0000 >; + }; + }; + }; + + memory-controller@7000f400 { + emc-timings-0 { + /* Elpida 1GB EDB8132B2MA-8D-F lpDDR2 400MHz */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 
0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000060 0x00000000 0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x00098000 0x00098000 0x00098000 + 0x00098000 0x00000010 0x00000010 0x00000010 + 0x00000010 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000008 0x00000008 0x00000008 + 0x00000008 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00000000 + 0x00000009 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x00098000 0x00098000 0x00098000 + 0x00098000 0x00000010 0x00000010 0x00000010 + 0x00000010 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000018 0x00000018 0x00000018 + 0x00000018 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00000000 + 0x00000009 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x000a0000 0x000a0000 0x000a0000 + 0x000a0000 0x00000010 0x00000010 0x00000010 + 0x00000010 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000008 0x00000008 0x00000008 + 0x00000008 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00120220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 
0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00000000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000c + 0x0000000a 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x00440084 + 0x00008000 0x00074000 0x00074000 0x00074000 + 0x00074000 0x00000010 0x00000010 0x00000010 + 0x00000010 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000018 0x00000018 0x00000018 + 0x00000018 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00078000 0x00078000 0x00078000 + 0x00078000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00000000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe0000000 0xff00ff00 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010082>; + nvidia,emc-mode-2 = <0x00020004>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000024>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000017 + 0x00000033 0x00000010 0x00000007 0x00000007 + 0x00000007 0x00000002 0x0000000a 0x00000007 + 0x00000007 0x00000003 0x00000002 0x00000000 + 0x00000003 0x00000007 0x00000004 0x0000000d + 0x0000000e 0x000005e9 0x00000000 0x0000017a + 0x00000002 0x00000002 0x00000007 0x00000000 + 0x00000001 0x0000000c 0x00000038 0x00000038 + 0x00000006 0x00000014 0x00000009 0x00000004 + 0x00000002 0x00000680 0x00000000 0x00000006 + 0x00000000 0x00000000 0x00006282 0x001d0084 + 0x00008000 0x0002c000 0x0002c000 0x0002c000 + 0x0002c000 0x00000010 0x00000010 0x00000010 + 0x00000010 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000008 0x00000008 0x00000008 + 0x00000008 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000c0220 0x0800003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00000000 + 0x00000024 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000ce6 0xe0000000 0xff00ff88 >; + }; + }; + + emc-timings-1 { + /* TF201 Unknown 1GB lpDDR2 500MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x00000060 0x00000000 
0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x00780084 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000025 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 
0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000c + 0x0000000a 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x00440084 + 0x00008000 0x00060000 0x00060000 0x00060000 + 0x00060000 0x00072000 0x00072000 0x00072000 + 0x00072000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000d0000 0x000d0000 0x000d0000 + 0x000d0000 0x000e0220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000004a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe0000000 0xff00ff00 >; + }; + + timing-500000000 { + clock-frequency = <500000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x000100c2>; + nvidia,emc-mode-2 = <0x00020005>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000002d>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001d + 0x00000040 0x00000014 0x00000008 0x00000007 + 0x00000009 0x00000003 0x0000000d 0x00000008 + 0x00000008 0x00000004 0x00000002 0x00000000 + 0x00000004 0x00000008 0x00000005 0x0000000d + 0x0000000f 0x00000763 0x00000000 0x000001d8 + 0x00000003 0x00000003 0x00000008 0x00000000 + 0x00000001 0x0000000e 0x00000046 0x00000046 + 0x00000008 0x00000019 0x0000000b 0x00000004 + 0x00000002 0x00000820 0x00000000 0x00000006 + 0x00000000 0x00000000 0x00006282 0xf0140091 + 0x00008000 0x00000008 0x00000008 0x00000008 + 0x00000008 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x00080220 0x0800003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x000000b4 0x000d000d 0xa0f10404 0x00000000 + 0x00000000 0x80000fde 0xe0000000 0xff00ff88 >; + }; + }; + }; +}; + +&emc_icc_dvfs_opp_table { + /delete-node/ opp-533000000-1200; + /delete-node/ opp-625000000-1200; + /delete-node/ opp-625000000-1250; + /delete-node/ opp-667000000-1200; + /delete-node/ opp-750000000-1300; + /delete-node/ opp-800000000-1300; + /delete-node/ opp-900000000-1350; +}; + +&emc_bw_dfs_opp_table { + /delete-node/ opp-533000000; + /delete-node/ opp-625000000; + /delete-node/ opp-667000000; + /delete-node/ opp-750000000; + /delete-node/ opp-800000000; + /delete-node/ opp-900000000; +}; diff --git a/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi new file mode 100644 index 0000000000000..970d85b2e4da8 --- /dev/null +++ b/arch/arm/boot/dts/tegra30-asus-transformer-common.dtsi @@ -0,0 +1,1757 @@ 
+// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include "tegra30.dtsi" +#include "tegra30-cpu-opp.dtsi" +#include "tegra30-cpu-opp-microvolt.dtsi" + +/ { + aliases { + mmc0 = &sdmmc4; /* eMMC */ + mmc1 = &sdmmc1; /* uSD slot */ + mmc2 = &sdmmc3; /* WiFi */ + + rtc0 = &pmic; + rtc1 = "/rtc@7000e000"; + + display0 = &lcd; + display1 = &hdmi; + + serial1 = &uartc; /* Bluetooth */ + serial2 = &uartb; /* GPS */ + }; + + /* + * The decompressor and also some bootloaders rely on a + * pre-existing /chosen node to be available to insert the + * command line and merge other ATAGS info. + */ + chosen {}; + + memory@80000000 { + reg = <0x80000000 0x40000000>; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + linux,cma@80000000 { + compatible = "shared-dma-pool"; + alloc-ranges = <0x80000000 0x30000000>; + size = <0x10000000>; /* 256MiB */ + linux,cma-default; + reusable; + }; + + ramoops@beb00000 { + compatible = "ramoops"; + reg = <0xbeb00000 0x10000>; /* 64kB */ + console-size = <0x8000>; /* 32kB */ + record-size = <0x400>; /* 1kB */ + ecc-size = <16>; + }; + + trustzone@bfe00000 { + reg = <0xbfe00000 0x200000>; /* 2MB */ + no-map; + }; + }; + + host1x@50000000 { + hdmi: hdmi@54280000 { + status = "okay"; + + hdmi-supply = <&hdmi_5v0_sys>; + pll-supply = <&vdd_1v8_vio>; + vdd-supply = <&vdd_3v3_sys>; + + nvidia,hpd-gpio = <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>; + nvidia,ddc-i2c-bus = <&hdmi_ddc>; + }; + + lcd: dc@54200000 { + rgb { + status = "okay"; + + port@0 { + dpi_output: endpoint { + remote-endpoint = <&bridge_input>; + bus-width = <24>; + }; + }; + }; + }; + }; + + gpio@6000d000 { + init-lpm-in-hog { + gpio-hog; + gpios = , + ; + input; + }; + + init-lpm-out-hog { + gpio-hog; + gpios = , + ; + output-low; + }; + + usb-charge-limit-hog { + gpio-hog; + gpios = ; + output-high; + }; + }; + + vde@6001a000 { + assigned-clocks = <&tegra_car TEGRA30_CLK_VDE>; + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_P>; + assigned-clock-rates = <408000000>; + }; + + pinmux@70000868 { + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: pinmux { + /* SDMMC1 pinmux */ + sdmmc1_clk { + nvidia,pins = "sdmmc1_clk_pz0"; + nvidia,function = "sdmmc1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc1_cmd { + nvidia,pins = "sdmmc1_dat3_py4", + "sdmmc1_dat2_py5", + "sdmmc1_dat1_py6", + "sdmmc1_dat0_py7", + "sdmmc1_cmd_pz1"; + nvidia,function = "sdmmc1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc1_cd { + nvidia,pins = "gmi_iordy_pi5"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc1_wp { + nvidia,pins = "vi_d11_pt3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SDMMC2 pinmux */ + vi_d1_pd5 { + nvidia,pins = "vi_d1_pd5", + "vi_d2_pl0", + "vi_d3_pl1", + "vi_d5_pl3", + "vi_d7_pl5"; + nvidia,function = "sdmmc2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_d8_pl6 { + nvidia,pins = "vi_d8_pl6", + "vi_d9_pl7"; + nvidia,function = "sdmmc2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + + /* SDMMC3 pinmux */ + sdmmc3_clk { + nvidia,pins = "sdmmc3_clk_pa6"; + nvidia,function = "sdmmc3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc3_cmd { + nvidia,pins = "sdmmc3_cmd_pa7", + "sdmmc3_dat0_pb7", + "sdmmc3_dat1_pb6", 
+ "sdmmc3_dat2_pb5", + "sdmmc3_dat3_pb4", + "sdmmc3_dat4_pd1", + "sdmmc3_dat5_pd0", + "sdmmc3_dat6_pd3", + "sdmmc3_dat7_pd4"; + nvidia,function = "sdmmc3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SDMMC4 pinmux */ + sdmmc4_clk { + nvidia,pins = "sdmmc4_clk_pcc4"; + nvidia,function = "sdmmc4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc4_cmd { + nvidia,pins = "sdmmc4_cmd_pt7", + "sdmmc4_dat0_paa0", + "sdmmc4_dat1_paa1", + "sdmmc4_dat2_paa2", + "sdmmc4_dat3_paa3", + "sdmmc4_dat4_paa4", + "sdmmc4_dat5_paa5", + "sdmmc4_dat6_paa6", + "sdmmc4_dat7_paa7"; + nvidia,function = "sdmmc4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc4_rst_n { + nvidia,pins = "sdmmc4_rst_n_pcc3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + cam_mclk { + nvidia,pins = "cam_mclk_pcc0"; + nvidia,function = "vi_alt3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + drive_sdmmc4 { + nvidia,pins = "drive_gma", + "drive_gmb", + "drive_gmc", + "drive_gmd"; + nvidia,pull-down-strength = <9>; + nvidia,pull-up-strength = <9>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + + /* I2C pinmux */ + gen1_i2c { + nvidia,pins = "gen1_i2c_scl_pc4", + "gen1_i2c_sda_pc5"; + nvidia,function = "i2c1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + gen2_i2c { + nvidia,pins = "gen2_i2c_scl_pt5", + "gen2_i2c_sda_pt6"; + nvidia,function = "i2c2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + cam_i2c { + nvidia,pins = "cam_i2c_scl_pbb1", + "cam_i2c_sda_pbb2"; + nvidia,function = "i2c3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + ddc_i2c { + nvidia,pins = "ddc_scl_pv4", + "ddc_sda_pv5"; + nvidia,function = "i2c4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + }; + pwr_i2c { + nvidia,pins = "pwr_i2c_scl_pz6", + "pwr_i2c_sda_pz7"; + nvidia,function = "i2cpwr"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + hotplug_i2c { + nvidia,pins = "pu4"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* HDMI pinmux */ + hdmi_cec { + nvidia,pins = "hdmi_cec_pee3"; + nvidia,function = "cec"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + hdmi_hpd { + nvidia,pins = "hdmi_int_pn7"; + nvidia,function = "hdmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-A */ + ulpi_data0_po1 { + nvidia,pins = "ulpi_data0_po1"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_data1_po2 { + nvidia,pins = "ulpi_data1_po2"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_data5_po6 { + nvidia,pins = "ulpi_data5_po6"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_data7_po0 { + nvidia,pins = "ulpi_data7_po0", + "ulpi_data2_po3", + "ulpi_data3_po4", + "ulpi_data4_po5", + "ulpi_data6_po7"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-B */ + uartb_txd_rts { + 
nvidia,pins = "uart2_txd_pc2", + "uart2_rts_n_pj6"; + nvidia,function = "uartb"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + uartb_rxd_cts { + nvidia,pins = "uart2_rxd_pc3", + "uart2_cts_n_pj5"; + nvidia,function = "uartb"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-C */ + uartc_rxd_cts { + nvidia,pins = "uart3_cts_n_pa1", + "uart3_rxd_pw7"; + nvidia,function = "uartc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + uartc_txd_rts { + nvidia,pins = "uart3_rts_n_pc0", + "uart3_txd_pw6"; + nvidia,function = "uartc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-D */ + ulpi_nxt_py2 { + nvidia,pins = "ulpi_nxt_py2"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_clk_py0 { + nvidia,pins = "ulpi_clk_py0", + "ulpi_dir_py1", + "ulpi_stp_py3"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* I2S pinmux */ + dap_i2s0 { + nvidia,pins = "dap1_fs_pn0", + "dap1_din_pn1", + "dap1_dout_pn2", + "dap1_sclk_pn3"; + nvidia,function = "i2s0"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap_i2s1 { + nvidia,pins = "dap2_fs_pa2", + "dap2_sclk_pa3", + "dap2_din_pa4", + "dap2_dout_pa5"; + nvidia,function = "i2s1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap3_fs { + nvidia,pins = "dap3_fs_pp0", + "dap3_din_pp1"; + nvidia,function = "i2s2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap3_dout { + nvidia,pins = "dap3_dout_pp2", + "dap3_sclk_pp3"; + nvidia,function = "i2s2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap_i2s3 { + nvidia,pins = "dap4_fs_pp4", + "dap4_din_pp5", + "dap4_dout_pp6", + "dap4_sclk_pp7"; + nvidia,function = "i2s3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Sensors pinmux */ + nct_irq { + nvidia,pins = "pcc2"; + nvidia,function = "i2s4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Asus EC pinmux */ + ec_irqs { + nvidia,pins = "kb_row10_ps2", + "kb_row15_ps7"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ec_reqs { + nvidia,pins = "kb_col1_pq1"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Memory type bootstrap */ + mem_boostraps { + nvidia,pins = "gmi_ad4_pg4", + "gmi_ad5_pg5"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* PCI-e pinmux */ + pex_l2_rst_n { + nvidia,pins = "pex_l2_rst_n_pcc6", + "pex_l0_rst_n_pdd1", + "pex_l1_rst_n_pdd5"; + nvidia,function = "pcie"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pex_l2_clkreq_n { + nvidia,pins = "pex_l2_clkreq_n_pcc7", + "pex_l0_prsnt_n_pdd0", + "pex_l0_clkreq_n_pdd2", + "pex_wake_n_pdd3", + "pex_l1_prsnt_n_pdd4", + "pex_l1_clkreq_n_pdd6", + "pex_l2_prsnt_n_pdd7"; + nvidia,function = "pcie"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SPI pinmux */ + spi1_mosi_px4 { + nvidia,pins = "spi1_mosi_px4", + "spi1_sck_px5", + "spi1_cs0_n_px6", + "spi1_miso_px7"; + nvidia,function = "spi1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_cs1_n_pw2 { + nvidia,pins = "spi2_cs1_n_pw2"; + nvidia,function = "spi2"; + nvidia,pull = ; + nvidia,tristate = ; + 
nvidia,enable-input = ; + }; + spi2_sck_px2 { + nvidia,pins = "spi2_sck_px2"; + nvidia,function = "spi2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_a17_pb0 { + nvidia,pins = "gmi_a17_pb0", + "gmi_a16_pj7"; + nvidia,function = "spi4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_a18_pb1 { + nvidia,pins = "gmi_a18_pb1"; + nvidia,function = "spi4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_a19_pk7 { + nvidia,pins = "gmi_a19_pk7"; + nvidia,function = "spi4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Display A pinmux */ + lcd_pwr0_pb2 { + nvidia,pins = "lcd_pwr0_pb2", + "lcd_pclk_pb3", + "lcd_pwr1_pc1", + "lcd_d0_pe0", + "lcd_d1_pe1", + "lcd_d2_pe2", + "lcd_d3_pe3", + "lcd_d4_pe4", + "lcd_d5_pe5", + "lcd_d6_pe6", + "lcd_d7_pe7", + "lcd_d8_pf0", + "lcd_d9_pf1", + "lcd_d10_pf2", + "lcd_d11_pf3", + "lcd_d12_pf4", + "lcd_d13_pf5", + "lcd_d14_pf6", + "lcd_d15_pf7", + "lcd_de_pj1", + "lcd_hsync_pj3", + "lcd_vsync_pj4", + "lcd_d16_pm0", + "lcd_d17_pm1", + "lcd_d18_pm2", + "lcd_d19_pm3", + "lcd_d20_pm4", + "lcd_d21_pm5", + "lcd_d22_pm6", + "lcd_d23_pm7", + "lcd_dc0_pn6", + "lcd_sdin_pz2"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + lcd_cs0_n_pn4 { + nvidia,pins = "lcd_cs0_n_pn4", + "lcd_sdout_pn5", + "lcd_wr_n_pz3"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + blink { + nvidia,pins = "clk_32k_out_pa0"; + nvidia,function = "blink"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* KBC keys */ + kb_col0_pq0 { + nvidia,pins = "kb_col0_pq0"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_col1_pq1 { + nvidia,pins = "kb_row1_pr1", + "kb_row3_pr3", + "kb_row8_ps0", + "kb_row14_ps6"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_col4_pq4 { + nvidia,pins = "kb_col4_pq4", + "kb_col5_pq5", + "kb_col7_pq7", + "kb_row2_pr2", + "kb_row4_pr4", + "kb_row5_pr5", + "kb_row12_ps4", + "kb_row13_ps5"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_wp_n_pc7 { + nvidia,pins = "gmi_wp_n_pc7", + "gmi_wait_pi7", + "gmi_cs3_n_pk4"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs0_n_pj0 { + nvidia,pins = "gmi_cs0_n_pj0", + "gmi_cs1_n_pj2", + "gmi_cs2_n_pk3"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_pclk_pt0 { + nvidia,pins = "vi_pclk_pt0"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + + /* GPIO keys pinmux */ + power_key { + nvidia,pins = "pv0"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vol_keys { + nvidia,pins = "kb_col2_pq2", + "kb_col3_pq3"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Bluetooth */ + bt_shutdown { + nvidia,pins = "pu0"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + bt_dev_wake { + nvidia,pins = "pu1"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + bt_host_wake { + nvidia,pins = "pu6"; + nvidia,function = 
"rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pu2 { + nvidia,pins = "pu2"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu3 { + nvidia,pins = "pu3"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pcc1 { + nvidia,pins = "pcc1"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv2 { + nvidia,pins = "pv2"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv3 { + nvidia,pins = "pv3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_vsync_pd6 { + nvidia,pins = "vi_vsync_pd6", + "vi_hsync_pd7"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + vi_d10_pt2 { + nvidia,pins = "vi_d10_pt2", + "vi_d0_pt4", "pbb0"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + kb_row0_pr0 { + nvidia,pins = "kb_row0_pr0"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_ad0_pg0 { + nvidia,pins = "gmi_ad0_pg0", + "gmi_ad1_pg1", + "gmi_ad2_pg2", + "gmi_ad3_pg3", + "gmi_ad6_pg6", + "gmi_ad7_pg7", + "gmi_wr_n_pi0", + "gmi_oe_n_pi1", + "gmi_dqs_pi2", + "gmi_adv_n_pk0", + "gmi_clk_pk1"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad13_ph5 { + nvidia,pins = "gmi_ad13_ph5"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad10_ph2 { + nvidia,pins = "gmi_ad10_ph2", + "gmi_ad11_ph3", + "gmi_ad14_ph6"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad12_ph4 { + nvidia,pins = "gmi_ad12_ph4", + "gmi_rst_n_pi4", + "gmi_cs7_n_pi6"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Vibrator control */ + vibrator { + nvidia,pins = "gmi_ad15_ph7"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* PWM pimnmux */ + pwm_0 { + nvidia,pins = "gmi_ad8_ph0"; + nvidia,function = "pwm0"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pwm_2 { + nvidia,pins = "pu5"; + nvidia,function = "pwm2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_cs6_n_pi3 { + nvidia,pins = "gmi_cs6_n_pi3"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Spdif pinmux */ + spdif_out { + nvidia,pins = "spdif_out_pk5"; + nvidia,function = "spdif"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spdif_in { + nvidia,pins = "spdif_in_pk6"; + nvidia,function = "spdif"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + vi_d4_pl2 { + nvidia,pins = "vi_d4_pl2"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_d6_pl4 { + nvidia,pins = "vi_d6_pl4"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + vi_mclk_pt1 { + nvidia,pins = "vi_mclk_pt1"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + jtag_rtck { + nvidia,pins = 
"jtag_rtck_pu7"; + nvidia,function = "rtck"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + crt_hsync_pv6 { + nvidia,pins = "crt_hsync_pv6", + "crt_vsync_pv7"; + nvidia,function = "crt"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + clk1_out { + nvidia,pins = "clk1_out_pw4"; + nvidia,function = "extperiph1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk2_out { + nvidia,pins = "clk2_out_pw5"; + nvidia,function = "extperiph2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk3_out { + nvidia,pins = "clk3_out_pee0"; + nvidia,function = "extperiph3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + sys_clk_req { + nvidia,pins = "sys_clk_req_pz5"; + nvidia,function = "sysclk"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pbb4 { + nvidia,pins = "pbb4"; + nvidia,function = "vgp4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb5 { + nvidia,pins = "pbb5"; + nvidia,function = "vgp5"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb6 { + nvidia,pins = "pbb6"; + nvidia,function = "vgp6"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + clk2_req_pcc5 { + nvidia,pins = "clk2_req_pcc5", + "clk1_req_pee2"; + nvidia,function = "dap"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + clk3_req_pee1 { + nvidia,pins = "clk3_req_pee1"; + nvidia,function = "dev3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + owr { + nvidia,pins = "owr"; + nvidia,function = "owr"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* GPIO power/drive control */ + drive_dap1 { + nvidia,pins = "drive_dap1", + "drive_dap2", + "drive_dbg", + "drive_at5", + "drive_gme", + "drive_ddc", + "drive_ao1", + "drive_uart3"; + nvidia,high-speed-mode = ; + nvidia,schmitt = ; + nvidia,low-power-mode = ; + nvidia,pull-down-strength = <31>; + nvidia,pull-up-strength = <31>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_sdio1 { + nvidia,pins = "drive_sdio1", + "drive_sdio3"; + nvidia,high-speed-mode = ; + nvidia,schmitt = ; + nvidia,pull-down-strength = <46>; + nvidia,pull-up-strength = <42>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + }; + }; + + uartb: serial@70006040 { + compatible = "nvidia,tegra30-hsuart"; + status = "okay"; + + /* Broadcom GPS BCM47511 */ + }; + + uartc: serial@70006200 { + compatible = "nvidia,tegra30-hsuart"; + status = "okay"; + + nvidia,adjust-baud-rates = <0 9600 100>, + <9600 115200 200>, + <1000000 4000000 136>; + + bluetooth { + max-speed = <4000000>; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "txco"; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wakeup"; + + device-wakeup-gpios = <&gpio TEGRA_GPIO(U, 1) GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio TEGRA_GPIO(U, 0) GPIO_ACTIVE_HIGH>; + + vbat-supply = <&vdd_3v3_com>; + vddio-supply = <&vdd_1v8_vio>; + }; + }; + + pwm: pwm@7000a000 { + status = "okay"; + }; + + i2c1: i2c@7000c000 { + status = "okay"; + clock-frequency = <100000>; + }; + + i2c2: i2c@7000c400 { + status = "okay"; + clock-frequency = <400000>; + }; + + i2c3: i2c@7000c500 { + status = "okay"; + + /* Aichi AMI306 digital compass */ + magnetometer@e { + compatible = "asahi-kasei,ak8974"; + reg = <0x0e>; + + avdd-supply = <&vdd_3v3_sys>; + dvdd-supply = <&vdd_1v8_vio>; + }; + + /* 
Dynaimage ambient light sensor */ + light-sensor@1c { + compatible = "dynaimage,al3010"; + reg = <0x1c>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_3v3_sys>; + }; + + gyroscope@68 { + compatible = "invensense,mpu3050"; + reg = <0x68>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_3v3_sys>; + vlogic-supply = <&vdd_1v8_vio>; + + i2c-gate { + #address-cells = <1>; + #size-cells = <0>; + + accelerometer@f { + compatible = "kionix,kxtf9"; + reg = <0x0f>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_1v8_vio>; + vddio-supply = <&vdd_1v8_vio>; + }; + }; + }; + }; + + hdmi_ddc: i2c@7000c700 { /*i2c4*/ + status = "okay"; + clock-frequency = <93750>; + }; + + i2c5: i2c@7000d000 { + status = "okay"; + clock-frequency = <400000>; + + nct72: temperature-sensor@4c { + compatible = "onnn,nct1008"; + reg = <0x4c>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vcc-supply = <&vdd_3v3_sys>; + #thermal-sensor-cells = <1>; + }; + + /* Texas Instruments TPS659110 PMIC */ + pmic: pmic@2d { + compatible = "ti,tps65911"; + reg = <0x2d>; + + interrupts = ; + #interrupt-cells = <2>; + interrupt-controller; + wakeup-source; + + ti,en-gpio-sleep = <0 0 1 0 0 0 0 0 0>; + ti,system-power-controller; + ti,sleep-keep-ck32k; + ti,sleep-enable; + + #gpio-cells = <2>; + gpio-controller; + + vcc1-supply = <&vdd_5v0_bat>; + vcc2-supply = <&vdd_5v0_bat>; + vcc3-supply = <&vdd_1v8_vio>; + vcc4-supply = <&vdd_5v0_bat>; + vcc5-supply = <&vdd_5v0_bat>; + vcc6-supply = <&vddio_ddr>; + vcc7-supply = <&vdd_5v0_bat>; + vccio-supply = <&vdd_5v0_bat>; + + pmic-sleep-hog { + gpio-hog; + gpios = <2 GPIO_ACTIVE_HIGH>; + output-high; + }; + + regulators { + /* vdd1 is not used by transformers */ + + vddio_ddr: vdd2 { + regulator-name = "vddio_ddr"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + regulator-boot-on; + }; + + vdd_cpu: vddctrl { + regulator-name = "vdd_cpu,vdd_sys"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1400000>; + regulator-coupled-with = <&vdd_core>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; + regulator-always-on; + regulator-boot-on; + ti,regulator-ext-sleep-control = <1>; + + nvidia,tegra-cpu-regulator; + }; + + vdd_1v8_vio: vio { + regulator-name = "vdd_1v8_gen"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + regulator-boot-on; + }; + + /* eMMC VDD */ + vcore_emmc: ldo1 { + regulator-name = "vdd_emmc_core"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + /* uSD slot VDD */ + vdd_usd: ldo2 { + regulator-name = "vdd_usd"; + regulator-min-microvolt = <3100000>; + regulator-max-microvolt = <3100000>; + regulator-always-on; + }; + + /* uSD slot VDDIO */ + vddio_usd: ldo3 { + regulator-name = "vddio_usd"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3100000>; + }; + + ldo4 { + regulator-name = "vdd_rtc"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + }; + + /* ldo5 is not used by transformers */ + + ldo6 { + regulator-name = "avdd_dsi_csi,pwrdet_mipi"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + }; + + ldo7 { + regulator-name = "vdd_pllm,x,u,a_p_c_s"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + regulator-boot-on; + 
ti,regulator-ext-sleep-control = <8>; + }; + + ldo8 { + regulator-name = "vdd_ddr_hs"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + regulator-always-on; + ti,regulator-ext-sleep-control = <8>; + }; + }; + }; + + vdd_core: core-regulator@60 { + compatible = "ti,tps62361"; + reg = <0x60>; + + regulator-name = "tps62361-vout"; + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <1770000>; + regulator-coupled-with = <&vdd_cpu>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; + regulator-boot-on; + regulator-always-on; + ti,enable-vout-discharge; + ti,vsel0-state-high; + ti,vsel1-state-high; + + nvidia,tegra-core-regulator; + }; + }; + + vdd_5v0_bat: regulator-bat { + compatible = "regulator-fixed"; + regulator-name = "vdd_ac_bat"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + regulator-boot-on; + }; + + vdd_5v0_cp: regulator-sby { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0_sby"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + regulator-boot-on; + gpio = <&pmic 0 GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_bat>; + }; + + vdd_5v0_sys: regulator-5v { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0_sys"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + regulator-boot-on; + gpio = <&pmic 8 GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_bat>; + }; + + vdd_1v5_ddr: regulator-ddr { + compatible = "regulator-fixed"; + regulator-name = "vdd_ddr"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <1500000>; + regulator-always-on; + regulator-boot-on; + gpio = <&pmic 7 GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_bat>; + }; + + vdd_3v3_sys: regulator-3v { + compatible = "regulator-fixed"; + regulator-name = "vdd_3v3_sys"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + regulator-boot-on; + gpio = <&pmic 6 GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_bat>; + }; + + vdd_pnl: regulator-panel { + compatible = "regulator-fixed"; + regulator-name = "vdd_panel"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <20000>; + gpio = <&gpio TEGRA_GPIO(W, 1) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + vdd_3v3_com: regulator-com { + compatible = "regulator-fixed"; + regulator-name = "vdd_3v3_com"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + gpio = <&gpio TEGRA_GPIO(D, 0) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + vdd_5v0_bl: regulator-bl { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0_bl"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-boot-on; + gpio = <&gpio TEGRA_GPIO(H, 3) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_bat>; + }; + + hdmi_5v0_sys: regulator-hdmi { + compatible = "regulator-fixed"; + regulator-name = "hdmi_5v0_sys"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; + + vdd_2v85_cam2: regulator-cam3 { + compatible = "regulator-fixed"; + regulator-name = 
"cam3_ldo_en"; + regulator-min-microvolt = <2850000>; + regulator-max-microvolt = <2850000>; + gpio = <&gpio TEGRA_GPIO(S, 0) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + vddio_1v8_cam: regulator-camvio { + compatible = "regulator-fixed"; + regulator-name = "vddio_1v8_cam"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio TEGRA_GPIO(BB, 4) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_1v8_vio>; + }; + + pmc@7000e400 { + status = "okay"; + nvidia,invert-interrupt; + nvidia,suspend-mode = <1>; + nvidia,cpu-pwr-good-time = <2000>; + nvidia,cpu-pwr-off-time = <200>; + nvidia,core-pwr-good-time = <3845 3845>; + nvidia,core-pwr-off-time = <0>; + nvidia,core-power-req-active-high; + nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; + + /* Set DEV_OFF + PWR_OFF_SET bit in DCDC control register of TPS65911 PMIC */ + i2c-thermtrip { + nvidia,i2c-controller-id = <4>; + nvidia,bus-addr = <0x2d>; + nvidia,reg-addr = <0x3f>; + nvidia,reg-data = <0x81>; + }; + }; + + hda@70030000 { + status = "okay"; + }; + + ahub@70080000 { + i2s@70080400 { /* i2s1 */ + status = "okay"; + }; + + /* BT SCO */ + i2s@70080600 { /* i2s3 */ + status = "okay"; + }; + }; + + sdmmc1: mmc@78000000 { + status = "okay"; + + /* SDR104 mode unsupported yet */ + max-frequency = <104000000>; + + cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; + bus-width = <4>; + + vmmc-supply = <&vdd_usd>; /* ldo2 */ + vqmmc-supply = <&vddio_usd>; /* ldo3 */ + }; + + brcm_wifi_pwrseq: wifi-pwrseq { + compatible = "mmc-pwrseq-simple"; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "ext_clock"; + + reset-gpios = <&gpio TEGRA_GPIO(D, 4) GPIO_ACTIVE_LOW>; + post-power-on-delay-ms = <300>; + power-off-delay-us = <300>; + }; + + sdmmc3: mmc@78000400 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + assigned-clocks = <&tegra_car TEGRA30_CLK_SDMMC3>; + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_C>; + assigned-clock-rates = <50000000>; + + max-frequency = <50000000>; + keep-power-in-suspend; + bus-width = <4>; + non-removable; + + mmc-pwrseq = <&brcm_wifi_pwrseq>; + vmmc-supply = <&vdd_3v3_com>; + vqmmc-supply = <&vdd_1v8_vio>; + + /* Azurewave AW-NH615 BCM4329 or AW-NH665 BCM4330 */ + wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wake"; + }; + }; + + sdmmc4: mmc@78000600 { + status = "okay"; + bus-width = <8>; + vmmc-supply = <&vcore_emmc>; + vqmmc-supply = <&vdd_1v8_vio>; + mmc-ddr-3_3v; + non-removable; + }; + + /* USB via ASUS connector */ + usb@7d000000 { + compatible = "nvidia,tegra30-udc"; + status = "okay"; + dr_mode = "peripheral"; + }; + + usb-phy@7d000000 { + status = "okay"; + dr_mode = "peripheral"; + nvidia,hssync-start-delay = <0>; + nvidia,xcvr-lsfslew = <2>; + nvidia,xcvr-lsrslew = <2>; + vbus-supply = <&vdd_5v0_sys>; + }; + + /* Dock's USB port */ + usb@7d008000 { + status = "okay"; + }; + + usb-phy@7d008000 { + status = "okay"; + vbus-supply = <&vdd_5v0_bat>; + }; + + backlight: backlight { + compatible = "pwm-backlight"; + + enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>; + power-supply = <&vdd_5v0_bl>; + pwms = <&pwm 0 4000000>; + + brightness-levels = <1 255>; + num-interpolated-steps = <254>; + default-brightness-level = <15>; + }; + + /* PMIC has a built-in 32KHz oscillator which is used by PMC */ + clk32k_in: clock-32k { + compatible = "fixed-clock"; + #clock-cells = <0>; + 
clock-frequency = <32768>; + clock-output-names = "pmic-oscillator"; + }; + + cpus { + cpu0: cpu@0 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu1: cpu@1 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu2: cpu@2 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu3: cpu@3 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + }; + + firmware { + trusted-foundations { + compatible = "tlm,trusted-foundations"; + tlm,version-major = <2>; + tlm,version-minor = <8>; + }; + }; + + mains: ac-adapter-detect { + compatible = "gpio-charger"; + charger-type = "mains"; + gpios = <&gpio TEGRA_GPIO(H, 5) GPIO_ACTIVE_HIGH>; + }; + + pad-keys { + compatible = "gpio-keys"; + interrupt-parent = <&gpio>; + + power { + label = "Power"; + gpios = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-up { + label = "Volume Up"; + gpios = <&gpio TEGRA_GPIO(Q, 2) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-down { + label = "Volume Down"; + gpios = <&gpio TEGRA_GPIO(Q, 3) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + }; + + extcon-keys { + compatible = "gpio-keys"; + interrupt-parent = <&gpio>; + + dock-hall-sensor { + label = "Lid sensor"; + gpios = <&gpio TEGRA_GPIO(S, 6) GPIO_ACTIVE_LOW>; + linux,input-type = ; + linux,code = ; + debounce-interval = <500>; + wakeup-event-action = ; + wakeup-source; + }; + + lineout-detect { + label = "Audio dock line-out detect"; + gpios = <&gpio TEGRA_GPIO(X, 3) GPIO_ACTIVE_LOW>; + linux,input-type = ; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + }; + + sound { + nvidia,i2s-controller = <&tegra_i2s1>; + + nvidia,hp-det-gpios = <&gpio TEGRA_GPIO(W, 2) GPIO_ACTIVE_LOW>; + nvidia,hp-mute-gpios = <&gpio TEGRA_GPIO(X, 2) GPIO_ACTIVE_LOW>; + + clocks = <&tegra_car TEGRA30_CLK_PLL_A>, + <&tegra_car TEGRA30_CLK_PLL_A_OUT0>, + <&tegra_pmc TEGRA_PMC_CLK_OUT_1>; + clock-names = "pll_a", "pll_a_out0", "mclk"; + + assigned-clocks = <&tegra_car TEGRA30_CLK_EXTERN1>, + <&tegra_pmc TEGRA_PMC_CLK_OUT_1>; + + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_A_OUT0>, + <&tegra_car TEGRA30_CLK_EXTERN1>; + }; + + thermal-zones { + /* + * NCT72 has two sensors: + * + * 0: internal that monitors ambient/skin temperature + * 1: external that is connected to the CPU's diode + * + * Ideally we should use userspace thermal governor, + * but it's a much more complex solution. The "skin" + * zone exists as a simpler solution which prevents + * Transformers from getting too hot from a user's + * tactile perspective. The CPU zone is intended to + * protect silicon from damage. 
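+	 *
+	 * Note that the trip temperatures below are specified in
+	 * millicelsius: a passive trip with temperature = <57000>
+	 * and hysteresis = <200>, for example, starts throttling
+	 * at 57.0C and releases it once the sensor reads 56.8C.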
+	 */
+
+		skin-thermal {
+			polling-delay-passive = <1000>; /* milliseconds */
+			polling-delay = <5000>; /* milliseconds */
+
+			thermal-sensors = <&nct72 0>;
+
+			trips {
+				trip0: skin-alert {
+					/* throttle at 57C until temperature drops to 56.8C */
+					temperature = <57000>;
+					hysteresis = <200>;
+					type = "passive";
+				};
+
+				trip1: skin-crit {
+					/* shut down at 65C */
+					temperature = <65000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&trip0>;
+					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&actmon THERMAL_NO_LIMIT
+								  THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		cpu-thermal {
+			polling-delay-passive = <1000>; /* milliseconds */
+			polling-delay = <5000>; /* milliseconds */
+
+			thermal-sensors = <&nct72 1>;
+
+			trips {
+				trip2: cpu-alert {
+					/* throttle at 75C until temperature drops to 74.8C */
+					temperature = <75000>;
+					hysteresis = <200>;
+					type = "passive";
+				};
+
+				trip3: cpu-crit {
+					/* shut down at 90C */
+					temperature = <90000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map1 {
+					trip = <&trip2>;
+					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+							 <&actmon THERMAL_NO_LIMIT
+								  THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+};

From f286e3758117c41517c73f02d7506acf2edf9d1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?=
Date: Tue, 9 Mar 2021 22:25:53 +0200
Subject: [PATCH 1076/1202] ARM: tegra: Add device-tree for ASUS Transformer
 Pad TF300T
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add device-tree for ASUS Transformer Pad TF300T, which is an NVIDIA
Tegra30-based tablet device.
Co-developed-by: Ion Agorria
Signed-off-by: Ion Agorria
Co-developed-by: Svyatoslav Ryhel
Signed-off-by: Svyatoslav Ryhel
Signed-off-by: Michał Mirosław
---
 arch/arm/boot/dts/Makefile                |    1 +
 arch/arm/boot/dts/tegra30-asus-tf300t.dts | 1030 +++++++++++++++++++++
 2 files changed, 1031 insertions(+)
 create mode 100644 arch/arm/boot/dts/tegra30-asus-tf300t.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 7806a30a2d1b2..820c9f6ea5331 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1321,6 +1321,7 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \
 	tegra30-asus-nexus7-grouper-E1565.dtb \
 	tegra30-asus-nexus7-tilapia-E1565.dtb \
 	tegra30-asus-tf201.dtb \
+	tegra30-asus-tf300t.dtb \
 	tegra30-beaver.dtb \
 	tegra30-cardhu-a02.dtb \
 	tegra30-cardhu-a04.dtb \
diff --git a/arch/arm/boot/dts/tegra30-asus-tf300t.dts b/arch/arm/boot/dts/tegra30-asus-tf300t.dts
new file mode 100644
index 0000000000000..f4e0d81d76e87
--- /dev/null
+++ b/arch/arm/boot/dts/tegra30-asus-tf300t.dts
@@ -0,0 +1,1030 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "tegra30-asus-transformer-common.dtsi"
+#include "tegra30-asus-lvds-display.dtsi"
+
+/ {
+	model = "Asus Transformer Pad TF300T";
+	compatible = "asus,tf300t", "nvidia,tegra30";
+
+	gpio@6000d000 {
+		tf300t-init-hog {
+			gpio-hog;
+			gpios = ;
+			output-low;
+		};
+	};
+
+	pinmux@70000868 {
+		state_default: pinmux {
+			lcd_pwr2_pc6 {
+				nvidia,pins = "lcd_pwr2_pc6",
+						"lcd_dc1_pd2";
+				nvidia,function = "displaya";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			pbb3 {
+				nvidia,pins = "pbb3";
+				nvidia,function = "vgp3";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			pbb7 {
+				nvidia,pins = "pbb7";
+				nvidia,function = "i2s4";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			kb_row7_pr7 {
+				nvidia,pins = "kb_row7_pr7";
+				nvidia,function = "kbc";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			gmi_cs4_n_pk2 {
+				nvidia,pins = "gmi_cs4_n_pk2";
+				nvidia,function = "gmi";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+		};
+	};
+
+	uartc: serial@70006200 {
+		/* Azurewave AW-NH615 BCM4329B1 */
+		bluetooth {
+			compatible = "brcm,bcm4329-bt";
+		};
+	};
+
+	i2c@7000c400 {
+		/* Elantech EKTH1036 touchscreen */
+		touchscreen@10 {
+			compatible = "elan,ektf3624";
+			reg = <0x10>;
+
+			interrupt-parent = <&gpio>;
+			interrupts = ;
+			reset-gpios = <&gpio TEGRA_GPIO(H, 6) GPIO_ACTIVE_LOW>;
+
+			vcc33-supply = <&vdd_3v3_sys>;
+			vccio-supply = <&vdd_3v3_sys>;
+
+			touchscreen-size-x = <2240>;
+			touchscreen-size-y = <1408>;
+			touchscreen-inverted-y;
+		};
+	};
+
+	i2c@7000c500 {
+		clock-frequency = <400000>;
+
+		magnetometer@e {
+			mount-matrix = "0", "-1", "0",
+				       "-1", "0", "0",
+				       "0", "0", "-1";
+		};
+
+		gyroscope@68 {
+			mount-matrix = "-1", "0", "0",
+				       "0", "1", "0",
+				       "0", "0", "-1";
+
+			/* External I2C interface */
+			i2c-gate {
+				accelerometer@f {
+					mount-matrix = "0", "-1", "0",
+						       "-1", "0", "0",
+						       "0", "0", "1";
+				};
+			};
+		};
+	};
+
+	i2c@7000d000 {
+		/* Wolfson Microelectronics WM8903 audio codec */
+		wm8903: audio-codec@1a {
+			compatible = "wlf,wm8903";
+			reg = <0x1a>;
+
+			interrupt-parent = <&gpio>;
+			interrupts = ;
+
+			gpio-controller;
+			#gpio-cells = <2>;
+
+			micdet-cfg = <0>;
+			micdet-delay = <100>;
+
+			gpio-cfg = <
+				0xffffffff /* don't touch */
+				0xffffffff /* don't touch */
+				0x00000000 /* Speaker-enable GPIO, output, low */
+				0xffffffff /* don't touch */
+				0xffffffff /* don't touch */
+			>;
+
+ AVDD-supply = <&vdd_1v8_vio>; + CPVDD-supply = <&vdd_1v8_vio>; + DBVDD-supply = <&vdd_1v8_vio>; + DCVDD-supply = <&vdd_1v8_vio>; + }; + }; + + display-panel { + compatible = "innolux,g101ice-l01"; + }; + + sound { + compatible = "asus,tegra-audio-wm8903-tf300t", + "nvidia,tegra-audio-wm8903"; + nvidia,model = "ASUS Transformer WM8903"; + + nvidia,audio-routing = + "Headphone Jack", "HPOUTR", + "Headphone Jack", "HPOUTL", + "Int Spk", "ROP", + "Int Spk", "RON", + "Int Spk", "LOP", + "Int Spk", "LON", + "IN1L", "Mic Jack", + "IN2L", "Mic Jack", + "DMICDAT", "Int Mic"; + + nvidia,audio-codec = <&wm8903>; + nvidia,spkr-en-gpios = <&wm8903 2 GPIO_ACTIVE_HIGH>; + nvidia,headset; + }; + + memory-controller@7000f000 { + emc-timings-0 { + /* Elpida 1GB 667MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00030003 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010003 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x0000000a 0xc000003d + 0x00000001 0x00000002 0x00000008 0x00000004 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x00000014 0xc0000079 + 0x00000003 0x00000004 0x00000010 0x0000000b + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00130b10 0x70ea1f11 0x001f0000 >; + }; + }; + + emc-timings-1 { + /* Hynix 1GB 667MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00030003 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010003 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 
0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0605 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x0000000a 0xc000003d + 0x00000001 0x00000002 0x00000008 0x00000005 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x00000014 0xc0000079 + 0x00000003 0x00000004 0x00000011 0x0000000b + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00140b11 0x70ea1f12 0x001f0000 >; + }; + }; + + emc-timings-2 { + /* Micron 1GB 667MHZ */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x00000005 0xc000003d + 0x00000001 0x00000002 0x00000008 0x00000004 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x0000000a 0xc0000079 + 0x00000003 0x00000004 0x00000010 0x0000000a + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00140b10 0x70ea1f11 0x001f0000 >; + }; + }; + }; + + memory-controller@7000f400 { + emc-timings-0 { + /* Elpida 1GB 667MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x000000c0 0x00000000 
0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000004 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 
0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000a + 0x00000020 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000006 0x00000006 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200000>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000a 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200018>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000069 0x00000017 0x00000007 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000007 + 0x00000007 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000a 0x00000009 0x0000000a + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000b 0x00000006 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x00000008 
0x00000008 0x00000008 + 0x00000008 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x000002a0 0x0800013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x0f000021 0x00000802 0x00020000 + 0x00000100 0x0156000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xe8000000 0xff00ff49 >; + }; + }; + + emc-timings-1 { + /* Hynix 1GB 667MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = 
<0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000005 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000a + 0x00000020 0x00000007 0x00000003 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000006 0x00000006 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200000>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000b 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 
0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200018>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000020 + 0x0000006a 0x00000018 0x00000008 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000007 + 0x00000007 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000a 0x00000009 0x0000000a + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000b 0x00000006 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x00000008 0x00000008 0x00000008 + 0x00000008 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x000002a0 0x0800013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x0155000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xe8000000 0xff00ff49 >; + }; + }; + + emc-timings-2 { + /* Micron 1GB 667MHZ */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration 
= < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x00000009 + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000004 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xd8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000009 + 0x00000020 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000006 0x00000006 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 
0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xd8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200040>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000a 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200058>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000069 0x00000016 0x00000007 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000008 + 0x00000008 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000a 0x00000009 0x0000000b + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000b 0x00000006 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x000002a0 0x0800013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x0156000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xf8000000 0xff00ff49 >; + }; + }; + }; +}; + +&emc_icc_dvfs_opp_table { + /delete-node/ opp-750000000-1300; + /delete-node/ opp-800000000-1300; + /delete-node/ opp-900000000-1350; +}; + +&emc_bw_dfs_opp_table { + /delete-node/ opp-750000000; + /delete-node/ opp-800000000; + /delete-node/ opp-900000000; +}; From 3efa410a2f49f8cecba9b2080352323388b12268 Mon Sep 17 00:00:00 2001 From: Svyatoslav Ryhel Date: Fri, 7 May 2021 14:16:08 +0300 Subject: [PATCH 1077/1202] ARM: tegra: Add device-tree for ASUS Transformer Pad TF300TG Add device-tree for ASUS Transformer Pad TF300TG, which is an NVIDIA Tegra30-based tablet device, a variant of the TF300T
that has a 3G modem. Co-developed-by: Ion Agorria Signed-off-by: Ion Agorria Signed-off-by: Svyatoslav Ryhel --- arch/arm/boot/dts/Makefile | 1 + arch/arm/boot/dts/tegra30-asus-tf300tg.dts | 1072 ++++++++++++++++++++ 2 files changed, 1073 insertions(+) create mode 100644 arch/arm/boot/dts/tegra30-asus-tf300tg.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 820c9f6ea5331..06068e8d4fe88 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -1322,6 +1322,7 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \ tegra30-asus-nexus7-tilapia-E1565.dtb \ tegra30-asus-tf201.dtb \ tegra30-asus-tf300t.dtb \ + tegra30-asus-tf300tg.dtb \ tegra30-beaver.dtb \ tegra30-cardhu-a02.dtb \ tegra30-cardhu-a04.dtb \ diff --git a/arch/arm/boot/dts/tegra30-asus-tf300tg.dts b/arch/arm/boot/dts/tegra30-asus-tf300tg.dts new file mode 100644 index 0000000000000..e1509b48a4faf --- /dev/null +++ b/arch/arm/boot/dts/tegra30-asus-tf300tg.dts @@ -0,0 +1,1072 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "tegra30-asus-transformer-common.dtsi" +#include "tegra30-asus-lvds-display.dtsi" + +/ { + model = "Asus Transformer Pad 3G TF300TG"; + compatible = "asus,tf300tg", "nvidia,tegra30"; + + gpio@6000d000 { + tf300tg-init-hog { + gpio-hog; + gpios = , + , + , + , + , + , + , + , + , + , + , + , + , + ; + output-low; + }; + }; + + pinmux@70000868 { + state_default: pinmux { + pbb3 { + nvidia,pins = "pbb3"; + nvidia,function = "vgp3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb7 { + nvidia,pins = "pbb7"; + nvidia,function = "i2s4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs4_n_pk2 { + nvidia,pins = "gmi_cs4_n_pk2"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + lcd_pwr2_pc6 { + nvidia,pins = "lcd_pwr2_pc6", + "lcd_dc1_pd2"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_row7_pr7 { + nvidia,pins = "kb_row7_pr7"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_cs2_n_pw3 { + nvidia,pins = "spi2_cs2_n_pw3"; + nvidia,function = "spi2"; + nvidia,tristate = ; + }; + dap3_din_pp1 { + nvidia,pins = "dap3_din_pp1"; + nvidia,function = "i2s2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi1_sck_px5 { + nvidia,pins = "spi1_sck_px5"; + nvidia,function = "spi1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu5 { + nvidia,pins = "pu5"; + nvidia,function = "pwm2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_mosi_px0 { + nvidia,pins = "spi2_mosi_px0"; + nvidia,function = "spi2"; + }; + spi1_miso_px7 { + nvidia,pins = "spi1_miso_px7"; + nvidia,function = "spi1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk3_req_pee1 { + nvidia,pins = "clk3_req_pee1"; + nvidia,function = "dev3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_nxt_py2 { + nvidia,pins = "ulpi_nxt_py2"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_stp_py3 { + nvidia,pins = "ulpi_stp_py3"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu3 { + nvidia,pins = "pu3"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap1_din_pn1 { + nvidia,pins = "dap1_din_pn1"; + nvidia,function =
"i2s0"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + }; + }; + + uartc: serial@70006200 { + /* Azurewave AW-NH615 BCM4329B1 */ + bluetooth { + compatible = "brcm,bcm4329-bt"; + }; + }; + + i2c2: i2c@7000c400 { + /* Elantech EKTH1036 touchscreen */ + touchscreen@10 { + compatible = "elan,ektf3624"; + reg = <0x10>; + + interrupt-parent = <&gpio>; + interrupts = ; + reset-gpios = <&gpio TEGRA_GPIO(H, 6) GPIO_ACTIVE_LOW>; + + vcc33-supply = <&vdd_3v3_sys>; + vccio-supply = <&vdd_3v3_sys>; + + touchscreen-size-x = <2240>; + touchscreen-size-y = <1408>; + touchscreen-inverted-y; + }; + }; + + i2c3: i2c@7000c500 { + clock-frequency = <400000>; + + magnetometer@e { + mount-matrix = "1", "0", "0", + "0", "-1", "0", + "0", "0", "-1"; + }; + + gyroscope@68 { + mount-matrix = "-1", "0", "0", + "0", "1", "0", + "0", "0", "-1"; + + /* External I2C interface */ + i2c-gate { + accelerometer@f { + mount-matrix = "0", "-1", "0", + "-1", "0", "0", + "0", "0", "1"; + }; + }; + }; + }; + + i2c5: i2c@7000d000 { + /* Realtek ALC5631 audio codec */ + rt5631: audio-codec@1a { + compatible = "realtek,rt5631"; + reg = <0x1a>; + }; + }; + + display-panel { + compatible = "innolux,g101ice-l01"; + }; + + memory-controller@7000f000 { + emc-timings-0 { + /* Elpida 1GB 667MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x00000005 0xc000003d + 0x00000001 0x00000002 0x00000008 0x00000004 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x0000000a 0xc0000079 + 0x00000003 0x00000004 0x00000010 0x0000000b + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00130b10 0x70ea1f11 0x001f0000 >; + }; + }; + + emc-timings-1 { + /* Hynix 1GB 667MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + 
clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x00000005 0xc000003d + 0x00000001 0x00000002 0x00000008 0x00000004 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x0000000a 0xc0000079 + 0x00000003 0x00000004 0x00000010 0x0000000b + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00130b10 0x70ea1f11 0x001f0000 >; + }; + }; + + emc-timings-2 { + /* Micron 1GB 667MHZ */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x73430303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x72830504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000003 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x72440a06 0x001f0000 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emem-configuration = < 0x00000005 0x8000003d + 0x00000001 0x00000002 0x00000008 0x00000004 + 0x00000004 0x00000001 0x00000002 0x00000007 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000b0608 0x70850f09 0x001f0000 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emem-configuration = < 0x0000000a 0x80000079 + 0x00000003 0x00000004 0x00000010 0x0000000a + 0x0000000a 0x00000001 0x00000003 0x0000000b + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00130b10 0x70ea1f11 0x001f0000 >; + }; + }; + }; + + memory-controller@7000f400 { + emc-timings-0 { + /* Elpida 1GB 667MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = 
<25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000005 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 
+ 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000a + 0x00000020 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000006 0x00000004 0x0000000a + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000007 0x00000004 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200040>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000a 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00050000 0x00050000 0x00050000 + 0x00050000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200058>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + 
nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000069 0x00000017 0x00000007 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000007 + 0x00000007 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000b 0x00000009 0x0000000b + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000c 0x00000004 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00018000 0x00018000 0x00018000 + 0x00018000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x000002a0 0x0a00013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x0a000021 0x00000802 0x00020000 + 0x00000100 0x0156000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xe8000000 0xff00ff49 >; + }; + }; + + emc-timings-1 { + /* Hynix 1GB 667MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 
0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000005 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200048>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000a + 0x00000020 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000006 0x00000004 0x0000000a + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000007 0x00000004 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200040>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000a 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 
0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200058>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000020 + 0x00000069 0x00000017 0x00000007 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000007 + 0x00000007 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000b 0x00000009 0x0000000b + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000c 0x00000004 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00018000 0x00018000 0x00018000 + 0x00018000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x000002a0 0x0800013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x0156000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xe8000000 0xff00ff49 >; + }; + }; + + emc-timings-2 { + /* Micron 1GB 667MHZ */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000004 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000005 0x00000005 + 0x00000004 0x00000001 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 
0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x00000008 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000009 0x00000009 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000004 + 0x00000010 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000012 0x00000012 + 0x00000004 0x00000004 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000009 + 0x00000020 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000006 0x00000004 0x0000000a + 0x0000000b 0x00000607 
0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000023 0x00000023 + 0x00000004 0x00000007 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000007 0x00000004 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-333500000 { + clock-frequency = <333500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200000>; + nvidia,emc-mode-reset = <0x80000321>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x0000000f + 0x00000034 0x0000000a 0x00000003 0x00000003 + 0x00000008 0x00000002 0x00000009 0x00000003 + 0x00000003 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x000009e9 0x00000000 0x0000027a + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000007 0x0000000e 0x00000039 0x00000200 + 0x00000004 0x0000000a 0x00000000 0x00000004 + 0x00000005 0x00000a2a 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x002600a4 + 0x00008000 0x0003c000 0x0003c000 0x0003c000 + 0x0003c000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00040000 0x00040000 0x00040000 + 0x00040000 0x000002a0 0x0800013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x018b000c 0xa0f10000 0x00000000 + 0x00000000 0x800014d4 0xe8000000 0xff00ff89 >; + }; + + timing-667000000 { + clock-frequency = <667000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200018>; + nvidia,emc-mode-reset = <0x80000b71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000069 0x00000016 0x00000007 0x00000005 + 0x0000000c 0x00000003 0x00000011 0x00000007 + 0x00000007 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000b 0x00000009 0x0000000b + 0x00000011 0x00001412 0x00000000 0x00000504 + 0x00000002 0x0000000e 0x00000001 0x00000000 + 0x0000000c 0x00000016 0x00000072 0x00000200 + 0x00000005 0x00000015 0x00000000 0x00000006 + 0x00000007 0x00001453 0x0000000c 0x00000004 + 0x00000000 0x00000000 0x00005088 0xf00b0191 + 0x00008000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x000002a0 0x0600013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x0156000c 0xa0f10000 0x00000000 + 0x00000000 0x800028a5 0xe8000000 0xff00ff49 >; + }; + }; + }; +}; + +&emc_icc_dvfs_opp_table { + /delete-node/ 
opp-750000000-1300;
+	/delete-node/ opp-800000000-1300;
+	/delete-node/ opp-900000000-1350;
+};
+
+&emc_bw_dfs_opp_table {
+	/delete-node/ opp-750000000;
+	/delete-node/ opp-800000000;
+	/delete-node/ opp-900000000;
+};

From 586b6f4a3c7b4d2fbabd42ae957aaa1d3c49f312 Mon Sep 17 00:00:00 2001
From: Svyatoslav
Date: Sun, 24 Jan 2021 17:48:59 +0100
Subject: [PATCH 1078/1202] ARM: tegra: Add device-tree for ASUS Transformer Infinity TF700T

Add device-tree for ASUS Transformer Infinity TF700T, which is an
NVIDIA Tegra30-based tablet.

Tested-by: Andreas Westman Dorcsak
Tested-by: Jasper Korten
Co-developed-by: Ion Agorria
Co-developed-by: Maxim Schwalm
Signed-off-by: Ion Agorria
Signed-off-by: Maxim Schwalm
Signed-off-by: Svyatoslav Ryhel
---
 arch/arm/boot/dts/Makefile                |   1 +
 arch/arm/boot/dts/tegra30-asus-tf700t.dts | 807 ++++++++++++++++++++++
 2 files changed, 808 insertions(+)
 create mode 100644 arch/arm/boot/dts/tegra30-asus-tf700t.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 06068e8d4fe88..9ef8b43750570 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1323,6 +1323,7 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \
 	tegra30-asus-tf201.dtb \
 	tegra30-asus-tf300t.dtb \
 	tegra30-asus-tf300tg.dtb \
+	tegra30-asus-tf700t.dtb \
 	tegra30-beaver.dtb \
 	tegra30-cardhu-a02.dtb \
 	tegra30-cardhu-a04.dtb \
diff --git a/arch/arm/boot/dts/tegra30-asus-tf700t.dts b/arch/arm/boot/dts/tegra30-asus-tf700t.dts
new file mode 100644
index 0000000000000..2e2d7b54d6d82
--- /dev/null
+++ b/arch/arm/boot/dts/tegra30-asus-tf700t.dts
@@ -0,0 +1,807 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include "tegra30-asus-transformer-common.dtsi"
+
+/ {
+	model = "Asus Transformer Infinity TF700T";
+	compatible = "asus,tf700t", "nvidia,tegra30";
+
+	host1x@50000000 {
+		lcd: dc@54200000 {
+			clocks = <&tegra_car TEGRA30_CLK_DISP1>,
+				 <&tegra_car TEGRA30_CLK_PLL_D_OUT0>;
+		};
+	};
+
+	pinmux@70000868 {
+		state_default: pinmux {
+			lcd_pwr2_pc6 {
+				nvidia,pins = "lcd_pwr2_pc6",
+						"lcd_dc1_pd2";
+				nvidia,function = "displaya";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			pbb3 {
+				nvidia,pins = "pbb3";
+				nvidia,function = "vgp3";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			spi2_mosi_px0 {
+				nvidia,pins = "spi2_mosi_px0";
+				nvidia,function = "spi2";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			pbb7 {
+				nvidia,pins = "pbb7";
+				nvidia,function = "i2s4";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			kb_row7_pr7 {
+				nvidia,pins = "kb_row7_pr7";
+				nvidia,function = "kbc";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+			gmi_cs4_n_pk2 {
+				nvidia,pins = "gmi_cs4_n_pk2";
+				nvidia,function = "gmi";
+				nvidia,pull = ;
+				nvidia,tristate = ;
+				nvidia,enable-input = ;
+			};
+		};
+	};
+
+	uartc: serial@70006200 {
+		/* Azurewave AW-NH665 BCM4330B1 */
+		bluetooth {
+			compatible = "brcm,bcm4330-bt";
+		};
+	};
+
+	tc358768_refclk: tc358768-refclk {
+		compatible = "fixed-clock";
+		clock-frequency = <23100000>;
+		clock-accuracy = <100>;
+		#clock-cells = <0>;
+	};
+
+	tc358768_osc: tc358768-osc-gate {
+		compatible = "gpio-gate-clock";
+		enable-gpios = <&gpio TEGRA_GPIO(D, 2) GPIO_ACTIVE_HIGH>;
+		clocks = <&tc358768_refclk>;
+		#clock-cells = <0>;
+	};
+
+	i2c-switch-tc358768 {
+		compatible = "i2c-mux-gpio";
+
+		mux-gpios = <&gpio TEGRA_GPIO(X, 0) GPIO_ACTIVE_HIGH>;
+		i2c-parent = <&i2c1>;
+		idle-state = <0x0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		i2c@1
{ + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + dsi-bridge@7 { + compatible = "toshiba,tc358768"; + reg = <0x7>; + + #address-cells = <1>; + #size-cells = <0>; + + clocks = <&tc358768_osc>; + clock-names = "refclk"; + + reset-gpios = <&gpio TEGRA_GPIO(N, 6) GPIO_ACTIVE_LOW>; + + vddc-supply = <&vdd_1v2_mipi>; + vddio-supply = <&vdd_1v8_vio>; + vddmipi-supply = <&vdd_1v2_mipi>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + bridge_input: endpoint { + remote-endpoint = <&dpi_output>; + data-lines = <24>; + }; + }; + + port@1 { + reg = <1>; + + bridge_output: endpoint { + remote-endpoint = <&panel_input>; + }; + }; + }; + + /* + * Panasonic VVX10F004B00 or HYDIS HV101WU1-1E1 + * LCD SuperIPS+ Full HD panel. + */ + panel@1 { + compatible = "panasonic,vvx10f004b00"; + reg = <1>; + + power-supply = <&vdd_pnl>; + backlight = <&backlight>; + + port { + panel_input: endpoint { + remote-endpoint = <&bridge_output>; + }; + }; + }; + }; + }; + }; + + i2c@7000c400 { + /* Elantech ELAN-3024-7053 or 5184N FPC-1 REV: 2/3 touchscreen */ + touchscreen@10 { + compatible = "elan,ektf3624"; + reg = <0x10>; + + interrupt-parent = <&gpio>; + interrupts = ; + reset-gpios = <&gpio TEGRA_GPIO(H, 6) GPIO_ACTIVE_LOW>; + + vcc33-supply = <&vdd_3v3_sys>; + vccio-supply = <&vdd_3v3_sys>; + + touchscreen-size-x = <2944>; + touchscreen-size-y = <1856>; + touchscreen-inverted-y; + }; + }; + + i2c@7000c500 { + clock-frequency = <100000>; + + magnetometer@e { + mount-matrix = "1", "0", "0", + "0", "-1", "0", + "0", "0", "-1"; + }; + + gyroscope@68 { + mount-matrix = "0", "1", "0", + "1", "0", "0", + "0", "0", "-1"; + + /* External I2C interface */ + i2c-gate { + accelerometer@f { + mount-matrix = "0", "-1", "0", + "-1", "0", "0", + "0", "0", "1"; + }; + }; + }; + }; + + i2c@7000d000 { + /* Realtek ALC5631 audio codec */ + rt5631: audio-codec@1a { + compatible = "realtek,rt5631"; + reg = <0x1a>; + }; + }; + + vdd_1v2_mipi: regulator-mipi { + compatible = "regulator-fixed"; + regulator-name = "tc358768_1v2_vdd"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-enable-ramp-delay = <10000>; + regulator-boot-on; + gpio = <&gpio TEGRA_GPIO(BB, 3) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + haptic-feedback { + compatible = "gpio-vibrator"; + enable-gpios = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_HIGH>; + vcc-supply = <&vdd_3v3_sys>; + }; + + memory-controller@7000f000 { + emc-timings-0 { + /* Micron 1GB 800MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x75830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74630303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000002 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x73c30504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + 
nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000004 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x73840a06 0x001f0000 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000048 + 0x00000001 0x00000002 0x00000009 0x00000005 + 0x00000007 0x00000001 0x00000002 0x00000008 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000d0709 0x7086120a 0x001f0000 >; + }; + + timing-800000000 { + clock-frequency = <800000000>; + + nvidia,emem-configuration = < 0x0000000c 0xc0000090 + 0x00000004 0x00000005 0x00000013 0x0000000c + 0x0000000f 0x00000002 0x00000003 0x0000000c + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00160d13 0x712c2414 0x001f0000 >; + }; + }; + + emc-timings-1 { + /* Elpida 1GB 800MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x75830303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000020 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0502 0x74630303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000030 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000002 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0503 0x73c30504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000005 0x00000002 + 0x00000004 0x00000001 0x00000003 0x00000008 + 0x00000002 0x00000001 0x00000002 0x00000006 + 0x06020102 0x000a0505 0x73840a06 0x001f0000 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000048 + 0x00000001 0x00000002 0x00000009 0x00000005 + 0x00000007 0x00000001 0x00000002 0x00000008 + 0x00000002 0x00000002 0x00000003 0x00000006 + 0x06030202 0x000d0709 0x7086120a 0x001f0000 >; + }; + + timing-800000000 { + clock-frequency = <800000000>; + + nvidia,emem-configuration = < 0x0000000c 0xc0000090 + 0x00000004 0x00000005 0x00000013 0x0000000c + 0x0000000f 0x00000002 0x00000003 0x0000000c + 0x00000002 0x00000002 0x00000004 0x00000008 + 0x08040202 0x00160d13 0x712c2414 0x001f0000 >; + }; + }; + }; + + memory-controller@7000f400 { + emc-timings-0 { + /* Micron 1GB 800MHZ */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000006 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000007 0x00000007 + 
0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x0000000d 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x0000000e 0x0000000e + 0x00000004 0x00000003 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000004 + 0x0000001a 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x0000001c 0x0000001c + 0x00000004 0x00000005 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + 
nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000009 + 0x00000035 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000006 0x00000004 0x0000000a + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000038 0x00000038 + 0x00000004 0x00000009 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000007 0x00000004 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200000>; + nvidia,emc-mode-reset = <0x80000521>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x00000012 + 0x00000066 0x0000000c 0x00000004 0x00000003 + 0x00000008 0x00000002 0x0000000a 0x00000004 + 0x00000004 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x00000bf0 0x00000000 0x000002fc + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000008 0x0000000f 0x0000006c 0x00000200 + 0x00000004 0x00000010 0x00000000 0x00000004 + 0x00000005 0x00000c30 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x001d0084 + 0x00008000 0x00044000 0x00044000 0x00044000 + 0x00044000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x000002a0 0x0600013d 0x00000000 + 0x77fff884 0x01f1f508 0x05057404 0x54000007 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x0158000c 0xa0f10000 0x00000000 + 0x00000000 0x800018c8 0xe8000000 0xff00ff89 >; + }; + + timing-800000000 { + clock-frequency = <800000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200018>; + nvidia,emc-mode-reset = <0x80000d71>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000025 + 0x000000ce 0x0000001a 0x00000009 0x00000005 + 0x0000000d 0x00000004 0x00000013 0x00000009 + 0x00000009 0x00000004 0x00000001 0x00000000 + 0x00000007 0x0000000a 0x00000009 0x0000000a + 0x00000011 0x00001820 0x00000000 0x00000608 + 0x00000003 0x00000012 0x00000001 0x00000000 + 0x0000000f 0x00000018 0x000000d8 0x00000200 + 0x00000005 0x00000020 0x00000000 0x00000007 + 0x00000008 0x00001860 0x0000000b 0x00000006 + 0x00000000 0x00000000 0x00005088 0xf0070191 + 0x00008000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 
0x00018000 0x00018000 0x00018000 + 0x00018000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x000002a0 0x0800013d 0x22220000 + 0x77fff884 0x01f1f501 0x07077404 0x54000000 + 0x080001e8 0x08000021 0x00000802 0x00020000 + 0x00000100 0x00f0000c 0xa0f10000 0x00000000 + 0x00000000 0x8000308c 0xe8000000 0xff00ff49 >; + }; + }; + + emc-timings-1 { + /* Elpida 1GB 800MHZ */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000006 0x00000000 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x000000c0 0x00000000 0x00000030 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000007 0x00000007 + 0x00000004 0x00000002 0x00000000 0x00000004 + 0x00000005 0x000000c7 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe8000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000002 + 0x0000000d 0x00000001 0x00000000 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000000 + 0x00000000 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000181 0x00000000 0x00000060 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x0000000e 0x0000000e + 0x00000004 0x00000003 0x00000000 0x00000004 + 0x00000005 0x0000018e 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe8000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = 
<0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000004 + 0x0000001a 0x00000003 0x00000001 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000001 + 0x00000001 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000005 0x00000004 0x0000000a + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x0000001c 0x0000001c + 0x00000004 0x00000005 0x00000000 0x00000004 + 0x00000005 0x0000031c 0x00000006 0x00000004 + 0x00000000 0x00000000 0x00004288 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00000000 + 0x00000040 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe8000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100003>; + nvidia,emc-mode-2 = <0x80200008>; + nvidia,emc-mode-reset = <0x80001221>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000009 + 0x00000035 0x00000007 0x00000002 0x00000002 + 0x0000000a 0x00000005 0x0000000b 0x00000002 + 0x00000002 0x00000003 0x00000001 0x00000000 + 0x00000005 0x00000006 0x00000004 0x0000000a + 0x0000000b 0x00000607 0x00000000 0x00000181 + 0x00000002 0x00000002 0x00000001 0x00000000 + 0x00000007 0x0000000f 0x00000038 0x00000038 + 0x00000004 0x00000009 0x00000000 0x00000004 + 0x00000005 0x00000638 0x00000007 0x00000004 + 0x00000000 0x00000000 0x00004288 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000002a0 0x0800211c 0x00000000 + 0x77fff884 0x01f1f108 0x05057404 0x54000007 + 0x08000168 0x08000000 0x00000802 0x00020000 + 0x00000100 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000d22 0xe8000000 0xff00ff00 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x80100002>; + nvidia,emc-mode-2 = <0x80200000>; + nvidia,emc-mode-reset = <0x80000521>; + nvidia,emc-zcal-cnt-long = <0x00000040>; + + nvidia,emc-configuration = < 0x00000012 + 0x00000066 0x0000000c 0x00000004 0x00000003 + 0x00000008 0x00000002 0x0000000a 0x00000004 + 0x00000004 0x00000002 0x00000001 0x00000000 + 0x00000004 0x00000006 0x00000004 0x0000000a + 0x0000000c 0x00000bf0 0x00000000 0x000002fc + 0x00000001 0x00000008 0x00000001 0x00000000 + 0x00000008 0x0000000f 0x0000006c 0x00000200 + 0x00000004 0x00000010 0x00000000 0x00000004 + 0x00000005 0x00000c30 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00007088 0x001d0084 + 0x00008000 0x00044000 0x00044000 0x00044000 + 0x00044000 0x00014000 0x00014000 0x00014000 + 0x00014000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 
0x00000000 0x00000000
+					0x00000000 0x00048000 0x00048000 0x00048000
+					0x00048000 0x000002a0 0x0600013d 0x00000000
+					0x77fff884 0x01f1f508 0x05057404 0x54000007
+					0x080001e8 0x08000021 0x00000802 0x00020000
+					0x00000100 0x0158000c 0xa0f10000 0x00000000
+					0x00000000 0x800018c8 0xe8000000 0xff00ff89 >;
+			};
+
+			timing-800000000 {
+				clock-frequency = <800000000>;
+
+				nvidia,emc-auto-cal-interval = <0x001fffff>;
+				nvidia,emc-mode-1 = <0x80100002>;
+				nvidia,emc-mode-2 = <0x80200018>;
+				nvidia,emc-mode-reset = <0x80000d71>;
+				nvidia,emc-zcal-cnt-long = <0x00000040>;
+				nvidia,emc-cfg-periodic-qrst;
+
+				nvidia,emc-configuration = < 0x00000025
+					0x000000ce 0x0000001a 0x00000009 0x00000005
+					0x0000000d 0x00000004 0x00000013 0x00000009
+					0x00000009 0x00000004 0x00000001 0x00000000
+					0x00000007 0x0000000a 0x00000009 0x0000000a
+					0x00000011 0x00001820 0x00000000 0x00000608
+					0x00000003 0x00000012 0x00000001 0x00000000
+					0x0000000f 0x00000018 0x000000d8 0x00000200
+					0x00000005 0x00000020 0x00000000 0x00000007
+					0x00000008 0x00001860 0x0000000b 0x00000006
+					0x00000000 0x00000000 0x00005088 0xf0070191
+					0x00008000 0x0000000a 0x0000000a 0x0000000a
+					0x0000000a 0x0000000a 0x0000000a 0x0000000a
+					0x0000000a 0x00018000 0x00018000 0x00018000
+					0x00018000 0x00000000 0x00000000 0x00000000
+					0x00000000 0x00000000 0x00000000 0x00000000
+					0x00000000 0x00000000 0x00000000 0x00000000
+					0x00000000 0x0000000a 0x0000000a 0x0000000a
+					0x0000000a 0x000002a0 0x0a00013d 0x22220000
+					0x77fff884 0x01f1f501 0x07077404 0x54000000
+					0x080001e8 0x08000021 0x00000802 0x00020000
+					0x00000100 0x00f0000c 0xa0f10000 0x00000000
+					0x00000000 0x8000308c 0xe8000000 0xff00ff49 >;
+			};
+		};
+	};
+};
+
+&emc_icc_dvfs_opp_table {
+	/delete-node/ opp-900000000-1350;
+};
+
+&emc_bw_dfs_opp_table {
+	/delete-node/ opp-900000000;
+};

From d51672d683899b522949f32cbd515aba5e8e258a Mon Sep 17 00:00:00 2001
From: Svyatoslav Ryhel
Date: Tue, 9 Mar 2021 22:29:37 +0200
Subject: [PATCH 1079/1202] ARM: tegra: Add device-tree for Pegatron Chagall tablet

Add device-tree for Pegatron Chagall, which is an NVIDIA Tegra30-based
Android tablet.
Co-developed-by: Raffaele Tranquillini
Signed-off-by: Raffaele Tranquillini
Co-developed-by: Ion Agorria
Signed-off-by: Ion Agorria
Signed-off-by: Svyatoslav Ryhel
---
 arch/arm/boot/dts/Makefile                    |    3 +-
 .../arm/boot/dts/tegra30-pegatron-chagall.dts | 2799 +++++++++++++++++
 2 files changed, 2801 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/boot/dts/tegra30-pegatron-chagall.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 9ef8b43750570..3421e9a3763ad 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1328,7 +1328,8 @@ dtb-$(CONFIG_ARCH_TEGRA_3x_SOC) += \
 	tegra30-cardhu-a02.dtb \
 	tegra30-cardhu-a04.dtb \
 	tegra30-colibri-eval-v3.dtb \
-	tegra30-ouya.dtb
+	tegra30-ouya.dtb \
+	tegra30-pegatron-chagall.dtb
 dtb-$(CONFIG_ARCH_TEGRA_114_SOC) += \
 	tegra114-dalmore.dtb \
 	tegra114-roth.dtb \
diff --git a/arch/arm/boot/dts/tegra30-pegatron-chagall.dts b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts
new file mode 100644
index 0000000000000..421cc25e69d89
--- /dev/null
+++ b/arch/arm/boot/dts/tegra30-pegatron-chagall.dts
@@ -0,0 +1,2799 @@
+// SPDX-License-Identifier: GPL-2.0
+/dts-v1/;
+
+#include
+#include
+#include
+
+#include "tegra30.dtsi"
+#include "tegra30-cpu-opp.dtsi"
+#include "tegra30-cpu-opp-microvolt.dtsi"
+#include "tegra30-asus-lvds-display.dtsi"
+
+/ {
+	model = "Pegatron Chagall based tablet";
+	compatible = "pegatron,chagall", "nvidia,tegra30";
+
+	aliases {
+		mmc0 = &sdmmc4; /* eMMC */
+		mmc1 = &sdmmc1; /* uSD slot */
+		mmc2 = &sdmmc3; /* WiFi */
+
+		rtc0 = &pmic;
+		rtc1 = "/rtc@7000e000";
+
+		display0 = &lcd;
+		display1 = &hdmi;
+
+		serial1 = &uartc; /* Bluetooth */
+		serial2 = &uartb; /* GPS */
+	};
+
+	/*
+	 * The decompressor and also some bootloaders rely on a
+	 * pre-existing /chosen node to be available to insert the
+	 * command line and merge other ATAGS info.
+ */ + chosen {}; + + memory@80000000 { + reg = <0x80000000 0x40000000>; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + linux,cma@80000000 { + compatible = "shared-dma-pool"; + alloc-ranges = <0x80000000 0x30000000>; + size = <0x10000000>; /* 256MiB */ + linux,cma-default; + reusable; + }; + + ramoops@beb00000 { + compatible = "ramoops"; + reg = <0xbeb00000 0x10000>; /* 64kB */ + console-size = <0x8000>; /* 32kB */ + record-size = <0x400>; /* 1kB */ + ecc-size = <16>; + }; + + trustzone@bfe00000 { + reg = <0xbfe00000 0x200000>; /* 2MB */ + no-map; + }; + }; + + host1x@50000000 { + lcd: dc@54200000 { + rgb { + status = "okay"; + + port@0 { + dpi_output: endpoint { + remote-endpoint = <&bridge_input>; + bus-width = <24>; + }; + }; + }; + }; + + hdmi: hdmi@54280000 { + status = "okay"; + + hdmi-supply = <&hdmi_5v0_sys>; + pll-supply = <&vdd_1v8_vio>; + vdd-supply = <&vdd_3v3_sys>; + + nvidia,hpd-gpio = <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>; + nvidia,ddc-i2c-bus = <&hdmi_ddc>; + }; + }; + + gpio@6000d000 {}; /* set up from downstream if necessary */ + + vde@6001a000 { + assigned-clocks = <&tegra_car TEGRA30_CLK_VDE>; + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_P>; + assigned-clock-rates = <408000000>; + }; + + pinmux@70000868 { + pinctrl-names = "default"; + pinctrl-0 = <&state_default>; + + state_default: pinmux { + /* SDMMC1 pinmux */ + sdmmc1_clk_pz0 { + nvidia,pins = "sdmmc1_clk_pz0"; + nvidia,function = "sdmmc1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc1_dat3_py4 { + nvidia,pins = "sdmmc1_dat3_py4", + "sdmmc1_dat2_py5", + "sdmmc1_dat1_py6", + "sdmmc1_dat0_py7", + "sdmmc1_cmd_pz1"; + nvidia,function = "sdmmc1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SDMMC2 pinmux */ + vi_d1_pd5 { + nvidia,pins = "vi_d1_pd5", + "vi_d2_pl0", + "vi_d3_pl1", + "vi_d5_pl3", + "vi_d7_pl5"; + nvidia,function = "sdmmc2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_d8_pl6 { + nvidia,pins = "vi_d8_pl6", + "vi_d9_pl7"; + nvidia,function = "sdmmc2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + + /* SDMMC3 pinmux */ + sdmmc3_clk_pa6 { + nvidia,pins = "sdmmc3_clk_pa6"; + nvidia,function = "sdmmc3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc3_cmd_pa7 { + nvidia,pins = "sdmmc3_cmd_pa7", + "sdmmc3_dat3_pb4", + "sdmmc3_dat2_pb5", + "sdmmc3_dat2_pb5", + "sdmmc3_dat1_pb6", + "sdmmc3_dat0_pb7", + "sdmmc3_dat5_pd0", + "sdmmc3_dat4_pd1", + "sdmmc3_dat6_pd3", + "sdmmc3_dat7_pd4"; + nvidia,function = "sdmmc3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SDMMC4 pinmux */ + sdmmc4_clk_pcc4 { + nvidia,pins = "sdmmc4_clk_pcc4"; + nvidia,function = "sdmmc4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc4_cmd_pt7 { + nvidia,pins = "sdmmc4_cmd_pt7", + "sdmmc4_dat0_paa0", + "sdmmc4_dat1_paa1", + "sdmmc4_dat2_paa2", + "sdmmc4_dat3_paa3", + "sdmmc4_dat4_paa4", + "sdmmc4_dat5_paa5", + "sdmmc4_dat6_paa6", + "sdmmc4_dat7_paa7"; + nvidia,function = "sdmmc4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* I2C pinmux */ + gen1_i2c_scl_pc4 { + nvidia,pins = "gen1_i2c_scl_pc4", + "gen1_i2c_sda_pc5"; + nvidia,function = "i2c1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + gen2_i2c_scl_pt5 { + nvidia,pins = 
"gen2_i2c_scl_pt5", + "gen2_i2c_sda_pt6"; + nvidia,function = "i2c2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + cam_i2c_scl_pbb1 { + nvidia,pins = "cam_i2c_scl_pbb1", + "cam_i2c_sda_pbb2"; + nvidia,function = "i2c3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + ddc_scl_pv4 { + nvidia,pins = "ddc_scl_pv4", + "ddc_sda_pv5"; + nvidia,function = "i2c4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + }; + pwr_i2c_scl_pz6 { + nvidia,pins = "pwr_i2c_scl_pz6", + "pwr_i2c_sda_pz7"; + nvidia,function = "i2cpwr"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + + /* HDMI-CEC pinmux */ + hdmi_cec_pee3 { + nvidia,pins = "hdmi_cec_pee3"; + nvidia,function = "cec"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,open-drain = ; + nvidia,lock = <0>; + }; + + /* UART-A */ + ulpi_data0_po1 { + nvidia,pins = "ulpi_data0_po1"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_data1_po2 { + nvidia,pins = "ulpi_data1_po2", + "ulpi_data2_po3", + "ulpi_data3_po4", + "ulpi_data4_po5", + "ulpi_data5_po6", + "ulpi_data6_po7"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_data7_po0 { + nvidia,pins = "ulpi_data7_po0"; + nvidia,function = "uarta"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-B */ + uart2_txd_pc2 { + nvidia,pins = "uart2_txd_pc2", + "uart2_rts_n_pj6"; + nvidia,function = "uartb"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + uart2_rxd_pc3 { + nvidia,pins = "uart2_rxd_pc3", + "uart2_cts_n_pj5"; + nvidia,function = "uartb"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-C */ + uart3_cts_n_pa1 { + nvidia,pins = "uart3_cts_n_pa1", + "uart3_rxd_pw7"; + nvidia,function = "uartc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + uart3_rts_n_pc0 { + nvidia,pins = "uart3_rts_n_pc0", + "uart3_txd_pw6"; + nvidia,function = "uartc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* UART-D */ + ulpi_clk_py0 { + nvidia,pins = "ulpi_clk_py0", + "ulpi_stp_py3"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + ulpi_dir_py1 { + nvidia,pins = "ulpi_dir_py1", + "ulpi_nxt_py2"; + nvidia,function = "uartd"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + + /* I2S pinmux */ + dap1_fs_pn0 { + nvidia,pins = "dap1_fs_pn0", + "dap1_din_pn1", + "dap1_dout_pn2", + "dap1_sclk_pn3"; + nvidia,function = "i2s0"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap2_fs_pa2 { + nvidia,pins = "dap2_fs_pa2", + "dap2_sclk_pa3", + "dap2_din_pa4", + "dap2_dout_pa5"; + nvidia,function = "i2s1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap3_fs_pp0 { + nvidia,pins = "dap3_fs_pp0", + "dap3_din_pp1", + "dap3_dout_pp2", + "dap3_sclk_pp3"; + nvidia,function = "i2s2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + dap4_fs_pp4 { + nvidia,pins = "dap4_fs_pp4", + "dap4_din_pp5", + "dap4_dout_pp6", + "dap4_sclk_pp7"; + nvidia,function = "i2s3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pcc2 { + nvidia,pins = "pcc2"; + 
nvidia,function = "i2s4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* PCI-e pinmux */ + pex_l2_rst_n_pcc6 { + nvidia,pins = "pex_l2_rst_n_pcc6", + "pex_l0_rst_n_pdd1", + "pex_l1_rst_n_pdd5"; + nvidia,function = "pcie"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pex_l2_clkreq_n_pcc7 { + nvidia,pins = "pex_l2_clkreq_n_pcc7", + "pex_l0_prsnt_n_pdd0", + "pex_l0_clkreq_n_pdd2", + "pex_l2_prsnt_n_pdd7"; + nvidia,function = "pcie"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pex_wake_n_pdd3 { + nvidia,pins = "pex_wake_n_pdd3"; + nvidia,function = "pcie"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* SPI pinmux */ + spi1_mosi_px4 { + nvidia,pins = "spi1_mosi_px4", + "spi1_sck_px5", + "spi1_cs0_n_px6", + "spi1_miso_px7"; + nvidia,function = "spi1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_cs1_n_pw2 { + nvidia,pins = "spi2_cs1_n_pw2", + "spi2_cs2_n_pw3"; + nvidia,function = "spi2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_sck_px2 { + nvidia,pins = "spi2_sck_px2"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_a16_pj7 { + nvidia,pins = "gmi_a16_pj7", + "gmi_a19_pk7"; + nvidia,function = "spi4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_a17_pb0 { + nvidia,pins = "gmi_a17_pb0", + "gmi_a18_pb1"; + nvidia,function = "spi4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spi2_mosi_px0 { + nvidia,pins = "spi2_mosi_px0"; + nvidia,function = "spi6"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spdif_out_pk5 { + nvidia,pins = "spdif_out_pk5"; + nvidia,function = "spdif"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + spdif_in_pk6 { + nvidia,pins = "spdif_in_pk6"; + nvidia,function = "spdif"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* Display A pinmux */ + lcd_pwr0_pb2 { + nvidia,pins = "lcd_pwr0_pb2", + "lcd_pclk_pb3", + "lcd_pwr1_pc1", + "lcd_pwr2_pc6", + "lcd_d0_pe0", + "lcd_d1_pe1", + "lcd_d2_pe2", + "lcd_d3_pe3", + "lcd_d4_pe4", + "lcd_d5_pe5", + "lcd_d6_pe6", + "lcd_d7_pe7", + "lcd_d8_pf0", + "lcd_d9_pf1", + "lcd_d10_pf2", + "lcd_d11_pf3", + "lcd_d12_pf4", + "lcd_d13_pf5", + "lcd_d14_pf6", + "lcd_d15_pf7", + "lcd_de_pj1", + "lcd_hsync_pj3", + "lcd_vsync_pj4", + "lcd_d16_pm0", + "lcd_d17_pm1", + "lcd_d18_pm2", + "lcd_d19_pm3", + "lcd_d20_pm4", + "lcd_d21_pm5", + "lcd_d22_pm6", + "lcd_d23_pm7", + "lcd_cs0_n_pn4", + "lcd_sdout_pn5", + "lcd_dc0_pn6", + "lcd_sdin_pz2", + "lcd_wr_n_pz3", + "lcd_sck_pz4", + "lcd_cs1_n_pw0", + "lcd_m1_pw1"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + lcd_dc1_pd2 { + nvidia,pins = "lcd_dc1_pd2"; + nvidia,function = "displaya"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk_32k_out_pa0 { + nvidia,pins = "clk_32k_out_pa0"; + nvidia,function = "blink"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* KBC keys */ + kb_row0_pr0 { + nvidia,pins = "kb_row0_pr0", + "kb_row1_pr1", + "kb_row2_pr2", + "kb_row3_pr3", + "kb_row8_ps0", + "kb_col0_pq0", + "kb_col1_pq1", + "kb_col2_pq2", + "kb_col3_pq3", + "kb_col4_pq4", + "kb_col5_pq5", + "kb_col7_pq7"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_row4_pr4 { + 
nvidia,pins = "kb_row4_pr4", + "kb_row7_pr7", + "kb_row10_ps2", + "kb_row13_ps5"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_row11_ps3 { + nvidia,pins = "kb_row11_ps3", + "kb_row12_ps4", + "kb_row15_ps7"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + kb_row14_ps6 { + nvidia,pins = "kb_row14_ps6"; + nvidia,function = "kbc"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_iordy_pi5 { + nvidia,pins = "gmi_iordy_pi5"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_pclk_pt0 { + nvidia,pins = "vi_pclk_pt0"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + pu1 { + nvidia,pins = "pu1"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu2 { + nvidia,pins = "pu2"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv0 { + nvidia,pins = "pv0"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv1 { + nvidia,pins = "pv1"; + nvidia,function = "rsvd1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pcc1 { + nvidia,pins = "pcc1"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + sdmmc4_rst_n_pcc3 { + nvidia,pins = "sdmmc4_rst_n_pcc3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv3 { + nvidia,pins = "pv3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_vsync_pd6 { + nvidia,pins = "vi_vsync_pd6", + "vi_hsync_pd7"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + vi_d10_pt2 { + nvidia,pins = "vi_d10_pt2", + "vi_d0_pt4", "pbb0"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_d11_pt3 { + nvidia,pins = "vi_d11_pt3"; + nvidia,function = "rsvd2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pu0 { + nvidia,pins = "pu0"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu3 { + nvidia,pins = "pu3"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu6 { + nvidia,pins = "pu6"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pex_l1_prsnt_n_pdd4 { + nvidia,pins = "pex_l1_prsnt_n_pdd4", + "pex_l1_clkreq_n_pdd6"; + nvidia,function = "rsvd4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_wait_pi7 { + nvidia,pins = "gmi_wait_pi7", + "gmi_cs0_n_pj0", + "gmi_cs1_n_pj2", + "gmi_cs4_n_pk2"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad0_pg0 { + nvidia,pins = "gmi_ad0_pg0", + "gmi_ad1_pg1", + "gmi_ad2_pg2", + "gmi_ad3_pg3", + "gmi_ad4_pg4", + "gmi_ad5_pg5", + "gmi_ad6_pg6", + "gmi_ad7_pg7", + "gmi_wr_n_pi0", + "gmi_oe_n_pi1", + "gmi_dqs_pi2", + "gmi_adv_n_pk0", + "gmi_clk_pk1"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs2_n_pk3 { + nvidia,pins = "gmi_cs2_n_pk3"; + nvidia,function = "rsvd1"; 
+ nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs3_n_pk4 { + nvidia,pins = "gmi_cs3_n_pk4"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad10_ph2 { + nvidia,pins = "gmi_ad10_ph2", + "gmi_ad11_ph3", + "gmi_ad14_ph6"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad13_ph5 { + nvidia,pins = "gmi_ad13_ph5", + "gmi_ad12_ph4", + "gmi_cs7_n_pi6"; + nvidia,function = "nand"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_rst_n_pi4 { + nvidia,pins = "gmi_rst_n_pi4"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_ad8_ph0 { + nvidia,pins = "gmi_ad8_ph0"; + nvidia,function = "pwm0"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_ad9_ph1 { + nvidia,pins = "gmi_ad9_ph1"; + nvidia,function = "pwm1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + gmi_wp_n_pc7 { + nvidia,pins = "gmi_wp_n_pc7"; + nvidia,function = "gmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + gmi_cs6_n_pi3 { + nvidia,pins = "gmi_cs6_n_pi3"; + nvidia,function = "sata"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + vi_d4_pl2 { + nvidia,pins = "vi_d4_pl2"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + vi_d6_pl4 { + nvidia,pins = "vi_d6_pl4"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + nvidia,lock = <0>; + nvidia,ioreset = <0>; + }; + vi_mclk_pt1 { + nvidia,pins = "vi_mclk_pt1"; + nvidia,function = "vi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* HDMI hot-plug-detect */ + hdmi_int_pn7 { + nvidia,pins = "hdmi_int_pn7"; + nvidia,function = "hdmi"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pu4 { + nvidia,pins = "pu4"; + nvidia,function = "pwm1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pu5 { + nvidia,pins = "pu5"; + nvidia,function = "pwm2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + jtag_rtck_pu7 { + nvidia,pins = "jtag_rtck_pu7"; + nvidia,function = "rtck"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + crt_hsync_pv6 { + nvidia,pins = "crt_hsync_pv6", + "crt_vsync_pv7"; + nvidia,function = "crt"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + clk1_out_pw4 { + nvidia,pins = "clk1_out_pw4"; + nvidia,function = "extperiph1"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk2_out_pw5 { + nvidia,pins = "clk2_out_pw5"; + nvidia,function = "extperiph2"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk3_out_pee0 { + nvidia,pins = "clk3_out_pee0"; + nvidia,function = "extperiph3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + sys_clk_req_pz5 { + nvidia,pins = "sys_clk_req_pz5"; + nvidia,function = "sysclk"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pbb4 { + nvidia,pins = "pbb4"; + nvidia,function = "vgp4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb5 { + nvidia,pins = "pbb5"; + nvidia,function = "vgp5"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb6 { + nvidia,pins = "pbb6"; + nvidia,function = "vgp6"; + nvidia,pull = ; + nvidia,tristate = 
; + nvidia,enable-input = ; + }; + + clk1_req_pee2 { + nvidia,pins = "clk1_req_pee2"; + nvidia,function = "dap"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + clk2_req_pcc5 { + nvidia,pins = "clk2_req_pcc5"; + nvidia,function = "dap"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + clk3_req_pee1 { + nvidia,pins = "clk3_req_pee1"; + nvidia,function = "dev3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + owr { + nvidia,pins = "owr"; + nvidia,function = "owr"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pv2 { + nvidia,pins = "pv2", + "kb_row5_pr5"; + nvidia,function = "owr"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + pbb3 { + nvidia,pins = "pbb3"; + nvidia,function = "vgp3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + pbb7 { + nvidia,pins = "pbb7"; + nvidia,function = "i2s4"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + cam_mclk_pcc0 { + nvidia,pins = "cam_mclk_pcc0"; + nvidia,function = "vi_alt3"; + nvidia,pull = ; + nvidia,tristate = ; + nvidia,enable-input = ; + }; + + /* GPIO power/drive control */ + drive_dap1 { + nvidia,pins = "drive_dap1", + "drive_dap2", + "drive_dbg", + "drive_at5", + "drive_gme", + "drive_ddc", + "drive_ao1", + "drive_uart3"; + nvidia,high-speed-mode = <0>; + nvidia,schmitt = ; + nvidia,low-power-mode = ; + nvidia,pull-down-strength = <31>; + nvidia,pull-up-strength = <31>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_sdio1 { + nvidia,pins = "drive_sdio1"; + nvidia,high-speed-mode = <0>; + nvidia,schmitt = ; + nvidia,pull-down-strength = <5>; + nvidia,pull-up-strength = <5>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_sdio3 { + nvidia,pins = "drive_sdio3"; + nvidia,high-speed-mode = <0>; + nvidia,schmitt = ; + nvidia,pull-down-strength = <46>; + nvidia,pull-up-strength = <42>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_gma { + nvidia,pins = "drive_gma", + "drive_gmb", + "drive_gmc", + "drive_gmd"; + nvidia,pull-down-strength = <9>; + nvidia,pull-up-strength = <9>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + drive_lcd2 { + nvidia,pins = "drive_lcd2"; + nvidia,high-speed-mode = <0>; + nvidia,schmitt = ; + nvidia,low-power-mode = ; + nvidia,pull-down-strength = <20>; + nvidia,pull-up-strength = <20>; + nvidia,slew-rate-rising = ; + nvidia,slew-rate-falling = ; + }; + }; + }; + + uartb: serial@70006040 { + compatible = "nvidia,tegra30-hsuart"; + status = "okay"; + + /* Broadcom GPS BCM47511 */ + }; + + uartc: serial@70006200 { + compatible = "nvidia,tegra30-hsuart"; + status = "okay"; + + nvidia,adjust-baud-rates = <0 9600 100>, + <9600 115200 200>, + <1000000 4000000 136>; + + /* Azurewave AW-AH663 BCM4330B1 */ + bluetooth { + compatible = "brcm,bcm4330-bt"; + max-speed = <4000000>; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "txco"; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wakeup"; + + device-wakeup-gpios = <&gpio TEGRA_GPIO(U, 1) GPIO_ACTIVE_HIGH>; + shutdown-gpios = <&gpio TEGRA_GPIO(U, 0) GPIO_ACTIVE_HIGH>; + + vbat-supply = <&vdd_3v3_sys>; + vddio-supply = <&vdd_1v8_vio>; + }; + }; + + pwm: pwm@7000a000 { + status = "okay"; + }; + + i2c1: i2c@7000c000 { + status = "okay"; + clock-frequency = <400000>; + + /* Wolfson Microelectronics WM8903 audio codec */ + wm8903: audio-codec@1a { + compatible = 
"wlf,wm8903"; + reg = <0x1a>; + + interrupt-parent = <&gpio>; + interrupts = ; + + gpio-controller; + #gpio-cells = <2>; + + micdet-cfg = <0>; + micdet-delay = <100>; + + gpio-cfg = <0xffffffff 0xffffffff 0 0xffffffff 0xffffffff>; + + AVDD-supply = <&vdd_1v8_vio>; + CPVDD-supply = <&vdd_1v8_vio>; + DBVDD-supply = <&vdd_1v8_vio>; + DCVDD-supply = <&vdd_1v8_vio>; + }; + + }; + + i2c2: i2c@7000c400 { + status = "okay"; + clock-frequency = <400000>; + + /* Atmel touchscreen */ + touchscreen@4d { + compatible = "atmel,maxtouch"; + reg = <0x4d>; + + interrupt-parent = <&gpio>; + interrupts = ; + reset-gpios = <&gpio TEGRA_GPIO(H, 6) GPIO_ACTIVE_LOW>; + + vdda-supply = <&vdd_3v3_sys>; + vdd-supply = <&vdd_3v3_sys>; + }; + }; + + i2c3: i2c@7000c500 { + status = "okay"; + clock-frequency = <400000>; + + light-sensor@44 { + compatible = "isil,isl29023"; + reg = <0x44>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vcc-supply = <&vdd_3v3_sen>; + }; + + /* AsahiKASEI AK8975 magnetometer sensor */ + magnetometer@c { + compatible = "asahi-kasei,ak8975"; + reg = <0x0c>; + + vdd-supply = <&vdd_3v3_sen>; + vid-supply = <&vdd_1v8_vio>; + + mount-matrix = "0", "1", "0", + "1", "0", "0", + "0", "0", "-1"; + }; + + gyroscope@68 { + compatible = "invensense,mpu3050"; + reg = <0x68>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_3v3_sen>; + vlogic-supply = <&vdd_1v8_vio>; + + mount-matrix = "0", "1", "0", + "1", "0", "0", + "0", "0", "-1"; + + /* External I2C interface */ + i2c-gate { + #address-cells = <1>; + #size-cells = <0>; + + accelerometer@f { + compatible = "kionix,kxtf9"; + reg = <0x0f>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vdd-supply = <&vdd_1v8_vio>; + vddio-supply = <&vdd_1v8_vio>; + + mount-matrix = "-1", "0", "0", + "0", "1", "0", + "0", "0", "1"; + }; + }; + }; + }; + + hdmi_ddc: i2c@7000c700 { /*i2c4*/ + status = "okay"; + clock-frequency = <93750>; + + /* Nvidia Tegra HDCP device */ + }; + + i2c5: i2c@7000d000 { + status = "okay"; + clock-frequency = <400000>; + + nct72: temperature-sensor@4c { + compatible = "onnn,nct1008"; + reg = <0x4c>; + + interrupt-parent = <&gpio>; + interrupts = ; + + vcc-supply = <&vdd_3v3_sys>; + #thermal-sensor-cells = <1>; + }; + + /* Texas Instruments TPS659110 PMIC */ + pmic: pmic@2d { + compatible = "ti,tps65911"; + reg = <0x2d>; + + interrupts = ; + #interrupt-cells = <2>; + interrupt-controller; + wakeup-source; + + ti,en-gpio-sleep = <0 0 1 0 0 0 0 0 0>; + ti,system-power-controller; + ti,sleep-keep-ck32k; + ti,sleep-enable; + + #gpio-cells = <2>; + gpio-controller; + + vcc1-supply = <&vdd_5v0_sys>; + vcc2-supply = <&vdd_5v0_sys>; + vcc3-supply = <&vdd_1v8_vio>; + vcc4-supply = <&vdd_5v0_sys>; /* may need vio */ + vcc5-supply = <&vdd_5v0_sys>; + vcc6-supply = <&vddio_1v2_ddr>; + vcc7-supply = <&vdd_5v0_sys>; + vccio-supply = <&vdd_5v0_sys>; + + pmic-sleep-hog { + gpio-hog; + gpios = <0 GPIO_ACTIVE_HIGH>, + <2 GPIO_ACTIVE_HIGH>, + <6 GPIO_ACTIVE_HIGH>, + <8 GPIO_ACTIVE_HIGH>; + output-high; + }; + + regulators { + /* vdd1 is not used by chagall */ + + vddio_1v2_ddr: vdd2 { + regulator-name = "vddio_1v2_ddr"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + regulator-boot-on; + }; + + vdd_cpu: vddctrl { + regulator-name = "vdd_cpu,vdd_sys"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1400000>; + regulator-coupled-with = <&vdd_core>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; + regulator-always-on; 
+ regulator-boot-on; + ti,regulator-ext-sleep-control = <1>; + + nvidia,tegra-cpu-regulator; + }; + + vdd_1v8_vio: vio { + regulator-name = "vdd_1v8_gen"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + regulator-boot-on; + }; + + /* eMMC VDD */ + vcore_emmc: ldo1 { + regulator-name = "vdd_emmc_core"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + /* uSD slot VDD */ + vdd_usd: ldo2 { + regulator-name = "vdd_usd"; + regulator-min-microvolt = <3100000>; + regulator-max-microvolt = <3100000>; + }; + + /* uSD slot VDDIO */ + vddio_usd: ldo3 { + regulator-name = "vddio_usd"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3100000>; + }; + + ldo4 { + regulator-name = "vdd_rtc"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + }; + + ldo5 { + regulator-name = "vdd_1v3_cam_isp"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + }; + + ldo6 { + regulator-name = "avdd_dsi_csi,pwrdet_mipi"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + }; + + ldo7 { + regulator-name = "vdd_pllm,x,u,a_p_c_s"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + regulator-always-on; + regulator-boot-on; + ti,regulator-ext-sleep-control = <8>; + }; + + ldo8 { + regulator-name = "vdd_ddr_hs"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + regulator-always-on; + ti,regulator-ext-sleep-control = <8>; + }; + }; + }; + + vdd_core: core-regulator@60 { + compatible = "ti,tps62361"; + reg = <0x60>; + + regulator-name = "tps62361-vout"; + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <1770000>; + regulator-coupled-with = <&vdd_cpu>; + regulator-coupled-max-spread = <300000>; + regulator-max-step-microvolt = <100000>; + regulator-boot-on; + regulator-always-on; + ti,enable-vout-discharge; + ti,vsel0-state-high; + ti,vsel1-state-high; + + nvidia,tegra-core-regulator; + }; + }; + + vdd_5v0_sys: regulator-5v { + compatible = "regulator-fixed"; + regulator-name = "vdd_5v0_sys"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + regulator-boot-on; + }; + + vdd_3v3_sys: regulator-3v { + compatible = "regulator-fixed"; + regulator-name = "vdd_3v3_sys"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + regulator-boot-on; + }; + + vdd_pnl: regulator-panel { + compatible = "regulator-fixed"; + regulator-name = "vdd_panel"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-enable-ramp-delay = <300000>; + gpio = <&gpio TEGRA_GPIO(W, 1) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + vdd_3v3_sen: regulator-sensors { + compatible = "regulator-fixed"; + regulator-name = "sen_3v3_en"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + gpio = <&gpio TEGRA_GPIO(K, 5) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_3v3_sys>; + }; + + hdmi_5v0_sys: regulator-hdmi { + compatible = "regulator-fixed"; + regulator-name = "hdmi_5v0_sys"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; + + vdd_vbus_usb1: regulator-usb1 { + compatible = "regulator-fixed"; + regulator-name = 
"vdd_vbus_micro_usb"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(DD, 3) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; + + vdd_vbus_usb3: regulator-usb3 { + compatible = "regulator-fixed"; + regulator-name = "vdd_vbus_typea_usb"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(CC, 6) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; + + pmc@7000e400 { + status = "okay"; + nvidia,invert-interrupt; + nvidia,suspend-mode = <1>; + nvidia,cpu-pwr-good-time = <2000>; + nvidia,cpu-pwr-off-time = <200>; + nvidia,core-pwr-good-time = <3845 3845>; + nvidia,core-pwr-off-time = <0>; + nvidia,core-power-req-active-high; + nvidia,sys-clock-req-active-high; + core-supply = <&vdd_core>; + + /* Set DEV_OFF + PWR_OFF_SET bit in DCDC control register of TPS65911 PMIC */ + i2c-thermtrip { + nvidia,i2c-controller-id = <4>; + nvidia,bus-addr = <0x2d>; + nvidia,reg-addr = <0x3f>; + nvidia,reg-data = <0x81>; + }; + }; + + hda@70030000 { + status = "okay"; + }; + + ahub@70080000 { + i2s@70080400 { /* i2s1 */ + status = "okay"; + }; + + /* BT SCO */ + i2s@70080600 { /* i2s3 */ + status = "okay"; + }; + }; + + sdmmc1: mmc@78000000 { + status = "okay"; + + cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; + bus-width = <4>; + + vmmc-supply = <&vdd_usd>; /* ldo2 */ + vqmmc-supply = <&vddio_usd>; /* ldo3 */ + }; + + brcm_wifi_pwrseq: wifi-pwrseq { + compatible = "mmc-pwrseq-simple"; + + clocks = <&tegra_pmc TEGRA_PMC_CLK_BLINK>; + clock-names = "ext_clock"; + + reset-gpios = <&gpio TEGRA_GPIO(D, 3) GPIO_ACTIVE_LOW>; + post-power-on-delay-ms = <300>; + power-off-delay-us = <300>; + }; + + sdmmc3: mmc@78000400 { + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + assigned-clocks = <&tegra_car TEGRA30_CLK_SDMMC3>; + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_C>; + assigned-clock-rates = <50000000>; + + max-frequency = <50000000>; + keep-power-in-suspend; + bus-width = <4>; + non-removable; + + mmc-pwrseq = <&brcm_wifi_pwrseq>; + vmmc-supply = <&vdd_3v3_sys>; + vqmmc-supply = <&vdd_1v8_vio>; + + /* Azurewave AW-AH663 BCM4330 */ + wifi@1 { + compatible = "brcm,bcm4329-fmac"; + reg = <1>; + + interrupt-parent = <&gpio>; + interrupts = ; + interrupt-names = "host-wake"; + }; + }; + + sdmmc4: mmc@78000600 { + status = "okay"; + bus-width = <8>; + vmmc-supply = <&vcore_emmc>; + vqmmc-supply = <&vdd_1v8_vio>; + non-removable; + }; + + usb@7d000000 { + compatible = "nvidia,tegra30-udc"; + status = "okay"; + dr_mode = "otg"; + vbus-supply = <&vdd_vbus_usb1>; + }; + + usb-phy@7d000000 { + status = "okay"; + dr_mode = "otg"; + nvidia,hssync-start-delay = <0>; + nvidia,xcvr-lsfslew = <2>; + nvidia,xcvr-lsrslew = <2>; + }; + + usb@7d008000 { + status = "okay"; + }; + + usb-phy@7d008000 { + status = "okay"; + vbus-supply = <&vdd_vbus_usb3>; + }; + + backlight: backlight { + compatible = "pwm-backlight"; + + enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>; + power-supply = <&vdd_5v0_sys>; + pwms = <&pwm 0 5000000>; + + brightness-levels = <1 255>; + num-interpolated-steps = <254>; + default-brightness-level = <15>; + }; + + display-panel { + compatible = "panel-lvds"; + + width-mm = <217>; + height-mm = <136>; + + data-mapping = "jeida-24"; + + panel-timing { + /* 1280x800@60Hz */ + clock-frequency = <68000000>; + hactive = <1280>; + vactive = <800>; + hfront-porch = <48>; + hback-porch = <18>; + hsync-len = <30>; + 
vsync-len = <5>; + vfront-porch = <3>; + vback-porch = <12>; + }; + }; + + /* PMIC has a built-in 32KHz oscillator which is used by PMC */ + clk32k_in: clock-32k { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "pmic-oscillator"; + }; + + cpus { + cpu0: cpu@0 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu1: cpu@1 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu2: cpu@2 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + cpu3: cpu@3 { + cpu-supply = <&vdd_cpu>; + operating-points-v2 = <&cpu0_opp_table>; + #cooling-cells = <2>; + }; + }; + + firmware { + trusted-foundations { + compatible = "tlm,trusted-foundations"; + tlm,version-major = <2>; + tlm,version-minor = <8>; + }; + }; + + mains: ac-adapter-detect { + compatible = "gpio-charger"; + charger-type = "mains"; + gpios = <&gpio TEGRA_GPIO(V, 1) GPIO_ACTIVE_HIGH>; + }; + + gpio-keys { + compatible = "gpio-keys"; + interrupt-parent = <&gpio>; + + power { + label = "Power"; + gpios = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-up { + label = "Volume Up"; + gpios = <&gpio TEGRA_GPIO(Q, 0) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + volume-down { + label = "Volume Down"; + gpios = <&gpio TEGRA_GPIO(Q, 1) GPIO_ACTIVE_LOW>; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + }; + + extcon-keys { + compatible = "gpio-keys"; + interrupt-parent = <&gpio>; + + dock-insert { + label = "Chagall Dock"; + gpios = <&gpio TEGRA_GPIO(S, 4) GPIO_ACTIVE_LOW>; + linux,input-type = ; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + + lineout-detect { + label = "Audio dock line-out detect"; + gpios = <&gpio TEGRA_GPIO(S, 3) GPIO_ACTIVE_LOW>; + linux,input-type = ; + linux,code = ; + debounce-interval = <10>; + wakeup-event-action = ; + wakeup-source; + }; + }; + + sound { + compatible = "pegatron,tegra-audio-wm8903-chagall", + "nvidia,tegra-audio-wm8903"; + nvidia,model = "ASUS Transformer WM8903"; + + nvidia,audio-routing = + "Headphone Jack", "HPOUTR", + "Headphone Jack", "HPOUTL", + "Int Spk", "ROP", + "Int Spk", "RON", + "Int Spk", "LOP", + "Int Spk", "LON", + "IN1R", "Mic Jack", + "DMICDAT", "Int Mic"; + + nvidia,i2s-controller = <&tegra_i2s1>; + nvidia,audio-codec = <&wm8903>; + + nvidia,spkr-en-gpios = <&wm8903 2 GPIO_ACTIVE_HIGH>; + nvidia,hp-det-gpios = <&gpio TEGRA_GPIO(W, 2) GPIO_ACTIVE_LOW>; + nvidia,headset; + + clocks = <&tegra_car TEGRA30_CLK_PLL_A>, + <&tegra_car TEGRA30_CLK_PLL_A_OUT0>, + <&tegra_pmc TEGRA_PMC_CLK_OUT_1>; + clock-names = "pll_a", "pll_a_out0", "mclk"; + + assigned-clocks = <&tegra_car TEGRA30_CLK_EXTERN1>, + <&tegra_pmc TEGRA_PMC_CLK_OUT_1>; + + assigned-clock-parents = <&tegra_car TEGRA30_CLK_PLL_A_OUT0>, + <&tegra_car TEGRA30_CLK_EXTERN1>; + }; + + thermal-zones { + /* + * NCT72 has two sensors: + * + * 0: internal that monitors ambient/skin temperature + * 1: external that is connected to the CPU's diode + * + * Ideally we should use userspace thermal governor, + * but it's a much more complex solution. The "skin" + * zone exists as a simpler solution which prevents + * Chagall from getting too hot from a user's tactile + * perspective. 
The CPU zone is intended to protect + * silicon from damage. + */ + + skin-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + thermal-sensors = <&nct72 0>; + + trips { + trip0: skin-alert { + /* throttle at 57C until temperature drops to 56.8C */ + temperature = <57000>; + hysteresis = <200>; + type = "passive"; + }; + + trip1: skin-crit { + /* shut down at 65C */ + temperature = <65000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&trip0>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&actmon THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + + cpu-thermal { + polling-delay-passive = <1000>; /* milliseconds */ + polling-delay = <5000>; /* milliseconds */ + + thermal-sensors = <&nct72 1>; + + trips { + trip2: cpu-alert { + /* throttle at 85C until temperature drops to 84.8C */ + temperature = <85000>; + hysteresis = <200>; + type = "passive"; + }; + + trip3: cpu-crit { + /* shut down at 90C */ + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map1 { + trip = <&trip2>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&actmon THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; + + haptic-feedback { + compatible = "gpio-vibrator"; + enable-gpios = <&gpio TEGRA_GPIO(U, 4) GPIO_ACTIVE_HIGH>; + vcc-supply = <&vdd_3v3_sys>; + }; + + memory-controller@7000f000 { + emc-timings-0 { + /* SAMSUNG K4P8G304EB FGC1 */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000048 + 0x00000002 0x00000003 0x0000000c 0x00000007 + 0x00000009 0x00000001 0x00000002 0x00000006 + 0x00000001 0x00000000 0x00000004 0x00000004 + 0x04040001 0x000d090c 0x7026120d 0x001f0000 >; + }; + }; + + emc-timings-1 { + /* ELPIDA EDB8132B2MA 8D_F */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000010 + 0x00000001 
0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emem-configuration = < 0x00000006 0xc0000048 + 0x00000002 0x00000003 0x0000000c 0x00000007 + 0x00000009 0x00000001 0x00000002 0x00000006 + 0x00000001 0x00000000 0x00000004 0x00000004 + 0x04040001 0x000d090c 0x7026120d 0x001f0000 >; + }; + }; + + emc-timings-2 { + /* SAMSUNG K4P8G304EB FGC2 */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-533000000 { + clock-frequency = <533000000>; + + nvidia,emem-configuration = < 0x00000008 0xc0000060 + 0x00000003 0x00000004 0x00000010 0x0000000a + 0x0000000d 0x00000002 0x00000002 0x00000008 + 0x00000002 0x00000000 0x00000004 0x00000005 + 0x05040002 0x00110b10 0x70281811 0x001f0000 >; + }; + }; + + emc-timings-3 { + /* HYNIX H9TCNNN8JDMMPR NGM */ + nvidia,ram-code = <3>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emem-configuration = < 0x00020001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x73e30303 0x001f0000 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emem-configuration = < 0x00010001 0xc0000010 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000003 
0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060402 0x72c30303 0x001f0000 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emem-configuration = < 0x00000001 0xc0000018 + 0x00000001 0x00000001 0x00000003 0x00000001 + 0x00000003 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000002 0x00000002 + 0x02020001 0x00060403 0x72430504 0x001f0000 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emem-configuration = < 0x00000003 0xc0000025 + 0x00000001 0x00000001 0x00000006 0x00000003 + 0x00000005 0x00000001 0x00000002 0x00000004 + 0x00000001 0x00000000 0x00000003 0x00000002 + 0x02030001 0x00070506 0x71e40a07 0x001f0000 >; + }; + + timing-533000000 { + clock-frequency = <533000000>; + + nvidia,emem-configuration = < 0x00000008 0xc0000060 + 0x00000003 0x00000004 0x00000010 0x0000000a + 0x0000000d 0x00000002 0x00000002 0x00000008 + 0x00000002 0x00000000 0x00000004 0x00000005 + 0x05040002 0x00110b10 0x70281811 0x001f0000 >; + }; + }; + }; + + memory-controller@7000f400 { + emc-timings-0 { + /* SAMSUNG K4P8G304EB FGC1 */ + nvidia,ram-code = <0>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x00000060 0x00000000 0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 
0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000025 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000c + 0x0000000a 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000e0220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000004a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe0000000 0xff00ff00 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010082>; + nvidia,emc-mode-2 = <0x00020004>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000024>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000017 + 0x00000033 0x00000010 0x00000007 0x00000007 + 
0x00000007 0x00000002 0x0000000a 0x00000007 + 0x00000007 0x00000003 0x00000002 0x00000000 + 0x00000003 0x00000007 0x00000004 0x0000000d + 0x0000000e 0x000005e9 0x00000000 0x0000017a + 0x00000002 0x00000002 0x00000007 0x00000000 + 0x00000001 0x0000000c 0x00000038 0x00000038 + 0x00000006 0x00000014 0x00000009 0x00000004 + 0x00000002 0x00000680 0x00000000 0x00000006 + 0x00000000 0x00000000 0x00006282 0x001d0084 + 0x00008000 0x00034000 0x00034000 0x00034000 + 0x00034000 0x00034000 0x00034000 0x00034000 + 0x00034000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00038000 0x00038000 0x00038000 + 0x00038000 0x00080220 0x0800003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000090 0x000c000c 0xa0f10404 0x00000000 + 0x00000000 0x80000ce6 0xe0000000 0xff00ff88 >; + }; + }; + + emc-timings-1 { + /* ELPIDA EDB8132B2MA 8D_F */ + nvidia,ram-code = <1>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000060 0x00000000 0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 
0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000025 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000c + 0x0000000a 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x004400a4 + 0x00008000 0x00070000 0x00070000 0x00070000 + 0x00070000 0x00070000 0x00070000 0x00070000 + 0x00070000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000e0220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000004a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe0000000 0xff00ff00 >; + }; + + timing-400000000 { + clock-frequency = <400000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010082>; + nvidia,emc-mode-2 = <0x00020004>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000024>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000017 + 0x00000033 0x00000010 0x00000007 0x00000007 + 0x00000007 0x00000002 0x0000000a 0x00000007 + 0x00000007 0x00000003 0x00000002 0x00000000 + 0x00000003 0x00000007 0x00000004 
0x0000000d + 0x0000000e 0x000005e9 0x00000000 0x0000017a + 0x00000002 0x00000002 0x00000007 0x00000000 + 0x00000001 0x0000000c 0x00000038 0x00000038 + 0x00000006 0x00000014 0x00000009 0x00000004 + 0x00000002 0x00000680 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00006282 0x001d0084 + 0x00008000 0x00034000 0x00034000 0x00034000 + 0x00034000 0x00034000 0x00034000 0x00034000 + 0x00034000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00048000 0x00048000 0x00048000 + 0x00048000 0x00060220 0x0800003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000090 0x000c000c 0xa0f10000 0x00000000 + 0x00000000 0x80000ce6 0xe0000000 0xff00ff88 >; + }; + }; + + emc-timings-2 { + /* SAMSUNG K4P8G304EB FGC2 */ + nvidia,ram-code = <2>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000060 0x00000000 0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 
0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x00000009 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000025 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000004 0x00000001 0x0000000c + 0x0000000a 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000005 0x00000004 + 0x00000000 0x00000000 0x00004282 0x004400a4 + 0x00008000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000e0220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000004a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xe0000000 0xff00ff00 >; + }; + + timing-533000000 { + clock-frequency = <533000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x000100c2>; + nvidia,emc-mode-2 = <0x00020006>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000030>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000045 0x00000016 0x00000009 0x00000008 + 0x00000009 0x00000003 0x0000000d 0x00000009 + 0x00000009 0x00000005 0x00000003 0x00000000 + 0x00000004 0x0000000a 0x00000006 0x0000000d + 0x00000010 0x000007df 0x00000000 0x000001f7 + 0x00000003 0x00000003 0x00000009 0x00000000 + 0x00000001 0x0000000f 
0x0000004b 0x0000004b + 0x00000008 0x0000001b 0x0000000c 0x00000004 + 0x00000002 0x000008aa 0x00000000 0x00000004 + 0x00000000 0x00000000 0x00006282 0xf0120091 + 0x00008000 0x007f8008 0x007f8008 0x007f8008 + 0x007f8008 0x007f8008 0x007f8008 0x007f8008 + 0x007f8008 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x00080220 0x0200003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x000000c0 0x000e000e 0xa0f10000 0x00000000 + 0x00000000 0x800010d9 0xf0000000 0xff00ff88 >; + }; + }; + + emc-timings-3 { + /* HYNIX H9TCNNN8JDMMPR NGM */ + nvidia,ram-code = <3>; + + timing-25500000 { + clock-frequency = <25500000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000001 + 0x00000003 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000060 0x00000000 0x00000018 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000004 0x00000004 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x0000006b 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000000a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x800001c5 0xe0000000 0xff00ff00 >; + }; + + timing-51000000 { + clock-frequency = <51000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000009>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000003 + 0x00000006 0x00000002 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x000000c0 0x00000000 0x00000030 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x00000008 0x00000008 + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000000d5 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000013 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000287 0xe0000000 
0xff00ff00 >; + }; + + timing-102000000 { + clock-frequency = <102000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010022>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x0000000a>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x00000006 + 0x0000000d 0x00000004 0x00000002 0x00000004 + 0x00000004 0x00000001 0x00000005 0x00000002 + 0x00000002 0x00000001 0x00000001 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000b + 0x0000000a 0x00000181 0x00000000 0x00000060 + 0x00000001 0x00000001 0x00000002 0x00000000 + 0x00000001 0x00000007 0x0000000f 0x0000000f + 0x00000003 0x00000008 0x00000004 0x00000004 + 0x00000002 0x000001a9 0x00000004 0x00000004 + 0x00000000 0x00000000 0x00004282 0x007800a4 + 0x00008000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x000fc000 0x000fc000 0x000fc000 + 0x000fc000 0x00100220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x00000025 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x8000040b 0xe0000000 0xff00ff00 >; + }; + + timing-204000000 { + clock-frequency = <204000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x00010042>; + nvidia,emc-mode-2 = <0x00020001>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000013>; + nvidia,emc-cfg-dyn-self-ref; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000000c + 0x0000001a 0x00000008 0x00000003 0x00000005 + 0x00000004 0x00000001 0x00000006 0x00000003 + 0x00000003 0x00000002 0x00000002 0x00000000 + 0x00000001 0x00000003 0x00000001 0x0000000c + 0x0000000b 0x00000303 0x00000000 0x000000c0 + 0x00000001 0x00000001 0x00000003 0x00000000 + 0x00000001 0x00000007 0x0000001d 0x0000001d + 0x00000004 0x0000000b 0x00000005 0x00000004 + 0x00000002 0x00000351 0x00000004 0x00000006 + 0x00000000 0x00000000 0x00004282 0x004400a4 + 0x00008000 0x00072000 0x00072000 0x00072000 + 0x00072000 0x00072000 0x00072000 0x00072000 + 0x00072000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00080000 0x00080000 0x00080000 + 0x00080000 0x000e0220 0x0800201c 0x00000000 + 0x77ffc004 0x01f1f008 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x0000004a 0x00090009 0xa0f10000 0x00000000 + 0x00000000 0x80000713 0xd0000000 0xff00ff00 >; + }; + + timing-533000000 { + clock-frequency = <533000000>; + + nvidia,emc-auto-cal-interval = <0x001fffff>; + nvidia,emc-mode-1 = <0x000100c2>; + nvidia,emc-mode-2 = <0x00020006>; + nvidia,emc-mode-reset = <0x00000000>; + nvidia,emc-zcal-cnt-long = <0x00000030>; + nvidia,emc-cfg-periodic-qrst; + + nvidia,emc-configuration = < 0x0000001f + 0x00000045 0x00000016 0x00000009 0x00000008 + 0x00000009 0x00000003 0x0000000d 0x00000009 + 0x00000009 0x00000005 0x00000003 0x00000000 + 0x00000004 0x00000009 0x00000006 0x0000000d + 0x00000010 0x000007df 0x00000000 0x000001f7 + 0x00000003 0x00000003 0x00000009 0x00000000 + 0x00000001 0x0000000f 0x0000004b 0x0000004b + 0x00000008 0x0000001b 0x0000000c 0x00000004 + 0x00000002 0x000008aa 0x00000000 0x00000006 + 
0x00000000 0x00000000 0x00006282 0xf0120091 + 0x00008000 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x0000000a 0x0000000a 0x0000000a + 0x0000000a 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x00000000 0x00000000 0x00000000 + 0x00000000 0x0000000c 0x0000000c 0x0000000c + 0x0000000c 0x000a0220 0x0800003d 0x00000000 + 0x77ffc004 0x01f1f408 0x00000000 0x00000007 + 0x08000068 0x08000000 0x00000802 0x00064000 + 0x000000c0 0x000e000e 0xa0f10000 0x00000000 + 0x00000000 0x800010d9 0xe0000000 0xff00ff88 >; + }; + }; + }; +}; + +&emc_icc_dvfs_opp_table { + /delete-node/ opp-625000000-1200; + /delete-node/ opp-625000000-1250; + /delete-node/ opp-667000000-1200; + /delete-node/ opp-750000000-1300; + /delete-node/ opp-800000000-1300; + /delete-node/ opp-900000000-1350; +}; + +&emc_bw_dfs_opp_table { + /delete-node/ opp-625000000; + /delete-node/ opp-667000000; + /delete-node/ opp-750000000; + /delete-node/ opp-800000000; + /delete-node/ opp-900000000; +}; From e9fb630138c906ef2699aa9ee85e40b6d23f8d4f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 13 Oct 2021 16:11:43 +0300 Subject: [PATCH 1080/1202] spi: tegra20-slink: Put device into suspend on driver removal pm_runtime_disable() cancels all pending power requests, while they should be completed for the Tegra SPI driver. Otherwise SPI clock won't be disabled ever again because clk refcount will become unbalanced. Enforce runtime PM suspension to put device into expected state before driver is unbound and device's RPM state is reset by driver's core. Signed-off-by: Dmitry Osipenko --- drivers/spi/spi-tegra20-slink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 584fa25d39183..c2c8b0ed9e8b9 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1131,7 +1131,7 @@ static int tegra_slink_probe(struct platform_device *pdev) exit_pm_put: pm_runtime_put(&pdev->dev); exit_pm_disable: - pm_runtime_disable(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); tegra_slink_deinit_dma_param(tspi, false); exit_rx_dma_free: @@ -1150,7 +1150,7 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); - pm_runtime_disable(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); From 86cd45ec07b161d1b6997f98161009b82205092d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 22 Oct 2021 22:43:07 +0300 Subject: [PATCH 1081/1202] spi: tegra210-quad: Put device into suspend on driver removal pm_runtime_disable() cancels all pending power requests, while they should be completed for the Tegra SPI driver. Otherwise SPI clock won't be disabled ever again because clk refcount will become unbalanced. Enforce runtime PM suspension to put device into expected state before driver is unbound and device's RPM state is reset by driver's core. 
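Note: as a concrete illustration of the pattern that both Tegra SPI patches
above switch to, here is a minimal, hypothetical remove path.
pm_runtime_force_suspend() and pm_runtime_disable() are the real kernel APIs
being contrasted; the function and driver around them are illustrative only.

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

static int example_spi_remove(struct platform_device *pdev)
{
	/*
	 * pm_runtime_force_suspend() flushes outstanding runtime-PM
	 * activity and, if the device is still active, invokes its
	 * ->runtime_suspend() callback, so the clock enable refcount
	 * ends up balanced.  A bare pm_runtime_disable() would cancel
	 * a pending suspend request instead, leaving the clock enabled
	 * forever.
	 */
	pm_runtime_force_suspend(&pdev->dev);

	return 0;
}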
Signed-off-by: Dmitry Osipenko
---
 drivers/spi/spi-tegra210-quad.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 2354ca1e38581..c0f9a75b44b5d 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1318,7 +1318,7 @@ static int tegra_qspi_probe(struct platform_device *pdev)
 exit_free_irq:
 	free_irq(qspi_irq, tqspi);
 exit_pm_disable:
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_force_suspend(&pdev->dev);
 	tegra_qspi_deinit_dma(tqspi);
 	return ret;
 }
@@ -1330,7 +1330,7 @@ static int tegra_qspi_remove(struct platform_device *pdev)
 	spi_unregister_master(master);
 	free_irq(tqspi->irq, tqspi);
 
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_force_suspend(&pdev->dev);
 	tegra_qspi_deinit_dma(tqspi);
 
 	return 0;

From 81de9eff8e71984d7341c004653a6120efde444f Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 6 Apr 2021 23:02:50 +0300
Subject: [PATCH 1082/1202] iommu/tegra-smmu: Change debugfs directory name

Change the debugfs directory name to "smmu", which is a much more obvious
name than the generic name of the memory controller device-tree node.

Signed-off-by: Dmitry Osipenko
---
 drivers/iommu/tegra-smmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e900e3c46903b..9509b44aadaf7 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1138,7 +1138,7 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
 
 	tegra_smmu_ahb_enable();
 
-	err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, dev_name(dev));
+	err = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu");
 	if (err)
 		return ERR_PTR(err);

From 1a1fa627cc197528e3f7cdd350050f7d9944e90e Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Fri, 26 Mar 2021 21:04:35 +0300
Subject: [PATCH 1083/1202] iommu/tegra-smmu: Defer attachment of display clients

All consumer-grade Android and Chromebook devices show a splash screen on
boot, and the display is then left enabled when the kernel is booted. This
behaviour is unacceptable in the case of implicit IOMMU domains to which
devices are attached during kernel boot, since devices like the display
controller may perform DMA at that time. We can work around this problem
by deferring the enabling of SMMU translation for specific devices, like
the display controller, until the first IOMMU mapping is created, which
works well enough in practice because by that time the h/w has already
been stopped.
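Note: in outline, the mechanism implemented by the diff below works as in
this condensed sketch, which reuses the helpers the patch introduces; it is
not the patch itself.

/* Called for each swgroup during attach_dev(). */
static int sketch_attach_swgroup(struct tegra_smmu_as *as,
				 unsigned int swgroup)
{
	const int disp_id = smmu_swgroup_to_display_id(swgroup);

	if (disp_id >= 0) {
		as->display_attached[disp_id] = true;

		/* defer the h/w enable until the first map() call */
		if (as->attached_devices_need_sync)
			return 0;
	}

	/* non-deferred clients get the h/w enable immediately */
	tegra_smmu_enable(as->smmu, swgroup, as->id);
	return 0;
}

/*
 * tegra_smmu_attach_deferred_devices() then runs under the first
 * tegra_smmu_map() call and replays tegra_smmu_enable() for the recorded
 * display swgroups, once the bootloader's scan-out has stopped.
 */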
Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-smmu.c | 71 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 9509b44aadaf7..df093f52da6ff 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -60,6 +60,8 @@ struct tegra_smmu_as { dma_addr_t pd_dma; unsigned id; u32 attr; + bool display_attached[2]; + bool attached_devices_need_sync; }; static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom) @@ -78,6 +80,10 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) return readl(smmu->regs + offset); } +/* all Tegra SoCs use the same group IDs for displays */ +#define SMMU_SWGROUP_DC 1 +#define SMMU_SWGROUP_DCB 2 + #define SMMU_CONFIG 0x010 #define SMMU_CONFIG_ENABLE (1 << 0) @@ -253,6 +259,20 @@ static inline void smmu_flush(struct tegra_smmu *smmu) smmu_readl(smmu, SMMU_PTB_ASID); } +static int smmu_swgroup_to_display_id(unsigned int swgroup) +{ + switch (swgroup) { + case SMMU_SWGROUP_DC: + return 0; + + case SMMU_SWGROUP_DCB: + return 1; + + default: + return -1; + } +} + static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp) { unsigned long id; @@ -318,6 +338,9 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type) as->domain.geometry.aperture_end = 0xffffffff; as->domain.geometry.force_aperture = true; + /* work around implicit attachment of devices with active DMA */ + as->attached_devices_need_sync = true; + return &as->domain; } @@ -410,6 +433,31 @@ static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup, } } +static void tegra_smmu_attach_deferred_devices(struct iommu_domain *domain) +{ + struct tegra_smmu_as *as = to_smmu_as(domain); + + if (!as->attached_devices_need_sync) + return; + + if (as->display_attached[0] || as->display_attached[1]) { + struct tegra_smmu *smmu = as->smmu; + unsigned int i; + + for (i = 0; i < smmu->soc->num_clients; i++) { + const struct tegra_mc_client *client = &smmu->soc->clients[i]; + const int disp_id = smmu_swgroup_to_display_id(client->swgroup); + + if (disp_id < 0 || !as->display_attached[disp_id]) + continue; + + tegra_smmu_enable(smmu, client->swgroup, as->id); + } + } + + as->attached_devices_need_sync = false; +} + static int tegra_smmu_as_prepare(struct tegra_smmu *smmu, struct tegra_smmu_as *as) { @@ -495,10 +543,26 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain, return -ENOENT; for (index = 0; index < fwspec->num_ids; index++) { + const unsigned int swgroup = fwspec->ids[index]; + const int disp_id = smmu_swgroup_to_display_id(swgroup); + err = tegra_smmu_as_prepare(smmu, as); if (err) goto disable; + if (disp_id >= 0) { + as->display_attached[disp_id] = true; + + /* + * In most cases display is performing DMA before + * driver is initialized by showing a splash screen + * and in this case we should defer the h/w attachment + * until the first mapping is created by display driver. 
+ */ + if (as->attached_devices_need_sync) + continue; + } + tegra_smmu_enable(smmu, fwspec->ids[index], as->id); } @@ -527,6 +591,12 @@ static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *de return; for (index = 0; index < fwspec->num_ids; index++) { + const unsigned int swgroup = fwspec->ids[index]; + const int disp_id = smmu_swgroup_to_display_id(swgroup); + + if (disp_id >= 0) + as->display_attached[disp_id] = false; + tegra_smmu_disable(smmu, fwspec->ids[index], as->id); tegra_smmu_as_unprepare(smmu, as); } @@ -762,6 +832,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, int ret; spin_lock_irqsave(&as->lock, flags); + tegra_smmu_attach_deferred_devices(domain); ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags); spin_unlock_irqrestore(&as->lock, flags); From 99b9a9683a1124c336ee26949c021aac35b2e85a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 26 Mar 2021 21:19:00 +0300 Subject: [PATCH 1084/1202] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook The previous commit fixes a problem where the display client was attached to the IOMMU too early during kernel boot in a multi-platform kernel configuration which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to defer the IOMMU attachment for the Nyan Big Chromebook isn't needed anymore; revert it. Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-smmu.c | 71 +------------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index df093f52da6ff..c83d5b82791d8 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -869,69 +869,10 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova); } -static struct tegra_smmu *tegra_smmu_find(struct device_node *np) -{ - struct platform_device *pdev; - struct tegra_mc *mc; - - pdev = of_find_device_by_node(np); - if (!pdev) - return NULL; - - mc = platform_get_drvdata(pdev); - if (!mc) - return NULL; - - return mc->smmu; -} - -static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, - struct of_phandle_args *args) -{ - const struct iommu_ops *ops = smmu->iommu.ops; - int err; - - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); - if (err < 0) { - dev_err(dev, "failed to initialize fwspec: %d\n", err); - return err; - } - - err = ops->of_xlate(dev, args); - if (err < 0) { - dev_err(dev, "failed to parse SW group ID: %d\n", err); - iommu_fwspec_free(dev); - return err; - } - - return 0; -} - static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { - struct device_node *np = dev->of_node; - struct tegra_smmu *smmu = NULL; - struct of_phandle_args args; - unsigned int index = 0; - int err; - - while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, - &args) == 0) { - smmu = tegra_smmu_find(args.np); - if (smmu) { - err = tegra_smmu_configure(smmu, dev, &args); - - if (err < 0) { - of_node_put(args.np); - return ERR_PTR(err); - } - } - - of_node_put(args.np); - index++; - } + struct tegra_smmu *smmu = dev_iommu_priv_get(dev); - smmu = dev_iommu_priv_get(dev); if (!smmu) return ERR_PTR(-ENODEV); @@ -1157,16 +1098,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (!smmu) return ERR_PTR(-ENOMEM); - /* - * This is a bit of a hack. Ideally we'd want to simply return this - * value.
However the IOMMU registration process will attempt to add - all devices to the IOMMU when bus_set_iommu() is called. In order - not to rely on global variables to track the IOMMU instance, we - set it here so that it can be looked up from the .probe_device() - callback via the IOMMU device's .drvdata field. - */ - mc->smmu = smmu; - smmu->asids = devm_bitmap_zalloc(dev, soc->num_asids, GFP_KERNEL); if (!smmu->asids) return ERR_PTR(-ENOMEM); From 3713252c62322840bd8a7869df056b430ad8e4d9 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 18:51:17 +0300 Subject: [PATCH 1085/1202] gpu: host1x: Add back arm_iommu_detach_device() The case of CONFIG_ARM_DMA_USE_IOMMU=y was found to be broken for the host1x driver. Add back the workaround using arm_iommu_detach_device() as a temporary solution. Cc: stable@vger.kernel.org Fixes: af1cbfb9bf0f ("gpu: host1x: Support DMA mapping of buffers") Signed-off-by: Dmitry Osipenko --- drivers/gpu/host1x/dev.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index c42ab78327e76..f5b4dcded0887 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -22,6 +22,10 @@ #include <trace/events/host1x.h> #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + #include "bus.h" #include "channel.h" #include "debug.h" @@ -263,6 +267,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + /* * We may not always want to enable IOMMU support (for example if the * host1x firewall is already enabled and we don't support addressing From 28ecc1d119acbdaf232b8c4a26740ce9c76c43bd Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 28 Mar 2021 19:00:47 +0300 Subject: [PATCH 1086/1202] drm/tegra: Add back arm_iommu_detach_device() The case of CONFIG_ARM_DMA_USE_IOMMU=y was found to be broken for the DRM driver. Add back the workaround using arm_iommu_detach_device() as a temporary solution.
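In both drivers the workaround has the same shape (a sketch, with dev standing in for the host1x or DRM client device; the actual hunks operate on host->dev and client->dev respectively):

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);

		/* drop the implicit mapping installed by the ARM DMA core */
		arm_iommu_detach_device(dev);
		arm_iommu_release_mapping(mapping);

		/* re-read the domain now that the implicit one is gone */
		domain = iommu_get_domain_for_dev(dev);
	}
#endif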
Cc: stable@vger.kernel.org Fixes: fa6661b7aa0b ("drm/tegra: Optionally attach clients to the IOMMU") Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/drm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 2e7da2a7505d5..b2ebba8021078 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -21,6 +21,10 @@ #include #include +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + #include "dc.h" #include "drm.h" #include "gem.h" @@ -936,6 +940,17 @@ int host1x_client_iommu_attach(struct host1x_client *client) struct iommu_group *group = NULL; int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(client->dev); + } +#endif + /* * If the host1x client is already attached to an IOMMU domain that is * not the shared IOMMU domain, don't try to attach it to a different From 37ec7a00ec21c2ad350c29c20d68850f947ac27b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sat, 18 Aug 2018 16:55:12 +0300 Subject: [PATCH 1087/1202] drm/tegra: plane: Accept all format-modifiers A tiling modifier can't be applied to the YV12 video overlay by userspace because all tiling modifiers are filtered out for multi-plane formats. AFAIK, all modifiers should work with all formats, hence the check is incorrect. Fixes: e90124cb46bdb ("drm/tegra: plane: Support format modifiers") Signed-off-by: Dmitry Osipenko --- drivers/gpu/drm/tegra/plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 321cb1f13da63..56dc6477d47d4 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -123,7 +123,7 @@ static bool tegra_plane_format_mod_supported(struct drm_plane *plane, if (info->num_planes == 1) return true; - return false; + return true; } const struct drm_plane_funcs tegra_plane_funcs = { From bbdcf8f54497cd73502c1b2c0a8949cb692617a7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 27 Jun 2019 01:54:33 +0300 Subject: [PATCH 1088/1202] PM / devfreq: tegra30: Use tracepoints for debugging Debug messages create too much CPU and memory activity by themselves, so it's difficult to debug lower rates and to catch unwanted interrupts that happen rarely. Tracepoints are ideal in that regard because they do not contribute to the sampled data at all. This allowed me to catch a few problems, which are fixed by the follow-up patches; without tracepoints it would have been much harder to do.
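For reference, the events declared by the new header are emitted like ordinary function calls and compile to no-ops until enabled from tracefs; a minimal sketch of an emit site follows (report_target() is a hypothetical helper for illustration, the real call sites are in the diff below):

/* CREATE_TRACE_POINTS must be defined in exactly one .c file so that
 * the event bodies get instantiated there. */
#define CREATE_TRACE_POINTS
#include <trace/events/tegra30_devfreq.h>

static void report_target(const struct tegra_devfreq_device *dev)
{
	/* records the frequency chosen for one ACTMON device */
	trace_device_target_freq(dev->config->offset, dev->target_freq);
}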
Signed-off-by: Dmitry Osipenko --- drivers/devfreq/tegra30-devfreq.c | 17 ++++ include/trace/events/tegra30_devfreq.h | 105 +++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 include/trace/events/tegra30_devfreq.h diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 65ecf17a36f49..4016a35fb0ad3 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -72,6 +72,9 @@ /* Assume that the bus is saturated if the utilization is 25% */ #define BUS_SATURATION_RATIO 25 +#define CREATE_TRACE_POINTS +#include + /** * struct tegra_devfreq_device_config - configuration specific to an ACTMON * device @@ -272,6 +275,10 @@ static void tegra_devfreq_update_wmark(struct tegra_devfreq *tegra, device_writel(dev, do_percent(val, dev->config->boost_down_threshold), ACTMON_DEV_LOWER_WMARK); + + trace_device_lower_upper(dev->config->offset, tegra->cur_freq, + do_percent(val, dev->config->boost_down_threshold), + do_percent(val, dev->config->boost_up_threshold)); } static void actmon_isr_device(struct tegra_devfreq *tegra, @@ -279,6 +286,9 @@ static void actmon_isr_device(struct tegra_devfreq *tegra, { u32 intr_status, dev_ctrl; + trace_device_isr_enter(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); + dev->avg_count = device_readl(dev, ACTMON_DEV_AVG_COUNT); tegra_devfreq_update_avg_wmark(tegra, dev); @@ -318,6 +328,9 @@ static void actmon_isr_device(struct tegra_devfreq *tegra, device_writel(dev, dev_ctrl, ACTMON_DEV_CTRL); device_writel(dev, ACTMON_INTR_STATUS_CLEAR, ACTMON_DEV_INTR_STATUS); + + trace_device_isr_exit(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); } static unsigned long actmon_cpu_to_emc_rate(struct tegra_devfreq *tegra, @@ -714,6 +727,10 @@ static int tegra_governor_get_target(struct devfreq *devfreq, actmon_update_target(tegra, dev); target_freq = max(target_freq, dev->target_freq); + + trace_device_target_freq(dev->config->offset, dev->target_freq); + trace_device_target_update(tegra->regs, dev->config->offset, + dev->boost_freq, cpufreq_quick_get(0)); } /* diff --git a/include/trace/events/tegra30_devfreq.h b/include/trace/events/tegra30_devfreq.h new file mode 100644 index 0000000000000..8f264a489daf8 --- /dev/null +++ b/include/trace/events/tegra30_devfreq.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM tegra30_devfreq + +#if !defined(_TRACE_TEGRA30_DEVFREQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TEGRA30_DEVFREQ_H + +#include +#include +#include + +DECLARE_EVENT_CLASS(device_state, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, intr_status) + __field(u32, ctrl) + __field(u32, avg_count) + __field(u32, avg_lower) + __field(u32, avg_upper) + __field(u32, count) + __field(u32, lower) + __field(u32, upper) + __field(u32, boost_freq) + __field(u32, cpu_freq) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->intr_status = readl_relaxed(base + offset + 0x24); + __entry->ctrl = readl_relaxed(base + offset + 0x0); + __entry->avg_count = readl_relaxed(base + offset + 0x20); + __entry->avg_lower = readl_relaxed(base + offset + 0x14); + __entry->avg_upper = readl_relaxed(base + offset + 0x10); + __entry->count = readl_relaxed(base + offset + 0x1c); + __entry->lower = readl_relaxed(base + offset + 0x8); + __entry->upper = readl_relaxed(base + offset + 
0x4); + __entry->boost_freq = boost; + __entry->cpu_freq = cpufreq; + ), + TP_printk("%03x: intr 0x%08x ctrl 0x%08x avg %010u %010u %010u cnt %010u %010u %010u boost %010u cpu %u", + __entry->offset, + __entry->intr_status, + __entry->ctrl, + __entry->avg_count, + __entry->avg_lower, + __entry->avg_upper, + __entry->count, + __entry->lower, + __entry->upper, + __entry->boost_freq, + __entry->cpu_freq) +); + +DEFINE_EVENT(device_state, device_isr_enter, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +DEFINE_EVENT(device_state, device_isr_exit, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +DEFINE_EVENT(device_state, device_target_update, + TP_PROTO(void __iomem *base, u32 offset, u32 boost, u32 cpufreq), + TP_ARGS(base, offset, boost, cpufreq)); + +TRACE_EVENT(device_lower_upper, + TP_PROTO(u32 offset, u32 target, u32 lower, u32 upper), + TP_ARGS(offset, target, lower, upper), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, target) + __field(u32, lower) + __field(u32, upper) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->target = target; + __entry->lower = lower; + __entry->upper = upper; + ), + TP_printk("%03x: freq %010u lower freq %010u upper freq %010u", + __entry->offset, + __entry->target, + __entry->lower, + __entry->upper) +); + +TRACE_EVENT(device_target_freq, + TP_PROTO(u32 offset, u32 target), + TP_ARGS(offset, target), + TP_STRUCT__entry( + __field(u32, offset) + __field(u32, target) + ), + TP_fast_assign( + __entry->offset = offset; + __entry->target = target; + ), + TP_printk("%03x: freq %010u", __entry->offset, __entry->target) +); +#endif /* _TRACE_TEGRA30_DEVFREQ_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> From 74f2fbedceff1797efe653c9aad0d73c599ea743 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 8 May 2018 16:28:36 +0300 Subject: [PATCH 1089/1202] memory: tegra: Block DMA of client HW on a faulty memory access Currently the Memory Controller reports erroneous memory accesses done by memory clients, and that's it. Let's make it block the whole HW unit that corresponds to the misbehaving memory client, in order to try to avoid memory corruption and to stop deliberate manipulation attempts by a misbehaving client.
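In the interrupt handlers this boils down to blocking only on write errors, since reads are common and not all drivers can recover from a blocked DMA; the helper itself reduces to the following (a condensed sketch of the mc.c hunk below, with the ratelimited logging dropped):

int tegra_mc_error_block_client_dma(struct tegra_mc *mc, unsigned int client_idx)
{
	const struct tegra_mc_reset_ops *ops = mc->soc->reset_ops;
	const struct tegra_mc_reset *rst;
	unsigned int id = mc->soc->clients[client_idx].reset_id;

	if (id == TEGRA_MC_CLIENT_NO_RESET)
		return 0;	/* this client has no reset/block control */

	if (!ops)
		return -ENODEV;

	if (!ops->block_dma)
		return 0;	/* SoC has no way to block, nothing to do */

	rst = tegra_mc_reset_find(mc, id);
	if (!rst)
		return -ENODEV;

	/* halt any new DMA issued by the offending client */
	return ops->block_dma(mc, rst);
}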
Signed-off-by: Dmitry Osipenko --- drivers/memory/tegra/mc.c | 50 ++++++++++++++++++++++ drivers/memory/tegra/tegra114.c | 64 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra124.c | 66 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra20.c | 65 +++++++++++++++++++++++++++++ drivers/memory/tegra/tegra210.c | 73 +++++++++++++++++++++++++++++++++ drivers/memory/tegra/tegra30.c | 66 +++++++++++++++++++++++++++++ include/soc/tegra/mc.h | 6 +++ 7 files changed, 390 insertions(+) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 44b4a4080920e..570f428f866a4 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -349,6 +349,45 @@ unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc) } EXPORT_SYMBOL_GPL(tegra_mc_get_emem_device_count); +int tegra_mc_error_block_client_dma(struct tegra_mc *mc, + unsigned int client_idx) +{ + const struct tegra_mc_reset_ops *rst_ops; + const struct tegra_mc_reset *rst; + const char *client; + unsigned int id; + int err; + + id = mc->soc->clients[client_idx].reset_id; + if (id == TEGRA_MC_CLIENT_NO_RESET) + return 0; + + client = mc->soc->clients[client_idx].name; + + rst_ops = mc->soc->reset_ops; + if (!rst_ops) + return -ENODEV; + + if (!rst_ops->block_dma) + return 0; + + rst = tegra_mc_reset_find(mc, id); + if (!rst) + return -ENODEV; + + err = rst_ops->block_dma(mc, rst); + if (err) { + dev_err_ratelimited(mc->dev, "%s: failed to block DMA: %d\n", + client, err); + return err; + } + + dev_warn_ratelimited(mc->dev, "%s: DMA blocked\n", client); + + return 0; +} +EXPORT_SYMBOL_GPL(tegra_mc_error_block_client_dma); + #if defined(CONFIG_ARCH_TEGRA_3x_SOC) || \ defined(CONFIG_ARCH_TEGRA_114_SOC) || \ defined(CONFIG_ARCH_TEGRA_124_SOC) || \ @@ -522,6 +561,7 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) const char *direction, *secure; phys_addr_t addr = 0; unsigned int i; + bool block_dma; char perm[7]; u8 id, type; u32 value; @@ -535,6 +575,10 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) addr <<= 32; } #endif + if (value & MC_ERR_STATUS_RW) + block_dma = true; + else + block_dma = false; if (value & MC_ERR_STATUS_RW) direction = "write"; @@ -591,6 +635,12 @@ static irqreturn_t tegra30_mc_handle_irq(int irq, void *data) value = mc_readl(mc, MC_ERR_ADR); addr |= value; + /* Read errors are quite common, hence lets skip them since + * not all drivers support recovering from a blocked DMA. 
+ */ + if (block_dma) + tegra_mc_error_block_client_dma(mc, i); + dev_err_ratelimited(mc->dev, "%s: %s%s @%pa: %s (%s%s)\n", client, secure, direction, &addr, error, desc, perm); diff --git a/drivers/memory/tegra/tegra114.c b/drivers/memory/tegra/tegra114.c index 41350570c815c..d03a5d162dbdc 100644 --- a/drivers/memory/tegra/tegra114.c +++ b/drivers/memory/tegra/tegra114.c @@ -23,6 +23,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -39,6 +40,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -55,6 +57,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -71,6 +74,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -87,6 +91,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -103,6 +108,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -119,6 +125,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x09, .name = "eppup", @@ -135,6 +142,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x0a, .name = "g2pr", @@ -151,6 +159,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x0b, .name = "g2sr", @@ -167,6 +176,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x0f, .name = "avpcarm7r", @@ -183,6 +193,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "displayhc", @@ -199,6 +210,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x68, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -215,6 +227,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x68, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x12, .name = "fdcdrd", @@ -231,6 +244,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id = 0x13, .name = "fdcdrd2", @@ -247,6 +261,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x14, .name = "g2dr", @@ -263,6 +278,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x15, .name = "hdar", @@ -279,6 +295,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "host1xdmar", @@ -295,6 +312,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -311,6 +329,7 @@ 
static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x18, .name = "idxsrd", @@ -327,6 +346,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0b, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id = 0x1c, .name = "msencsrd", @@ -359,6 +379,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -375,6 +396,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xe8, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x20, .name = "texl2srd", @@ -391,6 +413,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x22, .name = "vdebsevr", @@ -407,6 +430,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x23, .name = "vdember", @@ -423,6 +447,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x24, .name = "vdemcer", @@ -439,6 +464,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xb8, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x25, .name = "vdetper", @@ -455,6 +481,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xee, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x26, .name = "mpcorelpr", @@ -467,6 +494,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x27, .name = "mpcorer", @@ -479,6 +507,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x28, .name = "eppu", @@ -495,6 +524,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x29, .name = "eppv", @@ -511,6 +541,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x2a, .name = "eppy", @@ -527,6 +558,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA114_MC_RESET_EPP, }, { .id = 0x2b, .name = "msencswr", @@ -543,6 +575,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA114_MC_RESET_MPE, }, { .id = 0x2c, .name = "viwsb", @@ -559,6 +592,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x47, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2d, .name = "viwu", @@ -575,6 +609,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2e, .name = "viwv", @@ -591,6 +626,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x2f, .name = "viwy", @@ -607,6 +643,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x47, }, }, + .reset_id = TEGRA114_MC_RESET_VI, }, { .id = 0x30, .name = "g2dw", @@ -623,6 +660,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x9, }, }, + .reset_id = TEGRA114_MC_RESET_2D, }, { .id = 0x32, .name = "avpcarm7w", @@ -639,6 +677,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + 
.reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x33, .name = "fdcdwr", @@ -655,6 +694,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_3D, }, { .id = 0x34, .name = "fdcdwr2", @@ -671,6 +711,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA114_MC_RESET_3D2, }, { .id = 0x35, .name = "hdaw", @@ -687,6 +728,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -703,6 +745,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x25, }, }, + .reset_id = TEGRA114_MC_RESET_HC, }, { .id = 0x37, .name = "ispw", @@ -719,6 +762,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_ISP, }, { .id = 0x38, .name = "mpcorelpw", @@ -731,6 +775,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x39, .name = "mpcorew", @@ -743,6 +788,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -759,6 +805,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -775,6 +822,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xe8, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3e, .name = "vdebsevw", @@ -791,6 +839,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x3f, .name = "vdedbgw", @@ -807,6 +856,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x40, .name = "vdembew", @@ -823,6 +873,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x89, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x41, .name = "vdetpmw", @@ -839,6 +890,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x59, }, }, + .reset_id = TEGRA114_MC_RESET_VDE, }, { .id = 0x4a, .name = "xusb_hostr", @@ -855,6 +907,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4b, .name = "xusb_hostw", @@ -871,6 +924,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4c, .name = "xusb_devr", @@ -887,6 +941,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4d, .name = "xusb_devw", @@ -903,6 +958,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4e, .name = "fdcdwr3", @@ -919,6 +975,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x4f, .name = "fdcdrd3", @@ -935,6 +992,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x50, .name = "fdcwr4", @@ -951,6 +1009,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x10, }, }, + .reset_id = 
TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x51, .name = "fdcrd4", @@ -967,6 +1026,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x52, .name = "emucifr", @@ -979,6 +1039,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x53, .name = "emucifw", @@ -991,6 +1052,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x54, .name = "tsecsrd", @@ -1007,6 +1069,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -1023,6 +1086,7 @@ static const struct tegra_mc_client tegra114_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra124.c b/drivers/memory/tegra/tegra124.c index d780a84241fe0..b759d53d78c79 100644 --- a/drivers/memory/tegra/tegra124.c +++ b/drivers/memory/tegra/tegra124.c @@ -24,6 +24,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -40,6 +41,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xc2, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -56,6 +58,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xc6, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -72,6 +75,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -88,6 +92,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -104,6 +109,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -120,6 +126,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "afir", @@ -136,6 +143,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA124_MC_RESET_AFI, }, { .id = 0x0f, .name = "avpcarm7r", @@ -152,6 +160,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "displayhc", @@ -168,6 +177,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -184,6 +194,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x15, .name = "hdar", @@ -200,6 +211,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x24, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "host1xdmar", @@ -216,6 +228,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA124_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -232,6 +245,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = 
TEGRA124_MC_RESET_HC, }, { .id = 0x1c, .name = "msencsrd", @@ -248,6 +262,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA124_MC_RESET_MSENC, }, { .id = 0x1d, .name = "ppcsahbdmar", @@ -264,6 +279,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -280,6 +296,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1f, .name = "satar", @@ -296,6 +313,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x65, }, }, + .reset_id = TEGRA124_MC_RESET_SATA, }, { .id = 0x22, .name = "vdebsevr", @@ -312,6 +330,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x4f, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x23, .name = "vdember", @@ -328,6 +347,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x3d, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x24, .name = "vdemcer", @@ -344,6 +364,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x66, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x25, .name = "vdetper", @@ -360,6 +381,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0xa5, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x26, .name = "mpcorelpr", @@ -372,6 +394,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x27, .name = "mpcorer", @@ -384,6 +407,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2b, .name = "msencswr", @@ -400,6 +424,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_MSENC, }, { .id = 0x31, .name = "afiw", @@ -416,6 +441,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_AFI, }, { .id = 0x32, .name = "avpcarm7w", @@ -432,6 +458,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x35, .name = "hdaw", @@ -448,6 +475,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -476,6 +504,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x39, .name = "mpcorew", @@ -488,6 +517,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -504,6 +534,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -520,6 +551,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3d, .name = "sataw", @@ -536,6 +568,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x65, }, }, + .reset_id = TEGRA124_MC_RESET_SATA, }, { .id = 0x3e, .name = "vdebsevw", @@ -552,6 +585,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, 
{ .id = 0x3f, .name = "vdedbgw", @@ -568,6 +602,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x40, .name = "vdembew", @@ -584,6 +619,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x41, .name = "vdetpmw", @@ -600,6 +636,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VDE, }, { .id = 0x44, .name = "ispra", @@ -616,6 +653,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x46, .name = "ispwa", @@ -632,6 +670,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x47, .name = "ispwb", @@ -648,6 +687,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2, }, { .id = 0x4a, .name = "xusb_hostr", @@ -664,6 +704,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x39, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_HOST, }, { .id = 0x4b, .name = "xusb_hostw", @@ -680,6 +721,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_HOST, }, { .id = 0x4c, .name = "xusb_devr", @@ -696,6 +738,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x39, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_DEV, }, { .id = 0x4d, .name = "xusb_devw", @@ -712,6 +755,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_XUSB_DEV, }, { .id = 0x4e, .name = "isprab", @@ -728,6 +772,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x50, .name = "ispwab", @@ -744,6 +789,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x51, .name = "ispwbb", @@ -760,6 +806,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_ISP2B, }, { .id = 0x54, .name = "tsecsrd", @@ -776,6 +823,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -792,6 +840,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x56, .name = "a9avpscr", @@ -808,6 +857,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x57, .name = "a9avpscw", @@ -824,6 +874,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x58, .name = "gpusrd", @@ -841,6 +892,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x59, .name = "gpuswr", @@ -858,6 +910,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x5a, .name = "displayt", @@ -874,6 +927,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x60, .name 
= "sdmmcra", @@ -890,6 +944,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC1, }, { .id = 0x61, .name = "sdmmcraa", @@ -906,6 +961,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC2, }, { .id = 0x62, .name = "sdmmcr", @@ -922,6 +978,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC3, }, { .id = 0x63, .swgroup = TEGRA_SWGROUP_SDMMC4A, @@ -938,6 +995,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC4, }, { .id = 0x64, .name = "sdmmcwa", @@ -954,6 +1012,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC1, }, { .id = 0x65, .name = "sdmmcwaa", @@ -970,6 +1029,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC2, }, { .id = 0x66, .name = "sdmmcw", @@ -986,6 +1046,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC3, }, { .id = 0x67, .name = "sdmmcwab", @@ -1002,6 +1063,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_SDMMC4, }, { .id = 0x6c, .name = "vicsrd", @@ -1018,6 +1080,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA124_MC_RESET_VIC, }, { .id = 0x6d, .name = "vicswr", @@ -1034,6 +1097,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VIC, }, { .id = 0x72, .name = "viw", @@ -1050,6 +1114,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA124_MC_RESET_VI, }, { .id = 0x73, .name = "displayd", @@ -1066,6 +1131,7 @@ static const struct tegra_mc_client tegra124_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c index fcd7738fcb536..dc378356f4bad 100644 --- a/drivers/memory/tegra/tegra20.c +++ b/drivers/memory/tegra/tegra20.c @@ -91,159 +91,211 @@ static const struct tegra_mc_client tegra20_mc_clients[] = { { .id = 0x00, .name = "display0a", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0ab", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0b", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0bb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0c", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0cb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display1b", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x07, .name = "display1bb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x08, .name = "eppup", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x09, .name = "g2pr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x0a, .name = "g2sr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x0b, .name = "mpeunifbr", + .reset_id = TEGRA20_MC_RESET_MPEB, }, { .id = 0x0c, .name = "viruv", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x0d, .name = "avpcarm7r", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "displayhc", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0f, .name = 
"displayhcb", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "fdcdrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x11, .name = "g2dr", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x12, .name = "host1xdmar", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x13, .name = "host1xr", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x14, .name = "idxsrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x15, .name = "mpcorer", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "mpe_ipred", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x17, .name = "mpeamemrd", + .reset_id = TEGRA20_MC_RESET_MPEA, }, { .id = 0x18, .name = "mpecsrd", + .reset_id = TEGRA20_MC_RESET_MPEC, }, { .id = 0x19, .name = "ppcsahbdmar", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1a, .name = "ppcsahbslvr", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1b, .name = "texsrd", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x1c, .name = "vdebsevr", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1d, .name = "vdember", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1e, .name = "vdemcer", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x1f, .name = "vdetper", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x20, .name = "eppu", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x21, .name = "eppv", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x22, .name = "eppy", + .reset_id = TEGRA20_MC_RESET_EPP, }, { .id = 0x23, .name = "mpeunifbw", + .reset_id = TEGRA20_MC_RESET_MPEB, }, { .id = 0x24, .name = "viwsb", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x25, .name = "viwu", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x26, .name = "viwv", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x27, .name = "viwy", + .reset_id = TEGRA20_MC_RESET_VI, }, { .id = 0x28, .name = "g2dw", + .reset_id = TEGRA20_MC_RESET_2D, }, { .id = 0x29, .name = "avpcarm7w", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2a, .name = "fdcdwr", + .reset_id = TEGRA20_MC_RESET_3D, }, { .id = 0x2b, .name = "host1xw", + .reset_id = TEGRA20_MC_RESET_HC, }, { .id = 0x2c, .name = "ispw", + .reset_id = TEGRA20_MC_RESET_ISP, }, { .id = 0x2d, .name = "mpcorew", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2e, .name = "mpecswr", + .reset_id = TEGRA20_MC_RESET_MPEC, }, { .id = 0x2f, .name = "ppcsahbdmaw", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x30, .name = "ppcsahbslvw", + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x31, .name = "vdebsevw", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x32, .name = "vdembew", + .reset_id = TEGRA20_MC_RESET_VDE, }, { .id = 0x33, .name = "vdetpmw", + .reset_id = TEGRA20_MC_RESET_VDE, }, }; @@ -728,6 +780,7 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) const char *error = tegra_mc_status_names[bit]; const char *direction = "read", *secure = ""; const char *client, *desc; + bool block_dma = true; phys_addr_t addr; u32 value, reg; u8 id, type; @@ -742,6 +795,8 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(31)) direction = "write"; + else + block_dma = false; break; case MC_INT_INVALID_GART_PAGE: @@ -753,6 +808,8 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(0)) direction = "write"; + else + block_dma = false; break; case MC_INT_SECURITY_VIOLATION: @@ -766,12 +823,20 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data) if (value & BIT(31)) direction = "write"; + else + block_dma = false; break; default: continue; } + /* Read errors are quite common, hence lets 
skip them since + * not all drivers support recovering from a blocked DMA. + */ + if (block_dma) + tegra_mc_error_block_client_dma(mc, id); + client = mc->soc->clients[id].name; addr = mc_readl(mc, reg + sizeof(u32)); diff --git a/drivers/memory/tegra/tegra210.c b/drivers/memory/tegra/tegra210.c index 8ab6498dbe7d2..18debe6ed7098 100644 --- a/drivers/memory/tegra/tegra210.c +++ b/drivers/memory/tegra/tegra210.c @@ -12,6 +12,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .id = 0x00, .name = "ptcr", .swgroup = TEGRA_SWGROUP_PTC, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x01, .name = "display0a", @@ -28,6 +29,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x02, .name = "display0ab", @@ -44,6 +46,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x03, .name = "display0b", @@ -60,6 +63,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x04, .name = "display0bb", @@ -76,6 +80,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x05, .name = "display0c", @@ -92,6 +97,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x06, .name = "display0cb", @@ -108,6 +114,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x0e, .name = "afir", @@ -124,6 +131,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x2e, }, }, + .reset_id = TEGRA210_MC_RESET_AFI, }, { .id = 0x0f, .name = "avpcarm7r", @@ -140,6 +148,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x10, .name = "displayhc", @@ -156,6 +165,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x11, .name = "displayhcb", @@ -172,6 +182,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x15, .name = "hdar", @@ -188,6 +199,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x24, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x16, .name = "host1xdmar", @@ -204,6 +216,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x17, .name = "host1xr", @@ -220,6 +233,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x1c, .name = "nvencsrd", @@ -236,6 +250,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA210_MC_RESET_NVENC, }, { .id = 0x1d, .name = "ppcsahbdmar", @@ -252,6 +267,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1e, .name = "ppcsahbslvr", @@ -268,6 +284,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x1f, .name = "satar", @@ -284,6 +301,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 
0x65, }, }, + .reset_id = TEGRA210_MC_RESET_SATA, }, { .id = 0x27, .name = "mpcorer", @@ -296,6 +314,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x2b, .name = "nvencswr", @@ -312,6 +331,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVENC, }, { .id = 0x31, .name = "afiw", @@ -328,6 +348,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_AFI, }, { .id = 0x32, .name = "avpcarm7w", @@ -344,6 +365,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x35, .name = "hdaw", @@ -360,6 +382,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x36, .name = "host1xw", @@ -376,6 +399,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_HC, }, { .id = 0x39, .name = "mpcorew", @@ -388,6 +412,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3b, .name = "ppcsahbdmaw", @@ -404,6 +429,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3c, .name = "ppcsahbslvw", @@ -420,6 +446,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x3d, .name = "sataw", @@ -436,6 +463,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SATA, }, { .id = 0x44, .name = "ispra", @@ -452,6 +480,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x46, .name = "ispwa", @@ -468,6 +497,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x47, .name = "ispwb", @@ -484,6 +514,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2, }, { .id = 0x4a, .name = "xusb_hostr", @@ -500,6 +531,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x7a, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_HOST, }, { .id = 0x4b, .name = "xusb_hostw", @@ -516,6 +548,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_HOST, }, { .id = 0x4c, .name = "xusb_devr", @@ -532,6 +565,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x39, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_DEV, }, { .id = 0x4d, .name = "xusb_devw", @@ -548,6 +582,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_XUSB_DEV, }, { .id = 0x4e, .name = "isprab", @@ -564,6 +599,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x18, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x50, .name = "ispwab", @@ -580,6 +616,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x51, .name = "ispwbb", @@ -596,6 +633,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + 
.reset_id = TEGRA210_MC_RESET_ISP2B, }, { .id = 0x54, .name = "tsecsrd", @@ -612,6 +650,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x55, .name = "tsecswr", @@ -628,6 +667,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x56, .name = "a9avpscr", @@ -644,6 +684,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x57, .name = "a9avpscw", @@ -660,6 +701,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x58, .name = "gpusrd", @@ -677,6 +719,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x59, .name = "gpuswr", @@ -694,6 +737,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x5a, .name = "displayt", @@ -710,6 +754,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x60, .name = "sdmmcra", @@ -726,6 +771,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC1, }, { .id = 0x61, .name = "sdmmcraa", @@ -742,6 +788,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x5a, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC2, }, { .id = 0x62, .name = "sdmmcr", @@ -758,6 +805,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x49, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC3, }, { .id = 0x63, .swgroup = TEGRA_SWGROUP_SDMMC4A, @@ -774,6 +822,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x5a, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC4, }, { .id = 0x64, .name = "sdmmcwa", @@ -790,6 +839,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC1, }, { .id = 0x65, .name = "sdmmcwaa", @@ -806,6 +856,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC2, }, { .id = 0x66, .name = "sdmmcw", @@ -822,6 +873,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC3, }, { .id = 0x67, .name = "sdmmcwab", @@ -838,6 +890,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_SDMMC4, }, { .id = 0x6c, .name = "vicsrd", @@ -854,6 +907,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA210_MC_RESET_VIC, }, { .id = 0x6d, .name = "vicswr", @@ -870,6 +924,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_VIC, }, { .id = 0x72, .name = "viw", @@ -886,6 +941,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_VI, }, { .id = 0x73, .name = "displayd", @@ -902,6 +958,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA210_MC_RESET_DC, }, { .id = 0x78, .name = "nvdecsrd", @@ -918,6 +975,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = 
TEGRA210_MC_RESET_NVDEC, }, { .id = 0x79, .name = "nvdecswr", @@ -934,6 +992,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVDEC, }, { .id = 0x7a, .name = "aper", @@ -950,6 +1009,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA210_MC_RESET_APE, }, { .id = 0x7b, .name = "apew", @@ -966,6 +1026,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_APE, }, { .id = 0x7e, .name = "nvjpgsrd", @@ -982,6 +1043,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x23, }, }, + .reset_id = TEGRA210_MC_RESET_NVJPG, }, { .id = 0x7f, .name = "nvjpgswr", @@ -998,6 +1060,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA210_MC_RESET_NVJPG, }, { .id = 0x80, .name = "sesrd", @@ -1014,6 +1077,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x2e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x81, .name = "seswr", @@ -1030,6 +1094,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x82, .name = "axiapr", @@ -1046,6 +1111,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x83, .name = "axiapw", @@ -1062,6 +1128,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x84, .name = "etrr", @@ -1078,6 +1145,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x85, .name = "etrw", @@ -1094,6 +1162,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x86, .name = "tsecsrdb", @@ -1110,6 +1179,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x9b, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x87, .name = "tsecswrb", @@ -1126,6 +1196,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x88, .name = "gpusrd2", @@ -1143,6 +1214,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x1a, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, { .id = 0x89, .name = "gpuswr2", @@ -1160,6 +1232,7 @@ static const struct tegra_mc_client tegra210_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, }, }; diff --git a/drivers/memory/tegra/tegra30.c b/drivers/memory/tegra/tegra30.c index 84316357513db..781760b2c26e7 100644 --- a/drivers/memory/tegra/tegra30.c +++ b/drivers/memory/tegra/tegra30.c @@ -45,6 +45,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x01, @@ -62,6 +63,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x02, @@ -79,6 +81,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x03, @@ -96,6 +99,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, 
.fifo_size = 16 * 64, }, { .id = 0x04, @@ -113,6 +117,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x05, @@ -130,6 +135,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x06, @@ -147,6 +153,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 128, }, { .id = 0x07, @@ -164,6 +171,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x08, @@ -181,6 +189,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x4e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 64, }, { .id = 0x09, @@ -198,6 +207,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x17, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 8, }, { .id = 0x0a, @@ -215,6 +225,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 64, }, { .id = 0x0b, @@ -232,6 +243,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x09, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 64, }, { .id = 0x0c, @@ -249,6 +261,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x0d, @@ -266,6 +279,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2c, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 8, }, { .id = 0x0e, @@ -283,6 +297,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA30_MC_RESET_AFI, .fifo_size = 16 * 32, }, { .id = 0x0f, @@ -300,6 +315,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x10, @@ -317,6 +333,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x11, @@ -334,6 +351,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x12, @@ -351,6 +369,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 48, }, { .id = 0x13, @@ -368,6 +387,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 48, }, { .id = 0x14, @@ -385,6 +405,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 48, }, { .id = 0x15, @@ -402,6 +423,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 16, }, { .id = 0x16, @@ -419,6 +441,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x05, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 16, }, { .id = 0x17, @@ -436,6 +459,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x50, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 8, }, { .id 
= 0x18, @@ -453,6 +477,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 64, }, { .id = 0x19, @@ -470,6 +495,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 64, }, { .id = 0x1a, @@ -487,6 +513,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x80, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 2, }, { .id = 0x1b, @@ -504,6 +531,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x42, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 64, }, { .id = 0x1c, @@ -521,6 +549,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x1d, @@ -538,6 +567,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x1e, @@ -555,6 +585,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 8, }, { .id = 0x1f, @@ -572,6 +603,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA30_MC_RESET_SATA, .fifo_size = 16 * 32, }, { .id = 0x20, @@ -589,6 +621,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 64, }, { .id = 0x21, @@ -606,6 +639,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 64, }, { .id = 0x22, @@ -623,6 +657,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 8, }, { .id = 0x23, @@ -640,6 +675,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xd0, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 4, }, { .id = 0x24, @@ -657,6 +693,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2a, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x25, @@ -674,6 +711,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x74, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x26, @@ -687,6 +725,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 14, }, { .id = 0x27, @@ -700,6 +739,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x04, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 14, }, { .id = 0x28, @@ -717,6 +757,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x29, @@ -734,6 +775,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x2a, @@ -751,6 +793,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x6c, }, }, + .reset_id = TEGRA30_MC_RESET_EPP, .fifo_size = 16 * 64, }, { .id = 0x2b, @@ -768,6 +811,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x13, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x2c, @@ -785,6 +829,7 @@ static const 
struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2d, @@ -802,6 +847,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xb2, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2e, @@ -819,6 +865,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xb2, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x2f, @@ -836,6 +883,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x12, }, }, + .reset_id = TEGRA30_MC_RESET_VI, .fifo_size = 16 * 64, }, { .id = 0x30, @@ -853,6 +901,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x9, }, }, + .reset_id = TEGRA30_MC_RESET_2D, .fifo_size = 16 * 128, }, { .id = 0x31, @@ -870,6 +919,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0c, }, }, + .reset_id = TEGRA30_MC_RESET_AFI, .fifo_size = 16 * 32, }, { .id = 0x32, @@ -887,6 +937,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x33, @@ -904,6 +955,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D, .fifo_size = 16 * 48, }, { .id = 0x34, @@ -921,6 +973,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0a, }, }, + .reset_id = TEGRA30_MC_RESET_3D2, .fifo_size = 16 * 48, }, { .id = 0x35, @@ -938,6 +991,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 16, }, { .id = 0x36, @@ -955,6 +1009,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA30_MC_RESET_HC, .fifo_size = 16 * 32, }, { .id = 0x37, @@ -972,6 +1027,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_ISP, .fifo_size = 16 * 64, }, { .id = 0x38, @@ -985,6 +1041,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 24, }, { .id = 0x39, @@ -998,6 +1055,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x0e, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 24, }, { .id = 0x3a, @@ -1015,6 +1073,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_MPE, .fifo_size = 16 * 8, }, { .id = 0x3b, @@ -1032,6 +1091,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x10, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 2, }, { .id = 0x3c, @@ -1049,6 +1109,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x06, }, }, + .reset_id = TEGRA_MC_CLIENT_NO_RESET, .fifo_size = 16 * 4, }, { .id = 0x3d, @@ -1066,6 +1127,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x33, }, }, + .reset_id = TEGRA30_MC_RESET_SATA, .fifo_size = 16 * 32, }, { .id = 0x3e, @@ -1083,6 +1145,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 4, }, { .id = 0x3f, @@ -1100,6 +1163,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0xff, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, { .id = 0x40, @@ -1117,6 +1181,7 @@ static const struct tegra_mc_client 
tegra30_mc_clients[] = { .def = 0x42, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 2, }, { .id = 0x41, @@ -1134,6 +1199,7 @@ static const struct tegra_mc_client tegra30_mc_clients[] = { .def = 0x2a, }, }, + .reset_id = TEGRA30_MC_RESET_VDE, .fifo_size = 16 * 16, }, }; diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 1066b1194a5a9..80881099e4a71 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -24,9 +24,12 @@ struct tegra_mc_timing { u32 *emem_data; }; +#define TEGRA_MC_CLIENT_NO_RESET UINT_MAX + struct tegra_mc_client { unsigned int id; const char *name; + unsigned int reset_id; /* * For Tegra210 and earlier, this is the SWGROUP ID used for IOVA translations in the * Tegra SMMU, whereas on Tegra186 and later this is the ID used to override the ARM SMMU @@ -235,6 +238,9 @@ struct tegra_mc { int tegra_mc_write_emem_configuration(struct tegra_mc *mc, unsigned long rate); unsigned int tegra_mc_get_emem_device_count(struct tegra_mc *mc); +int tegra_mc_error_block_client_dma(struct tegra_mc *mc, + unsigned int client_idx); + #ifdef CONFIG_TEGRA_MC struct tegra_mc *devm_tegra_memory_controller_get(struct device *dev); int tegra_mc_probe_device(struct tegra_mc *mc, struct device *dev); From 4318eb0448ac79bccc4f3f3e396025f080349597 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 6 May 2018 23:25:55 +0300 Subject: [PATCH 1090/1202] ARM: dts: tegra20: Add IOMMU nodes to Host1x clients Enable IOMMU support for Host1x clients. Signed-off-by: Dmitry Osipenko --- arch/arm/boot/dts/tegra20.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 8010b40d7377f..a2d6a9e6a3d72 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -106,6 +106,8 @@ reset-names = "2d", "mc"; power-domains = <&pd_core>; operating-points-v2 = <&gr2d_dvfs_opp_table>; + + iommus = <&mc>; }; gr3d@54180000 { @@ -116,6 +118,8 @@ reset-names = "3d", "mc"; power-domains = <&pd_3d>; operating-points-v2 = <&gr3d_dvfs_opp_table>; + + iommus = <&mc>; }; dc@54200000 { @@ -143,6 +147,8 @@ "winc", "cursor"; + iommus = <&mc>; + rgb { status = "disabled"; }; @@ -173,6 +179,8 @@ "winc", "cursor"; + iommus = <&mc>; + rgb { status = "disabled"; }; From 4909f0756bb62cc5f2d419d50f27cfffd9b7f64b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 6 Oct 2015 19:27:48 +0300 Subject: [PATCH 1091/1202] xxx: mmc: core: Add quirk for NVIDIA Tegra20 EMMC This quirk is required in order to detect EMMC partitions on some Tegra20 devices. Signed-off-by: Dmitry Osipenko --- drivers/mmc/core/mmc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index b1c1716dacf0e..20e09c4937b1b 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -405,8 +405,17 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) ext_csd[EXT_CSD_SEC_CNT + 3] << 24; /* Cards with density > 2GiB are sector addressed */ - if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) + if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) { + if (card->host->caps & MMC_CAP_NONREMOVABLE) { + /* + * Size is in 256K chunks, i.e. 512 sectors each. + * This algorithm is defined and used by NVIDIA, + * according to eMMC 4.41, size is in 128K chunks. 
+				 */
+				card->ext_csd.sectors -= ext_csd[EXT_CSD_BOOT_MULT] * 512;
+			}
 			mmc_card_set_blockaddr(card);
+		}
 	}
 
 	card->ext_csd.strobe_support = ext_csd[EXT_CSD_STROBE_SUPPORT];

From 6cbb4c7df70ac6817c19a116feb26eb5be2ec996 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Mon, 25 Nov 2019 00:03:21 +0300
Subject: [PATCH 1092/1202] Revert "xxx: mmc: core: Add quirk for NVIDIA Tegra20 EMMC"

This reverts commit 828663c674099f5b2f831e1281913c762a6f61e8.
It makes some T20 devices see their eMMC partitions, but breaks
others.
---
 drivers/mmc/core/mmc.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 20e09c4937b1b..b1c1716dacf0e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -405,17 +405,8 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			ext_csd[EXT_CSD_SEC_CNT + 3] << 24;
 
 		/* Cards with density > 2GiB are sector addressed */
-		if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512) {
-			if (card->host->caps & MMC_CAP_NONREMOVABLE) {
-				/*
-				 * Size is in 256K chunks, i.e. 512 sectors each.
-				 * This algorithm is defined and used by NVIDIA,
-				 * according to eMMC 4.41, size is in 128K chunks.
-				 */
-				card->ext_csd.sectors -= ext_csd[EXT_CSD_BOOT_MULT] * 512;
-			}
+		if (card->ext_csd.sectors > (2u * 1024 * 1024 * 1024) / 512)
 			mmc_card_set_blockaddr(card);
-		}
 	}
 
 	card->ext_csd.strobe_support = ext_csd[EXT_CSD_STROBE_SUPPORT];

From 70cbc031bbdbfd8d0b5dd3e9ab785a262e98ab68 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sat, 14 Dec 2019 23:20:27 +0300
Subject: [PATCH 1093/1202] xxx: iommu: tegra-gart: Expose as system-wide IOMMU

This enables the IOMMU for the Tegra20 GPU. The patch will be removed
after updating the DRM WIP patch to support the IOMMU in a different
way.
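
The bus_set_iommu() call is what makes the GART "system-wide": it
registers gart_iommu_ops for the whole platform bus, so any platform
device can be attached to the GART through the generic IOMMU API. As a
rough illustration, a consumer could look like this (hypothetical code,
not part of this patch, assuming the iommu_domain_alloc() and
iommu_attach_device() API of this kernel generation, with 'dev' being
the client's struct device):

	struct iommu_domain *domain;
	int err;

	/* allocate a domain backed by the platform-bus IOMMU, i.e. the GART */
	domain = iommu_domain_alloc(&platform_bus_type);
	if (!domain)
		return -ENOMEM;

	/* route the device's DMA addresses through the GART mappings */
	err = iommu_attach_device(domain, dev);
	if (err)
		iommu_domain_free(domain);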
Signed-off-by: Dmitry Osipenko --- drivers/iommu/tegra-gart.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6a358f92c7e5d..5e4eea31ac5e5 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -364,8 +364,14 @@ struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) goto unregister_iommu; } + err = bus_set_iommu(&platform_bus_type, &gart_iommu_ops); + if (err) + goto free_savedata; + return gart; +free_savedata: + vfree(gart->savedata); unregister_iommu: iommu_device_unregister(&gart->iommu); remove_sysfs: From 8b950fee4346c4191c8f1729a66478b5aaffb7b7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 4 Oct 2018 16:46:38 +0300 Subject: [PATCH 1094/1202] xxx: staging: android: Add legacy ram-console Device-tree should contain the ram-console node, example: /memreserve/ 0xbeb00000 0x100000; ram-console { compatible = "android,ram-console"; reg = <0xbeb00000 0x100000>; }; --- drivers/staging/android/Kconfig | 2 + drivers/staging/android/Makefile | 2 + drivers/staging/android/ramconsole/Kconfig | 63 +++ drivers/staging/android/ramconsole/Makefile | 1 + .../staging/android/ramconsole/ram_console.c | 487 ++++++++++++++++++ .../staging/android/ramconsole/ram_console.h | 22 + 6 files changed, 577 insertions(+) create mode 100644 drivers/staging/android/ramconsole/Kconfig create mode 100644 drivers/staging/android/ramconsole/Makefile create mode 100644 drivers/staging/android/ramconsole/ram_console.c create mode 100644 drivers/staging/android/ramconsole/ram_console.h diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 70498adb15759..2517fbe3b8c7d 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -17,3 +17,5 @@ config ASHMEM endif # if ANDROID endmenu + +source "drivers/staging/android/ramconsole/Kconfig" diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index e9a55a5e65291..d780e07419d69 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 ccflags-y += -I$(src) # needed for trace events +obj-y += ramconsole/ + obj-$(CONFIG_ASHMEM) += ashmem.o diff --git a/drivers/staging/android/ramconsole/Kconfig b/drivers/staging/android/ramconsole/Kconfig new file mode 100644 index 0000000000000..db03dca04572d --- /dev/null +++ b/drivers/staging/android/ramconsole/Kconfig @@ -0,0 +1,63 @@ +config ANDROID_RAM_CONSOLE + bool "Android RAM buffer console" + default n + +config ANDROID_RAM_CONSOLE_ENABLE_VERBOSE + bool "Enable verbose console messages on Android RAM console" + default y + depends on ANDROID_RAM_CONSOLE + +menuconfig ANDROID_RAM_CONSOLE_ERROR_CORRECTION + bool "Android RAM Console Enable error correction" + default n + depends on ANDROID_RAM_CONSOLE + depends on !ANDROID_RAM_CONSOLE_EARLY_INIT + select REED_SOLOMON + select REED_SOLOMON_ENC8 + select REED_SOLOMON_DEC8 + +if ANDROID_RAM_CONSOLE_ERROR_CORRECTION + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE + int "Android RAM Console Data data size" + default 128 + help + Must be a power of 2. 
+ +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE + int "Android RAM Console ECC size" + default 16 + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE + int "Android RAM Console Symbol size" + default 8 + +config ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL + hex "Android RAM Console Polynomial" + default 0x19 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 4) + default 0x29 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 5) + default 0x61 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 6) + default 0x89 if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 7) + default 0x11d if (ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE = 8) + +endif # ANDROID_RAM_CONSOLE_ERROR_CORRECTION + +config ANDROID_RAM_CONSOLE_EARLY_INIT + bool "Start Android RAM console early" + default n + depends on ANDROID_RAM_CONSOLE + +config ANDROID_RAM_CONSOLE_EARLY_ADDR + hex "Android RAM console virtual address" + default 0 + depends on ANDROID_RAM_CONSOLE_EARLY_INIT + +config ANDROID_RAM_CONSOLE_EARLY_SIZE + hex "Android RAM console buffer size" + default 0 + depends on ANDROID_RAM_CONSOLE_EARLY_INIT + +config ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED + bool "Resume console in case of kernel panic" + default n + depends on ANDROID_RAM_CONSOLE diff --git a/drivers/staging/android/ramconsole/Makefile b/drivers/staging/android/ramconsole/Makefile new file mode 100644 index 0000000000000..1125854a955ba --- /dev/null +++ b/drivers/staging/android/ramconsole/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ANDROID_RAM_CONSOLE) += ram_console.o diff --git a/drivers/staging/android/ramconsole/ram_console.c b/drivers/staging/android/ramconsole/ram_console.c new file mode 100644 index 0000000000000..198e71633319b --- /dev/null +++ b/drivers/staging/android/ramconsole/ram_console.c @@ -0,0 +1,487 @@ +/* drivers/android/ram_console.c + * + * Copyright (C) 2007-2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ram_console.h" + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +#include +#endif + +struct ram_console_buffer { + __le32 sig; + __le32 start; + __le32 size; + uint8_t data[0]; +}; + +#define RAM_CONSOLE_SIG (0x43474244) /* DBGC */ + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +static char __initdata + ram_console_old_log_init_buffer[CONFIG_ANDROID_RAM_CONSOLE_EARLY_SIZE]; +#endif +static char *ram_console_old_log; +static size_t ram_console_old_log_size; + +static struct ram_console_buffer *ram_console_buffer; +static size_t ram_console_buffer_size; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +static char *ram_console_par_buffer; +static struct rs_control *ram_console_rs_decoder; +static int ram_console_corrected_bytes; +static int ram_console_bad_blocks; +#define ECC_BLOCK_SIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_DATA_SIZE +#define ECC_SIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_ECC_SIZE +#define ECC_SYMSIZE CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_SYMBOL_SIZE +#define ECC_POLY CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION_POLYNOMIAL +#endif + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION +static void ram_console_encode_rs8(uint8_t *data, size_t len, uint8_t *ecc) +{ + int i; + uint16_t par[ECC_SIZE]; + /* Initialize the parity buffer */ + memset(par, 0, sizeof(par)); + encode_rs8(ram_console_rs_decoder, data, len, par, 0); + for (i = 0; i < ECC_SIZE; i++) + ecc[i] = par[i]; +} + +static int ram_console_decode_rs8(void *data, size_t len, uint8_t *ecc) +{ + int i; + uint16_t par[ECC_SIZE]; + for (i = 0; i < ECC_SIZE; i++) + par[i] = ecc[i]; + return decode_rs8(ram_console_rs_decoder, data, par, len, + NULL, 0, NULL, 0, NULL); +} +#endif + +static void ram_console_update(const char *s, unsigned int count) +{ + struct ram_console_buffer *buffer = ram_console_buffer; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + uint8_t *buffer_end = buffer->data + ram_console_buffer_size; + uint8_t *block; + uint8_t *par; + int size = ECC_BLOCK_SIZE; +#endif + memcpy(buffer->data + le32_to_cpu(buffer->start), s, count); +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + block = buffer->data + (le32_to_cpu(buffer->start) & ~(ECC_BLOCK_SIZE - 1)); + par = ram_console_par_buffer + + (le32_to_cpu(buffer->start) / ECC_BLOCK_SIZE) * ECC_SIZE; + do { + if (block + ECC_BLOCK_SIZE > buffer_end) + size = buffer_end - block; + ram_console_encode_rs8(block, size, par); + block += ECC_BLOCK_SIZE; + par += ECC_SIZE; + } while (block < buffer->data + le32_to_cpu(buffer->start) + count); +#endif +} + +static void ram_console_update_header(void) +{ +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + struct ram_console_buffer *buffer = ram_console_buffer; + uint8_t *par; + par = ram_console_par_buffer + + DIV_ROUND_UP(ram_console_buffer_size, ECC_BLOCK_SIZE) * ECC_SIZE; + ram_console_encode_rs8((uint8_t *)buffer, sizeof(*buffer), par); +#endif +} + +static void +ram_console_write(struct console *console, const char *s, unsigned int count) +{ + int rem; + struct ram_console_buffer *buffer = ram_console_buffer; + + if (count > ram_console_buffer_size) { + s += count - ram_console_buffer_size; + count = ram_console_buffer_size; + } + rem = ram_console_buffer_size - le32_to_cpu(buffer->start); + if (rem < count) { + ram_console_update(s, rem); + s += rem; + count -= rem; + buffer->start = 0; + buffer->size = 
cpu_to_le32(ram_console_buffer_size); + } + ram_console_update(s, count); + + buffer->start = cpu_to_le32(le32_to_cpu(buffer->start) + count); + if (le32_to_cpu(buffer->size) < ram_console_buffer_size) + buffer->size = cpu_to_le32(le32_to_cpu(buffer->size) + count); + ram_console_update_header(); +} + +static struct console ram_console = { + .name = "ram", + .write = ram_console_write, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_ANYTIME, + .index = -1, +}; + +void ram_console_enable_console(int enabled) +{ + if (enabled) + ram_console.flags |= CON_ENABLED; + else + ram_console.flags &= ~CON_ENABLED; +} + +static void __init +ram_console_save_old(struct ram_console_buffer *buffer, const char *bootinfo, + char *dest) +{ + size_t old_log_size = buffer->size; + size_t bootinfo_size = 0; + size_t total_size = old_log_size; + char *ptr; + const char *bootinfo_label = "Boot info:\n"; + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + uint8_t *block; + uint8_t *par; + char strbuf[80]; + int strbuf_len = 0; + + block = buffer->data; + par = ram_console_par_buffer; + while (block < buffer->data + buffer->size) { + int numerr; + int size = ECC_BLOCK_SIZE; + if (block + size > buffer->data + ram_console_buffer_size) + size = buffer->data + ram_console_buffer_size - block; + numerr = ram_console_decode_rs8(block, size, par); + if (numerr > 0) { +#if 0 + printk(KERN_INFO "ram_console: error in block %p, %d\n", + block, numerr); +#endif + ram_console_corrected_bytes += numerr; + } else if (numerr < 0) { +#if 0 + printk(KERN_INFO "ram_console: uncorrectable error in " + "block %p\n", block); +#endif + ram_console_bad_blocks++; + } + block += ECC_BLOCK_SIZE; + par += ECC_SIZE; + } + if (ram_console_corrected_bytes || ram_console_bad_blocks) + strbuf_len = snprintf(strbuf, sizeof(strbuf), + "\n%d Corrected bytes, %d unrecoverable blocks\n", + ram_console_corrected_bytes, ram_console_bad_blocks); + else + strbuf_len = snprintf(strbuf, sizeof(strbuf), + "\nNo errors detected\n"); + if (strbuf_len >= sizeof(strbuf)) + strbuf_len = sizeof(strbuf) - 1; + total_size += strbuf_len; +#endif + + if (bootinfo) + bootinfo_size = strlen(bootinfo) + strlen(bootinfo_label); + total_size += bootinfo_size; + + if (dest == NULL) { + dest = kmalloc(total_size, GFP_KERNEL); + if (dest == NULL) { + printk(KERN_ERR + "ram_console: failed to allocate buffer\n"); + return; + } + } + + ram_console_old_log = dest; + ram_console_old_log_size = total_size; + memcpy(ram_console_old_log, + &buffer->data[le32_to_cpu(buffer->start)], buffer->size - le32_to_cpu(buffer->start)); + memcpy(ram_console_old_log + buffer->size - le32_to_cpu(buffer->start), + &buffer->data[0], le32_to_cpu(buffer->start)); + ptr = ram_console_old_log + old_log_size; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + memcpy(ptr, strbuf, strbuf_len); + ptr += strbuf_len; +#endif + if (bootinfo) { + memcpy(ptr, bootinfo_label, strlen(bootinfo_label)); + ptr += strlen(bootinfo_label); + memcpy(ptr, bootinfo, bootinfo_size); + ptr += bootinfo_size; + } +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED +static int ram_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + resume_console(); + return NOTIFY_DONE; +} + +static struct notifier_block ram_panic_blk = { + .notifier_call = ram_panic_event, +}; +#endif + +static int __init ram_console_init(struct ram_console_buffer *buffer, + size_t buffer_size, const char *bootinfo, + char *old_buf) +{ +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + int numerr; + uint8_t 
*par; +#endif + ram_console_buffer = buffer; + ram_console_buffer_size = + buffer_size - sizeof(struct ram_console_buffer); + + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %zu, " + "datasize %zu\n", buffer, buffer_size, + ram_console_buffer_size); + return 0; + } + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION + ram_console_buffer_size -= (DIV_ROUND_UP(ram_console_buffer_size, + ECC_BLOCK_SIZE) + 1) * ECC_SIZE; + + if (ram_console_buffer_size > buffer_size) { + pr_err("ram_console: buffer %p, invalid size %zu, " + "non-ecc datasize %zu\n", + buffer, buffer_size, ram_console_buffer_size); + return 0; + } + + ram_console_par_buffer = buffer->data + ram_console_buffer_size; + + + /* first consecutive root is 0 + * primitive element to generate roots = 1 + */ + ram_console_rs_decoder = init_rs(ECC_SYMSIZE, ECC_POLY, 0, 1, ECC_SIZE); + if (ram_console_rs_decoder == NULL) { + printk(KERN_INFO "ram_console: init_rs failed\n"); + return 0; + } + + ram_console_corrected_bytes = 0; + ram_console_bad_blocks = 0; + + par = ram_console_par_buffer + + DIV_ROUND_UP(ram_console_buffer_size, ECC_BLOCK_SIZE) * ECC_SIZE; + + numerr = ram_console_decode_rs8(buffer, sizeof(*buffer), par); + if (numerr > 0) { + printk(KERN_INFO "ram_console: error in header, %d\n", numerr); + ram_console_corrected_bytes += numerr; + } else if (numerr < 0) { + printk(KERN_INFO + "ram_console: uncorrectable error in header\n"); + ram_console_bad_blocks++; + } +#endif + + // ignore start corruption + buffer->start = 0; + + if (buffer->sig == RAM_CONSOLE_SIG) { + if (buffer->size > ram_console_buffer_size + || le32_to_cpu(buffer->start) > buffer->size) { + printk(KERN_INFO "ram_console: found existing invalid " + "buffer, size %d, start %d\n", + buffer->size, le32_to_cpu(buffer->start)); + memset(buffer, 0, buffer_size); + } + else { + printk(KERN_INFO "ram_console: found existing buffer, " + "size %d, start %d\n", + buffer->size, le32_to_cpu(buffer->start)); + ram_console_save_old(buffer, bootinfo, old_buf); + } + } else { + printk(KERN_INFO "ram_console: no valid data in buffer " + "(sig = 0x%08x)\n", buffer->sig); + memset(buffer, 0, buffer_size); + } + + buffer->sig = RAM_CONSOLE_SIG; + buffer->start = 0; + buffer->size = 0; + + register_console(&ram_console); +#ifdef CONFIG_ANDROID_RAM_CONSOLE_ENABLE_VERBOSE + console_verbose(); +#endif +#ifdef CONFIG_ANDROID_RAM_CONSOLE_DEBUG_CONSOLE_SUSPENDED + atomic_notifier_chain_register(&panic_notifier_list, + &ram_panic_blk); +#endif + return 0; +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +static int __init ram_console_early_init(void) +{ + return ram_console_init((struct ram_console_buffer *) + CONFIG_ANDROID_RAM_CONSOLE_EARLY_ADDR, + CONFIG_ANDROID_RAM_CONSOLE_EARLY_SIZE, + NULL, + ram_console_old_log_init_buffer); +} +#else +static int ram_console_driver_probe(struct platform_device *pdev) +{ + struct resource *res = pdev->resource; + size_t start; + size_t buffer_size; + void *buffer; + const char *bootinfo = NULL; + struct ram_console_platform_data *pdata = pdev->dev.platform_data; + + if (res == NULL || pdev->num_resources != 1 || + !(res->flags & IORESOURCE_MEM)) { + printk(KERN_ERR "ram_console: invalid resource, %p %d flags " + "%lx\n", res, pdev->num_resources, res ? 
res->flags : 0); + return -ENXIO; + } + buffer_size = resource_size(res); + start = res->start; + printk(KERN_INFO "ram_console: got buffer at %zx, size %zx\n", + start, buffer_size); + + memblock_remove(res->start, buffer_size); + + buffer = ioremap(res->start, buffer_size); + if (buffer == NULL) { + printk(KERN_ERR "ram_console: failed to map memory\n"); + return -ENOMEM; + } + + if (pdata) + bootinfo = pdata->bootinfo; + + return ram_console_init(buffer, buffer_size, bootinfo, NULL/* allocate */); +} + +static struct of_device_id ram_console_of_match[] = { + { .compatible = "android,ram-console", }, + { }, +}; +MODULE_DEVICE_TABLE(of, ram_console_of_match); + +static struct platform_driver ram_console_driver = { + .probe = ram_console_driver_probe, + .driver = { + .name = "ram_console", + .of_match_table = of_match_ptr(ram_console_of_match), + }, +}; + +static int __init ram_console_module_init(void) +{ + int err; + err = platform_driver_register(&ram_console_driver); + return err; +} +#endif + +#ifndef CONFIG_PRINTK +#define dmesg_restrict 0 +#endif + +static ssize_t ram_console_read_old(struct file *file, char __user *buf, + size_t len, loff_t *offset) +{ + loff_t pos = *offset; + ssize_t count; + + if (dmesg_restrict && !capable(CAP_SYSLOG)) + return -EPERM; + + if (pos >= ram_console_old_log_size) + return 0; + + count = min(len, (size_t)(ram_console_old_log_size - pos)); + if (copy_to_user(buf, ram_console_old_log + pos, count)) + return -EFAULT; + + *offset += count; + return count; +} + +static const struct proc_ops ram_console_file_ops = { + .proc_read = ram_console_read_old, +}; + +static int __init ram_console_late_init(void) +{ + struct proc_dir_entry *entry; + +// if (ram_console_old_log == NULL) +// return 0; +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT + ram_console_old_log = kmalloc(ram_console_old_log_size, GFP_KERNEL); + if (ram_console_old_log == NULL) { + printk(KERN_ERR + "ram_console: failed to allocate buffer for old log\n"); + ram_console_old_log_size = 0; + return 0; + } + memcpy(ram_console_old_log, + ram_console_old_log_init_buffer, ram_console_old_log_size); +#endif + entry = proc_create("last_kmsg", S_IFREG | S_IRUGO, NULL, + &ram_console_file_ops); + if (!entry) { + printk(KERN_ERR "ram_console: failed to create proc entry\n"); + kfree(ram_console_old_log); + ram_console_old_log = NULL; + return 0; + } + + proc_set_size(entry, ram_console_old_log_size); + return 0; +} + +#ifdef CONFIG_ANDROID_RAM_CONSOLE_EARLY_INIT +console_initcall(ram_console_early_init); +#else +postcore_initcall(ram_console_module_init); +#endif +late_initcall(ram_console_late_init); + diff --git a/drivers/staging/android/ramconsole/ram_console.h b/drivers/staging/android/ramconsole/ram_console.h new file mode 100644 index 0000000000000..9f1125c110660 --- /dev/null +++ b/drivers/staging/android/ramconsole/ram_console.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#ifndef _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_
+#define _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_
+
+struct ram_console_platform_data {
+	const char *bootinfo;
+};
+
+#endif /* _INCLUDE_LINUX_PLATFORM_DATA_RAM_CONSOLE_H_ */

From c13eac2b939856d5badc002b20edff2a99eb8123 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Thu, 22 Jul 2021 21:19:59 +0300
Subject: [PATCH 1095/1202] mmc: Support non-standard gpt_sector cmdline parameter

Add support for the gpt_sector cmdline parameter which will be used
for looking up the GPT entry on the internal eMMC storage of NVIDIA
Tegra20+ devices. This parameter is passed to the kernel by the stock
downstream bootloader of Android devices.

Signed-off-by: Dmitry Osipenko
---
 block/partitions/efi.c  |  9 +++++++++
 drivers/mmc/core/core.c | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 5e9be13a56a82..3155a4f6767a6 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -98,6 +98,15 @@ static int force_gpt;
 static int __init force_gpt_fn(char *str)
 {
+	/*
+	 * This check allows us to properly parse cmdline variants like
+	 * "gpt gpt_sector=" and "gpt_sector= gpt" since
+	 * "gpt" overlaps with the "gpt_sector=", see cmdline_gpt_sector_fn().
+	 * The argument is absent for a boolean cmdline option.
+	 */
+	if (strlen(str))
+		return 0;
+
 	force_gpt = 1;
 	return 1;
 }
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 240c5af793dce..5e0c20b3fae2a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2150,6 +2150,19 @@ int mmc_detect_card_removed(struct mmc_host *host)
 }
 EXPORT_SYMBOL(mmc_detect_card_removed);
 
+/*
+ * This allows a kernel command line option 'gpt_sector=' to
+ * enable GPT header lookup at a non-standard location. This option
+ * is provided to the kernel by NVIDIA's proprietary bootloader.
+ */
+static sector_t cmdline_gpt_sector;
+static int __init cmdline_gpt_sector_fn(char *str)
+{
+	WARN_ON(kstrtoull(str, 10, &cmdline_gpt_sector) < 0);
+	return 1;
+}
+__setup("gpt_sector=", cmdline_gpt_sector_fn);
+
 int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *gpt_sector)
 {
 	unsigned int boot_sectors_num;
@@ -2164,6 +2177,11 @@ int mmc_card_alternative_gpt_sector(struct mmc_card *card, sector_t *gpt_sector)
 		mmc_card_is_removable(card->host))
 		return -ENOENT;
 
+	if (cmdline_gpt_sector) {
+		*gpt_sector = cmdline_gpt_sector;
+		return 0;
+	}
+
 	/*
 	 * eMMC storage has two special boot partitions in addition to the
 	 * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main

From ca08fe384ed7e1bbf9452822aa5e4786db77ac6e Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Thu, 22 Jul 2021 21:19:59 +0300
Subject: [PATCH 1096/1202] partitions: Support NVIDIA Tegra Partition Table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All NVIDIA Tegra devices use a special partition table format for the
internal storage partitioning. Most Tegra devices have a GPT in
addition to the TegraPT, but some older consumer-grade Android devices
either lack a GPT or have it placed in a wrong sector; the TegraPT is
therefore needed for the upstream kernel to support these devices
properly. This patch adds support for the NVIDIA Tegra Partition Table
format, which is used at least by all NVIDIA Tegra20 and Tegra30
devices.
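
Note that the TegraPT parser added below only activates when the
'tegraboot=sdmmc' option is present on the kernel command line (it is
normally put there by the stock bootloader) and the partition table
location has been extracted from the BCT by
tegra_partition_table_setup(). A typical bootargs line on an affected
device therefore looks roughly like this (the gpt_sector value is
purely illustrative):

	console=ttyS0,115200n8 tegraboot=sdmmc gpt_sector=402431

Without 'tegraboot=sdmmc', tegra_partition() returns immediately and
the disk is handled by the remaining partition parsers as before.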
Tested-by: Nils Östlund Signed-off-by: Dmitry Osipenko --- arch/arm/mach-tegra/tegra.c | 54 ++++ block/partitions/Kconfig | 8 + block/partitions/Makefile | 1 + block/partitions/check.h | 1 + block/partitions/core.c | 3 + block/partitions/tegra.c | 580 ++++++++++++++++++++++++++++++++++ drivers/mmc/core/block.c | 19 ++ include/linux/mmc/blkdev.h | 13 + include/soc/tegra/bootdata.h | 46 +++ include/soc/tegra/partition.h | 84 +++++ 10 files changed, 809 insertions(+) create mode 100644 block/partitions/tegra.c create mode 100644 include/linux/mmc/blkdev.h create mode 100644 include/soc/tegra/bootdata.h create mode 100644 include/soc/tegra/partition.h diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index ab5008f35803c..06a516c7e19c3 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -28,7 +28,9 @@ #include +#include #include +#include #include #include @@ -62,9 +64,61 @@ u32 tegra_uart_config[3] = { 0, }; +static void __init tegra_boot_config_table_init(void) +{ + struct tegra30_boot_config_table __iomem *t30_bct; + struct tegra20_boot_config_table __iomem *t20_bct; + struct tegra20_boot_info_table __iomem *t20_bit; + u32 iram_end = TEGRA_IRAM_BASE + TEGRA_IRAM_SIZE; + u32 iram_start = TEGRA_IRAM_BASE; + u32 pt_addr, pt_size, bct_size; + + t20_bit = IO_ADDRESS(TEGRA_IRAM_BASE); + + if (of_machine_is_compatible("nvidia,tegra20")) { + bct_size = sizeof(*t20_bct); + + if (t20_bit->bct_size != bct_size || + t20_bit->bct_ptr < iram_start || + t20_bit->bct_ptr > iram_end - bct_size) + return; + + t20_bct = IO_ADDRESS(t20_bit->bct_ptr); + + if (t20_bct->boot_data_version != TEGRA_BOOTDATA_VERSION_T20) + return; + + pt_addr = t20_bct->partition_table_logical_sector_address; + pt_size = t20_bct->partition_table_num_logical_sectors; + + } else if (of_machine_is_compatible("nvidia,tegra30")) { + bct_size = sizeof(*t30_bct); + + if (t20_bit->bct_size != bct_size || + t20_bit->bct_ptr < iram_start || + t20_bit->bct_ptr > iram_end - bct_size) + return; + + t30_bct = IO_ADDRESS(t20_bit->bct_ptr); + + if (t30_bct->boot_data_version != TEGRA_BOOTDATA_VERSION_T30) + return; + + pt_addr = t30_bct->partition_table_logical_sector_address; + pt_size = t30_bct->partition_table_num_logical_sectors; + } else { + return; + } + + pr_info("%s: BCT found in IRAM\n", __func__); + + tegra_partition_table_setup(pt_addr, pt_size); +} + static void __init tegra_init_early(void) { of_register_trusted_foundations(); + tegra_boot_config_table_init(); tegra_cpu_reset_handler_init(); call_firmware_op(l2x0_init); } diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 7aff4eb81c60f..6521a9af21f0f 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -270,4 +270,12 @@ config CMDLINE_PARTITION Say Y here if you want to read the partition table from bootargs. The format for the command line is just like mtdparts. +config TEGRA_PARTITION + bool "NVIDIA Tegra Partition support" if PARTITION_ADVANCED + default y if ARCH_TEGRA + depends on EFI_PARTITION && MMC_BLOCK && (ARCH_TEGRA || COMPILE_TEST) + help + Say Y here if you would like to be able to read the hard disk + partition table format used by NVIDIA Tegra machines. 
+ endmenu diff --git a/block/partitions/Makefile b/block/partitions/Makefile index a7f05cdb02a84..83cb70c6d08d3 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_IBM_PARTITION) += ibm.o obj-$(CONFIG_EFI_PARTITION) += efi.o obj-$(CONFIG_KARMA_PARTITION) += karma.o obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o +obj-$(CONFIG_TEGRA_PARTITION) += tegra.o diff --git a/block/partitions/check.h b/block/partitions/check.h index d5b28e309d64d..fbfe70405c850 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -67,4 +67,5 @@ int osf_partition(struct parsed_partitions *state); int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); int sysv68_partition(struct parsed_partitions *state); +int tegra_partition(struct parsed_partitions *state); int ultrix_partition(struct parsed_partitions *state); diff --git a/block/partitions/core.c b/block/partitions/core.c index 334b72ef1d73f..0c9486e7240c2 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -83,6 +83,9 @@ static int (*check_part[])(struct parsed_partitions *) = { #endif #ifdef CONFIG_SYSV68_PARTITION sysv68_partition, +#endif +#ifdef CONFIG_TEGRA_PARTITION + tegra_partition, #endif NULL }; diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c new file mode 100644 index 0000000000000..e7cbc30c15aae --- /dev/null +++ b/block/partitions/tegra.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVIDIA Tegra Partition Table + * + * Copyright (C) 2020 GRATE-DRIVER project + * Copyright (C) 2020 Dmitry Osipenko + * + * Credits for the partition table format: + * + * Andrey Danin (Toshiba AC100 TegraPT format) + * Gilles Grandou (Toshiba AC100 TegraPT format) + * Ryan Grachek (Google TV "Molly" TegraPT format) + * Stephen Warren (Useful suggestions about eMMC/etc) + */ + +#define pr_fmt(fmt) "tegra-partition: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "check.h" + +#define TEGRA_PT_SECTOR_SIZE(ptp) ((ptp)->logical_sector_size / SZ_512) +#define TEGRA_PT_SECTOR(ptp, s) ((s) * TEGRA_PT_SECTOR_SIZE(ptp)) + +#define TEGRA_PT_HEADER_SIZE \ + (sizeof(struct tegra_partition_header_insecure) + \ + sizeof(struct tegra_partition_header_secure)) + +#define TEGRA_PT_MAX_PARTITIONS(ptp) \ + (((ptp)->logical_sector_size - TEGRA_PT_HEADER_SIZE) / \ + sizeof(struct tegra_partition)) + +#define TEGRA_PT_ERR(ptp, fmt, ...) \ + pr_debug("%s: " fmt, \ + (ptp)->state->disk->disk_name, ##__VA_ARGS__) + +#define TEGRA_PT_PARSE_ERR(ptp, fmt, ...) 
\ + TEGRA_PT_ERR(ptp, "sector %llu: invalid " fmt, \ + (ptp)->sector, ##__VA_ARGS__) + +struct tegra_partition_table_parser { + struct tegra_partition_table *pt; + unsigned int logical_sector_size; + struct parsed_partitions *state; + bool pt_entry_checked; + sector_t sector; + int boot_offset; + u32 dev_instance; + u32 dev_id; +}; + +union tegra_partition_table_u { + struct tegra_partition_table pt; + u8 pt_parts[SZ_4K / SZ_512][SZ_512]; +}; + +struct tegra_partition_type { + unsigned int type; + char *name; +}; + +static sector_t tegra_pt_logical_sector_address; +static sector_t tegra_pt_logical_sectors_num; + +void tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num) +{ + tegra_pt_logical_sector_address = logical_sector_address; + tegra_pt_logical_sectors_num = logical_sectors_num; + + pr_info("initialized to logical sector = %llu sectors_num = %llu\n", + tegra_pt_logical_sector_address, tegra_pt_logical_sectors_num); +} + +/* + * Some partitions are very sensitive, changing data on them may brick device. + * + * For more details about partitions see: + * + * "https://docs.nvidia.com/jetson/l4t/Tegra Linux Driver Package Development Guide/part_config.html" + */ +static const char * const partitions_blacklist[] = { + "BCT", "EBT", "EB2", "EKS", "GP1", "GPT", "MBR", "PT", +}; + +static bool tegra_partition_name_match(struct tegra_partition *p, + const char *name) +{ + return !strncmp(p->partition_name, name, TEGRA_PT_NAME_SIZE); +} + +static bool tegra_partition_skip(struct tegra_partition *p, + struct tegra_partition_table_parser *ptp, + sector_t sector) +{ + unsigned int i; + + /* skip eMMC boot partitions */ + if (sector < ptp->boot_offset) + return true; + + for (i = 0; i < ARRAY_SIZE(partitions_blacklist); i++) { + if (tegra_partition_name_match(p, partitions_blacklist[i])) + return true; + } + + return false; +} + +static const struct tegra_partition_type tegra_partition_expected_types[] = { + { .type = TEGRA_PT_PART_TYPE_BCT, .name = "BCT", }, + { .type = TEGRA_PT_PART_TYPE_EBT, .name = "EBT", }, + { .type = TEGRA_PT_PART_TYPE_EBT, .name = "EB2", }, + { .type = TEGRA_PT_PART_TYPE_PT, .name = "PT", }, + { .type = TEGRA_PT_PART_TYPE_GP1, .name = "GP1", }, + { .type = TEGRA_PT_PART_TYPE_GPT, .name = "GPT", }, + { .type = TEGRA_PT_PART_TYPE_GENERIC, .name = NULL, }, +}; + +static int tegra_partition_type_valid(struct tegra_partition_table_parser *ptp, + struct tegra_partition *p) +{ + const struct tegra_partition_type *ptype; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(tegra_partition_expected_types); i++) { + ptype = &tegra_partition_expected_types[i]; + + if (ptype->name && !tegra_partition_name_match(p, ptype->name)) + continue; + + if (p->part_info.partition_type == ptype->type) + return 0; + + /* + * Unsure about all possible types, let's emit error and + * allow to continue for now. 
+ */ + if (!ptype->name) + return 1; + } + + return -1; +} + +static bool tegra_partition_valid(struct tegra_partition_table_parser *ptp, + struct tegra_partition *p, + struct tegra_partition *prev, + sector_t sector, + sector_t size) +{ + struct tegra_partition_info *prev_pi = &prev->part_info; + sector_t sect_end = TEGRA_PT_SECTOR(ptp, + prev_pi->logical_sector_address + + prev_pi->logical_sectors_num); + char *type, name[2][TEGRA_PT_NAME_SIZE + 1]; + int err; + + strscpy(name[0], p->partition_name, sizeof(name[0])); + strscpy(name[1], prev->partition_name, sizeof(name[1])); + + /* validate expected partition name/type */ + err = tegra_partition_type_valid(ptp, p); + if (err) { + TEGRA_PT_PARSE_ERR(ptp, "partition_type: [%s] partition_type=%u\n", + name[0], p->part_info.partition_type); + if (err < 0) + return false; + + TEGRA_PT_ERR(ptp, "continuing, please update list of expected types\n"); + } + + /* validate partition table BCT addresses */ + if (tegra_partition_name_match(p, "PT")) { + if (sector != TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sector_address) && + size != TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sectors_num)) { + TEGRA_PT_PARSE_ERR(ptp, "PT location: sector=%llu size=%llu\n", + sector, size); + return false; + } + + if (ptp->pt_entry_checked) { + TEGRA_PT_PARSE_ERR(ptp, "(duplicated) PT\n"); + return false; + } + + ptp->pt_entry_checked = true; + } + + if (sector + size < sector) { + TEGRA_PT_PARSE_ERR(ptp, "size: [%s] integer overflow sector=%llu size=%llu\n", + name[0], sector, size); + return false; + } + + /* validate allocation_policy=sequential (absolute unsupported) */ + if (p != prev && sect_end > sector) { + TEGRA_PT_PARSE_ERR(ptp, "allocation_policy: [%s] end=%llu [%s] sector=%llu size=%llu\n", + name[1], sect_end, name[0], sector, size); + return false; + } + + if (ptp->dev_instance != p->mount_info.device_instance) { + TEGRA_PT_PARSE_ERR(ptp, "device_instance: [%s] device_instance=%u|%u\n", + name[0], ptp->dev_instance, + p->mount_info.device_instance); + return false; + } + + if (ptp->dev_id != p->mount_info.device_id) { + TEGRA_PT_PARSE_ERR(ptp, "device_id: [%s] device_id=%u|%u\n", + name[0], ptp->dev_id, + p->mount_info.device_id); + return false; + } + + if (p->partition_id > 127) { + TEGRA_PT_PARSE_ERR(ptp, "partition_id: [%s] partition_id=%u\n", + name[0], p->partition_id); + return false; + } + + sect_end = get_capacity(ptp->state->disk); + + /* eMMC boot partitions are below ptp->boot_offset */ + if (sector < ptp->boot_offset) { + sect_end += ptp->boot_offset; + type = "boot"; + } else { + sector -= ptp->boot_offset; + type = "main"; + } + + /* validate size */ + if (!size || sector + size > sect_end) { + TEGRA_PT_PARSE_ERR(ptp, "size: [%s] %s partition boot_offt=%d end=%llu sector=%llu size=%llu\n", + name[0], type, ptp->boot_offset, sect_end, + sector, size); + return false; + } + + return true; +} + +static bool tegra_partitions_parsed(struct tegra_partition_table_parser *ptp, + bool check_only) +{ + struct parsed_partitions *state = ptp->state; + struct tegra_partition_table *pt = ptp->pt; + sector_t sector, size; + int i, slot = 1; + + ptp->pt_entry_checked = false; + + for (i = 0; i < pt->secure.num_partitions; i++) { + struct tegra_partition *p = &pt->partitions[i]; + struct tegra_partition *prev = &pt->partitions[max(i - 1, 0)]; + struct tegra_partition_info *pi = &p->part_info; + + if (slot == state->limit && !check_only) + break; + + sector = TEGRA_PT_SECTOR(ptp, pi->logical_sector_address); + size = TEGRA_PT_SECTOR(ptp, pi->logical_sectors_num); 
+ + if (check_only && + !tegra_partition_valid(ptp, p, prev, sector, size)) + return false; + + if (check_only || + tegra_partition_skip(p, ptp, sector)) + continue; + + put_partition(state, slot++, sector - ptp->boot_offset, size); + } + + if (check_only && !ptp->pt_entry_checked) { + TEGRA_PT_PARSE_ERR(ptp, "PT: table entry not found\n"); + return false; + } + + return true; +} + +static bool +tegra_partition_table_parsed(struct tegra_partition_table_parser *ptp) +{ + if (ptp->pt->secure.num_partitions == 0 || + ptp->pt->secure.num_partitions > TEGRA_PT_MAX_PARTITIONS(ptp)) { + TEGRA_PT_PARSE_ERR(ptp, "num_partitions=%u\n", + ptp->pt->secure.num_partitions); + return false; + } + + return tegra_partitions_parsed(ptp, true) && + tegra_partitions_parsed(ptp, false); +} + +static int +tegra_partition_table_insec_hdr_valid(struct tegra_partition_table_parser *ptp) +{ + if (ptp->pt->insecure.magic != TEGRA_PT_MAGIC || + ptp->pt->insecure.version != TEGRA_PT_VERSION) { + TEGRA_PT_PARSE_ERR(ptp, "insecure header: magic=0x%llx ver=0x%x\n", + ptp->pt->insecure.magic, + ptp->pt->insecure.version); + return 0; + } + + return 1; +} + +static int +tegra_partition_table_sec_hdr_valid(struct tegra_partition_table_parser *ptp) +{ + size_t pt_size = ptp->pt->secure.num_partitions; + + pt_size *= sizeof(ptp->pt->partitions[0]); + pt_size += TEGRA_PT_HEADER_SIZE; + + if (ptp->pt->secure.magic != TEGRA_PT_MAGIC || + ptp->pt->secure.version != TEGRA_PT_VERSION || + ptp->pt->secure.length != ptp->pt->insecure.length || + ptp->pt->secure.length < pt_size) { + TEGRA_PT_PARSE_ERR(ptp, "secure header: magic=0x%llx ver=0x%x length=%u|%u|%zu\n", + ptp->pt->secure.magic, + ptp->pt->secure.version, + ptp->pt->secure.length, + ptp->pt->insecure.length, + pt_size); + return 0; + } + + return 1; +} + +static int +tegra_partition_table_unencrypted(struct tegra_partition_table_parser *ptp) +{ + /* AES IV, all zeros if unencrypted */ + if (ptp->pt->secure.random_data[0] || ptp->pt->secure.random_data[1] || + ptp->pt->secure.random_data[2] || ptp->pt->secure.random_data[3]) { + pr_err_once("encrypted partition table unsupported\n"); + return 0; + } + + return 1; +} + +static int tegra_read_partition_table(struct tegra_partition_table_parser *ptp) +{ + union tegra_partition_table_u *ptu = (typeof(ptu))ptp->pt; + unsigned int i; + Sector sect; + void *part; + + for (i = 0; i < ptp->logical_sector_size / SZ_512; i++) { + /* + * Partition table takes at maximum 4096 bytes, but + * read_part_sector() guarantees only that SECTOR_SIZE will + * be read at minimum. + */ + part = read_part_sector(ptp->state, ptp->sector + i, §); + if (!part) { + TEGRA_PT_ERR(ptp, "failed to read sector %llu\n", + ptp->sector + i); + return 0; + } + + memcpy(ptu->pt_parts[i], part, SZ_512); + put_dev_sector(sect); + } + + return 1; +} + +static int tegra_partition_scan(struct tegra_partition_table_parser *ptp) +{ + sector_t start_sector, num_sectors; + int ret = 0; + + num_sectors = TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sectors_num); + start_sector = TEGRA_PT_SECTOR(ptp, tegra_pt_logical_sector_address); + + if (start_sector < ptp->boot_offset) { + TEGRA_PT_ERR(ptp, + "scanning eMMC boot partitions unimplemented\n"); + return 0; + } + + ptp->sector = start_sector - ptp->boot_offset; + + /* + * Partition table is duplicated for num_sectors. + * If first table is corrupted, we will try next. 
+ */ + while (num_sectors--) { + ret = tegra_read_partition_table(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_insec_hdr_valid(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_unencrypted(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_sec_hdr_valid(ptp); + if (!ret) + goto next_sector; + + ret = tegra_partition_table_parsed(ptp); + if (ret) + break; +next_sector: + ptp->sector += TEGRA_PT_SECTOR_SIZE(ptp); + } + + return ret; +} + +static const u32 tegra20_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0xc8000000, 0xc8000200, 0xc8000400, 0xc8000600, +}; + +static const u32 tegra30_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0x78000000, 0x78000200, 0x78000400, 0x78000600, +}; + +static const u32 tegra124_sdhci_bases[TEGRA_PT_SDHCI_DEVICE_INSTANCES] = { + 0x700b0000, 0x700b0200, 0x700b0400, 0x700b0600, +}; + +static const struct of_device_id tegra_sdhci_match[] = { + { .compatible = "nvidia,tegra20-sdhci", .data = tegra20_sdhci_bases, }, + { .compatible = "nvidia,tegra30-sdhci", .data = tegra30_sdhci_bases, }, + { .compatible = "nvidia,tegra114-sdhci", .data = tegra30_sdhci_bases, }, + { .compatible = "nvidia,tegra124-sdhci", .data = tegra124_sdhci_bases, }, + {} +}; + +static int +tegra_partition_table_emmc_boot_offset(struct tegra_partition_table_parser *ptp) +{ + struct gendisk *disk = ptp->state->disk; + struct block_device *bdev = disk->part0; + struct mmc_card *card = mmc_bdev_to_card(bdev); + const struct of_device_id *matched; + const u32 *sdhci_bases; + const __be32 *addrp; + u32 sdhci_base; + unsigned int i; + + /* filter out unexpected/untested boot sources */ + if (!card || card->ext_csd.rev < 3 || + !mmc_card_mmc(card) || + !mmc_card_is_blockaddr(card) || + mmc_card_is_removable(card->host) || + bdev_logical_block_size(bdev) != SZ_512) { + TEGRA_PT_ERR(ptp, "unexpected boot source\n"); + return -1; + } + + /* skip everything unrelated to Tegra eMMC */ + matched = of_match_node(tegra_sdhci_match, card->host->parent->of_node); + if (!matched) + return -1; + + sdhci_bases = matched->data; + + /* figure out SDHCI instance ID by the base address */ + addrp = of_get_address(card->host->parent->of_node, 0, NULL, NULL); + if (!addrp) + return -1; + + sdhci_base = of_translate_address(card->host->parent->of_node, addrp); + + for (i = 0; i < TEGRA_PT_SDHCI_DEVICE_INSTANCES; i++) { + if (sdhci_base == sdhci_bases[i]) + break; + } + + if (i == TEGRA_PT_SDHCI_DEVICE_INSTANCES) + return -1; + + ptp->dev_id = TEGRA_PT_SDHCI_DEVICE_ID; + ptp->dev_instance = i; + + /* + * eMMC storage has two special boot partitions in addition to the + * main one. NVIDIA's bootloader linearizes eMMC boot0->boot1->main + * accesses, this means that the partition table addresses are shifted + * by the size of boot partitions. In accordance with the eMMC + * specification, the boot partition size is calculated as follows: + * + * boot partition size = 128K byte x BOOT_SIZE_MULT + * + * This function returns number of sectors occupied by the both boot + * partitions. + */ + return card->ext_csd.raw_boot_mult * SZ_128K / + SZ_512 * MMC_NUM_BOOT_PARTITION; +} + +/* + * Logical sector size may vary per device model and apparently there is no + * way to get information about the size from kernel. The info is hardcoded + * into bootloader and it doesn't tell us, so we'll just try all possible + * well-known sizes until succeed. + * + * For example Samsung Galaxy Tab 10.1 uses 2K sectors. 
While Acer A500, + * Nexus 7 and Ouya are using 4K sectors. + */ +static const unsigned int tegra_pt_logical_sector_sizes[] = { + SZ_4K, SZ_2K, +}; + +/* + * The 'tegraboot=' command line option is provided to kernel + * by NVIDIA's proprietary bootloader on most Tegra devices. If it isn't + * provided, then it should be added to the cmdline via device-tree bootargs + * or by other means. + */ +static bool tegra_boot_sdmmc; +static int __init tegra_boot_fn(char *str) +{ + tegra_boot_sdmmc = !strcmp(str, "sdmmc"); + return 1; +} +__setup("tegraboot=", tegra_boot_fn); + +int tegra_partition(struct parsed_partitions *state) +{ + struct tegra_partition_table_parser ptp = {}; + unsigned int i; + int ret; + + if (!soc_is_tegra() || !tegra_boot_sdmmc) + return 0; + + ptp.state = state; + + ptp.boot_offset = tegra_partition_table_emmc_boot_offset(&ptp); + if (ptp.boot_offset < 0) + return 0; + + ptp.pt = kmalloc(SZ_4K, GFP_KERNEL); + if (!ptp.pt) + return 0; + + for (i = 0; i < ARRAY_SIZE(tegra_pt_logical_sector_sizes); i++) { + ptp.logical_sector_size = tegra_pt_logical_sector_sizes[i]; + + ret = tegra_partition_scan(&ptp); + if (ret == 1) { + strlcat(state->pp_buf, "\n", PAGE_SIZE); + break; + } + } + + kfree(ptp.pt); + + return ret; +} diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 61590cf7a7b1b..7e3628785fb22 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -832,6 +833,24 @@ static const struct block_device_operations mmc_bdops = { .alternative_gpt_sector = mmc_blk_alternative_gpt_sector, }; +struct mmc_card *mmc_bdev_to_card(struct block_device *bdev) +{ + struct mmc_blk_data *md; + struct mmc_card *card; + + if (bdev->bd_disk->fops != &mmc_bdops) + return NULL; + + md = mmc_blk_get(bdev->bd_disk); + if (!md) + return NULL; + + card = md->queue.card; + mmc_blk_put(md); + + return card; +} + static int mmc_blk_part_switch_pre(struct mmc_card *card, unsigned int part_type) { diff --git a/include/linux/mmc/blkdev.h b/include/linux/mmc/blkdev.h new file mode 100644 index 0000000000000..67608c58de700 --- /dev/null +++ b/include/linux/mmc/blkdev.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/include/linux/mmc/blkdev.h + */ +#ifndef LINUX_MMC_BLOCK_DEVICE_H +#define LINUX_MMC_BLOCK_DEVICE_H + +struct block_device; +struct mmc_card; + +struct mmc_card *mmc_bdev_to_card(struct block_device *bdev); + +#endif /* LINUX_MMC_BLOCK_DEVICE_H */ diff --git a/include/soc/tegra/bootdata.h b/include/soc/tegra/bootdata.h new file mode 100644 index 0000000000000..7be207cb25198 --- /dev/null +++ b/include/soc/tegra/bootdata.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_TEGRA_BOOTDATA_H__ +#define __SOC_TEGRA_BOOTDATA_H__ + +#include +#include + +#define TEGRA_BOOTDATA_VERSION_T20 NVBOOT_BOOTDATA_VERSION(0x2, 0x1) +#define TEGRA_BOOTDATA_VERSION_T30 NVBOOT_BOOTDATA_VERSION(0x3, 0x1) + +#define NVBOOT_BOOTDATA_VERSION(a, b) ((((a) & 0xffff) << 16) | \ + ((b) & 0xffff)) +#define NVBOOT_CMAC_AES_HASH_LENGTH 4 + +struct tegra20_boot_info_table { + u32 unused_data1[14]; + u32 bct_size; + u32 bct_ptr; +} __packed; + +struct tegra20_boot_config_table { + u32 crypto_hash[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 random_aes_blk[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 boot_data_version; + u32 unused_data1[712]; + u32 unused_consumer_data1; + u16 partition_table_logical_sector_address; + u16 partition_table_num_logical_sectors; + u32 
unused_consumer_data[294]; + u32 unused_data[3]; +} __packed; + +struct tegra30_boot_config_table { + u32 crypto_hash[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 random_aes_blk[NVBOOT_CMAC_AES_HASH_LENGTH]; + u32 boot_data_version; + u32 unused_data1[1016]; + u32 unused_consumer_data1; + u16 partition_table_logical_sector_address; + u16 partition_table_num_logical_sectors; + u32 unused_consumer_data[502]; + u32 unused_data[3]; +} __packed; + +#endif /* __SOC_TEGRA_BOOTDATA_H__ */ diff --git a/include/soc/tegra/partition.h b/include/soc/tegra/partition.h new file mode 100644 index 0000000000000..e8fcce1a725d0 --- /dev/null +++ b/include/soc/tegra/partition.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_TEGRA_PARTITION_H__ +#define __SOC_TEGRA_PARTITION_H__ + +#include +#include + +#define TEGRA_PT_MAGIC 0xffffffff8f9e8d8bULL +#define TEGRA_PT_VERSION 0x100 +#define TEGRA_PT_AES_HASH_SIZE 4 +#define TEGRA_PT_NAME_SIZE 4 + +#define TEGRA_PT_SDHCI_DEVICE_ID 18 +#define TEGRA_PT_SDHCI_DEVICE_INSTANCES 4 + +#define TEGRA_PT_PART_TYPE_BCT 1 +#define TEGRA_PT_PART_TYPE_EBT 2 +#define TEGRA_PT_PART_TYPE_PT 3 +#define TEGRA_PT_PART_TYPE_GENERIC 6 +#define TEGRA_PT_PART_TYPE_GP1 9 +#define TEGRA_PT_PART_TYPE_GPT 10 + +struct tegra_partition_mount_info { + u32 device_id; + u32 device_instance; + u32 device_attr; + u8 mount_path[TEGRA_PT_NAME_SIZE]; + u32 file_system_type; + u32 file_system_attr; +} __packed; + +struct tegra_partition_info { + u32 partition_attr; + u32 __pad1; + u64 logical_sector_address; + u64 logical_sectors_num; + u64 physical_sector_address; + u64 physical_sectors_num; + u32 partition_type; + u32 __pad2; +} __packed; + +struct tegra_partition { + u32 partition_id; + u8 partition_name[TEGRA_PT_NAME_SIZE]; + struct tegra_partition_mount_info mount_info; + struct tegra_partition_info part_info; +} __packed; + +struct tegra_partition_header_insecure { + u64 magic; + u32 version; + u32 length; + u32 signature[TEGRA_PT_AES_HASH_SIZE]; +} __packed; + +struct tegra_partition_header_secure { + u32 random_data[TEGRA_PT_AES_HASH_SIZE]; + u64 magic; + u32 version; + u32 length; + u32 num_partitions; + u32 __pad; +} __packed; + +struct tegra_partition_table { + struct tegra_partition_header_insecure insecure; + struct tegra_partition_header_secure secure; + struct tegra_partition partitions[]; +} __packed; + +#ifdef CONFIG_TEGRA_PARTITION +void tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num); +#else +static inline void +tegra_partition_table_setup(unsigned int logical_sector_address, + unsigned int logical_sectors_num) +{ +} +#endif + +#endif /* __SOC_TEGRA_PARTITION_H__ */ From eb8aac99dd56ca4cfd0b7d3b5029dd8756ac97fa Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Thu, 14 May 2020 14:48:22 +0300 Subject: [PATCH 1097/1202] soc/tegra: Expose Boot Configuration Table via sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's quite useful to have unencrypted BCT exposed to userspace for debugging purposes, so let's expose it via sysfs. The BCT data will be present in '/sys/tegra/boot_config_table' binary file if BCT is available. 
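
For reference, the table can then be consumed from userspace like any
other sysfs binary attribute. A minimal sketch of a dumper, with error
handling trimmed (the path is the one created by this patch):

	#include <stdio.h>

	int main(void)
	{
		/* the size of the file equals the size of the in-IRAM BCT */
		FILE *f = fopen("/sys/tegra/boot_config_table", "rb");
		char buf[4096];
		size_t n;

		if (!f)
			return 1;

		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);

		fclose(f);
		return 0;
	}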
Suggested-by: Michał Mirosław
Signed-off-by: Dmitry Osipenko
---
 arch/arm/mach-tegra/tegra.c  |  4 +++
 drivers/soc/tegra/Makefile   |  1 +
 drivers/soc/tegra/bootdata.c | 67 ++++++++++++++++++++++++++++++++++++
 drivers/soc/tegra/common.c   | 15 ++++++++
 include/soc/tegra/bootdata.h |  2 ++
 include/soc/tegra/common.h   |  3 ++
 6 files changed, 92 insertions(+)
 create mode 100644 drivers/soc/tegra/bootdata.c

diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c
index 06a516c7e19c3..575dca04d77bb 100644
--- a/arch/arm/mach-tegra/tegra.c
+++ b/arch/arm/mach-tegra/tegra.c
@@ -72,6 +72,7 @@ static void __init tegra_boot_config_table_init(void)
 	u32 iram_end = TEGRA_IRAM_BASE + TEGRA_IRAM_SIZE;
 	u32 iram_start = TEGRA_IRAM_BASE;
 	u32 pt_addr, pt_size, bct_size;
+	void __iomem *bct_ptr;
 
 	t20_bit = IO_ADDRESS(TEGRA_IRAM_BASE);
 
@@ -90,6 +91,7 @@ static void __init tegra_boot_config_table_init(void)
 
 		pt_addr = t20_bct->partition_table_logical_sector_address;
 		pt_size = t20_bct->partition_table_num_logical_sectors;
+		bct_ptr = t20_bct;
 	} else if (of_machine_is_compatible("nvidia,tegra30")) {
 		bct_size = sizeof(*t30_bct);
 
@@ -106,12 +108,14 @@ static void __init tegra_boot_config_table_init(void)
 
 		pt_addr = t30_bct->partition_table_logical_sector_address;
 		pt_size = t30_bct->partition_table_num_logical_sectors;
+		bct_ptr = t30_bct;
 	} else {
 		return;
 	}
 
 	pr_info("%s: BCT found in IRAM\n", __func__);
 
+	tegra_bootdata_bct_setup(bct_ptr, bct_size);
 	tegra_partition_table_setup(pt_addr, pt_size);
 }
diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile
index 054e862b63d89..81b521e95cc7b 100644
--- a/drivers/soc/tegra/Makefile
+++ b/drivers/soc/tegra/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-y += fuse/
 
+obj-y += bootdata.o
 obj-y += common.o
 obj-$(CONFIG_SOC_TEGRA_FLOWCTRL) += flowctrl.o
 obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o
diff --git a/drivers/soc/tegra/bootdata.c b/drivers/soc/tegra/bootdata.c
new file mode 100644
index 0000000000000..c42d98332f080
--- /dev/null
+++ b/drivers/soc/tegra/bootdata.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+union tegra_bct_entry {
+	struct tegra20_boot_config_table t20;
+	struct tegra30_boot_config_table t30;
+};
+
+/*
+ * spare_bct will be released once the kernel has booted, hence no kernel
+ * space is wasted if the BCT is missing. The tegra_bct copy can't be
+ * allocated at the time the BCT is set up because it's too early for the
+ * slab allocator.
+ */
+static union tegra_bct_entry spare_bct __initdata;
+static union tegra_bct_entry *tegra_bct;
+
+static ssize_t boot_config_table_read(struct file *filp,
+				      struct kobject *kobj,
+				      struct bin_attribute *bin_attr,
+				      char *buf, loff_t off, size_t count)
+{
+	memcpy(buf, (u8 *)tegra_bct + off, count);
+	return count;
+}
+static BIN_ATTR_RO(boot_config_table, 0);
+
+static int __init tegra_bootdata_bct_sysfs_init(void)
+{
+	int err;
+
+	if (!bin_attr_boot_config_table.size)
+		return 0;
+
+	tegra_bct = kmalloc(bin_attr_boot_config_table.size, GFP_KERNEL);
+	if (!tegra_bct)
+		return -ENOMEM;
+
+	memcpy(tegra_bct, &spare_bct, bin_attr_boot_config_table.size);
+
+	err = sysfs_create_bin_file(tegra_soc_kobj,
+				    &bin_attr_boot_config_table);
+	if (err)
+		goto free_bct;
+
+	return 0;
+
+free_bct:
+	kfree(tegra_bct);
+
+	return err;
+}
+late_initcall(tegra_bootdata_bct_sysfs_init)
+
+void __init tegra_bootdata_bct_setup(void __iomem *bct_ptr, size_t bct_size)
+{
+	memcpy_fromio(&spare_bct, bct_ptr, bct_size);
+	bin_attr_boot_config_table.size = bct_size;
+}
diff --git a/drivers/soc/tegra/common.c b/drivers/soc/tegra/common.c
index bcc93e6f92051..d325752337113 100644
--- a/drivers/soc/tegra/common.c
+++ b/drivers/soc/tegra/common.c
@@ -11,10 +11,13 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
 
+struct kobject *tegra_soc_kobj;
+
 static const struct of_device_id tegra_machine_match[] = {
 	{ .compatible = "nvidia,tegra20", },
 	{ .compatible = "nvidia,tegra30", },
@@ -25,6 +28,18 @@ static const struct of_device_id tegra_machine_match[] = {
 	{ }
 };
 
+static int __init tegra_soc_sysfs_init(void)
+{
+	if (!soc_is_tegra())
+		return 0;
+
+	tegra_soc_kobj = kobject_create_and_add("tegra", NULL);
+	WARN_ON(!tegra_soc_kobj);
+
+	return 0;
+}
+arch_initcall(tegra_soc_sysfs_init);
+
 bool soc_is_tegra(void)
 {
 	const struct of_device_id *match;
diff --git a/include/soc/tegra/bootdata.h b/include/soc/tegra/bootdata.h
index 7be207cb25198..d5c7a251517d1 100644
--- a/include/soc/tegra/bootdata.h
+++ b/include/soc/tegra/bootdata.h
@@ -43,4 +43,6 @@ struct tegra30_boot_config_table {
 	u32 unused_data[3];
 } __packed;
 
+void tegra_bootdata_bct_setup(void __iomem *bct_ptr, size_t bct_size);
+
 #endif /* __SOC_TEGRA_BOOTDATA_H__ */
diff --git a/include/soc/tegra/common.h b/include/soc/tegra/common.h
index 8ec1ac07fc85c..108d199f5959c 100644
--- a/include/soc/tegra/common.h
+++ b/include/soc/tegra/common.h
@@ -8,6 +8,7 @@
 
 #include
 #include
+#include
 
 struct device;
 
@@ -21,6 +22,8 @@ struct tegra_core_opp_params {
 };
 
 #ifdef CONFIG_ARCH_TEGRA
+extern struct kobject *tegra_soc_kobj;
+
 bool soc_is_tegra(void);
 
 int devm_tegra_core_dev_init_opp_table(struct device *dev,

From 71071311190407ec2a8d284deca65581324d34b3 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Sun, 5 Jul 2020 20:33:12 +0300
Subject: [PATCH 1098/1202] xxx: partitions/tegra: Enable debug by default

The debug messages are useful for WIP devices, so let's enable them.
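
For context, defining DEBUG ahead of the first #include is what turns
the pr_debug() statements of a single file into real prints; simplified
from include/linux/printk.h (the dynamic-debug variant is omitted):

	#ifdef DEBUG
	#define pr_debug(fmt, ...) \
		printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
	#else
	#define pr_debug(fmt, ...) \
		no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
	#endif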
Signed-off-by: Dmitry Osipenko
---
 block/partitions/tegra.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/partitions/tegra.c b/block/partitions/tegra.c
index e7cbc30c15aae..8b5d2cf0d9d4e 100644
--- a/block/partitions/tegra.c
+++ b/block/partitions/tegra.c
@@ -13,6 +13,8 @@
  *	Stephen Warren (Useful suggestions about eMMC/etc)
  */
 
+#define DEBUG
+
 #define pr_fmt(fmt) "tegra-partition: " fmt
 
 #include

From 4f674ce747da2e8504bb09d63fb12b54bd7e0adc Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 31 Mar 2020 01:09:15 +0300
Subject: [PATCH 1099/1202] ARM: tegra_defconfig: Enable CONFIG_ARM_APPENDED_DTB

The downstream bootloader of Tegra20/30 doesn't support device-tree, so
the compiled DTB needs to be manually appended to the kernel's zImage.

Signed-off-by: Dmitry Osipenko
---
 arch/arm/configs/tegra_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 8a8f12b3e6dd8..72894f6a28fdc 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -20,6 +20,8 @@ CONFIG_SLAB=y
 CONFIG_ARCH_TEGRA=y
 CONFIG_SMP=y
 CONFIG_HIGHMEM=y
+CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_KEXEC=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y

From dec45b1b7537cceeba20b8d0986b4d4e7c8f7cfe Mon Sep 17 00:00:00 2001
From: Nicolas Chauvet
Date: Fri, 14 Sep 2018 14:59:15 +0200
Subject: [PATCH 1100/1202] PCI: disable nv_msi_ht_cap_quirk_leaf quirk on
 arm/arm64

This patch disables the use of the nv_msi_ht_cap_quirk_leaf quirk on
arm and arm64 NVIDIA devices such as Tegra.

This fixes the following output:
"pci 0000:00:01.0: nv_msi_ht_cap_quirk didn't locate host bridge"
as experienced on a Trimslice device with the PCI host bridge enabled.

v3: exclude the quirk for arm and arm64 instead of only for x86
v2: use __maybe_unused to avoid a warning on nv_msi_ht_cap_quirk_leaf

Signed-off-by: Nicolas Chauvet
Reviewed-by: Manikanta Maddireddy
Acked-by: Thierry Reding
---
 drivers/pci/quirks.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index f284ab4e81f5a..388639447469b 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2983,12 +2983,15 @@ static void nv_msi_ht_cap_quirk_all(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_AL, PCI_ANY_ID, nv_msi_ht_cap_quirk_all);
 
-static void nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
+static void __maybe_unused nv_msi_ht_cap_quirk_leaf(struct pci_dev *dev)
 {
 	return __nv_msi_ht_cap_quirk(dev, 0);
 }
 
+/* HyperTransport is not relevant on these architectures */
+#if !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, nv_msi_ht_cap_quirk_leaf);
+#endif
 
 static void quirk_msi_intx_disable_bug(struct pci_dev *dev)
 {

From f7d976014df638e153b5b0dbaaef490e71b67573 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Tue, 5 May 2020 04:55:08 +0300
Subject: [PATCH 1101/1202] drm/tegra: dc: Add legacy BO tiling compatibility

The BO tiling hasn't been respected since DRM modifiers became
supported, and as a result older userspace can't set the tiling mode.
This patch restores the legacy BO tiling flag support, which is useful
for development purposes.
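
For illustration, the legacy flow this restores looks roughly as
follows on the userspace side; a sketch assuming the staging Tegra UAPI
is available (the GEM handle is presumed to come from
DRM_IOCTL_TEGRA_GEM_CREATE, and the FB is then created through the
modifier-less ADDFB path):

	#include <drm/tegra_drm.h>
	#include <xf86drm.h>

	static int set_legacy_tiling(int fd, uint32_t gem_handle)
	{
		struct drm_tegra_gem_set_tiling args = {
			.handle = gem_handle,
			.mode = DRM_TEGRA_GEM_TILING_MODE_TILED,
		};

		/*
		 * Tags bo->tiling.mode, which the patched
		 * tegra_plane_atomic_update() falls back to when the
		 * framebuffer carries no modifier.
		 */
		return drmIoctl(fd, DRM_IOCTL_TEGRA_GEM_SET_TILING, &args);
	}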
Signed-off-by: Dmitry Osipenko
---
 drivers/gpu/drm/tegra/dc.c | 17 +++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index eb70eee8992aa..bb496646b7fec 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -763,6 +763,23 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 		 */
 		if (i < 2)
 			window.stride[i] = fb->pitches[i];
+
+		/*
+		 * There are two ways to set the tiling mode on Tegra:
+		 *
+		 * 1. New: using DRM modifiers
+		 * 2. Old: using Tegra BO flags
+		 *
+		 * Older userspace doesn't support the ADDFB2 IOCTL. Assume
+		 * that legacy userspace is in use if the BO flag is set while
+		 * the FB modifier isn't, to maintain userspace compatibility.
+		 */
+		if (i == 0 &&
+		    window.tiling.mode == TEGRA_BO_TILING_MODE_PITCH &&
+		    window.tiling.value == 0) {
+			struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
+			window.tiling.mode = bo->tiling.mode;
+		}
 	}
 
 	tegra_dc_setup_window(p, &window);

From da8304a8fed584d18343993375c83972ec6e45b5 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart
Date: Sat, 26 May 2018 01:50:10 +0300
Subject: [PATCH 1102/1202] XXX: drm: Add generic colorkey properties for
 display planes

Color keying is the action of replacing pixels matching a given color
(or range of colors) with transparent pixels in an overlay when
performing blitting. Depending on the hardware capabilities, a matching
pixel can either become fully transparent or gain an adjustment of its
component values.

Color keying is found in a large number of devices whose capabilities
often differ, but they still have enough features in common to
standardize color key properties.

This commit adds new generic DRM plane properties related to color
keying, providing initial color keying support.
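
To sketch the intended usage, a client would resolve the new properties
by name and program them through the atomic API. A hypothetical helper
(property ID lookup via drmModeObjectGetProperties() is elided, and the
enum value for the "transparent" mode has to be taken from the
property's enum list):

	#include <errno.h>
	#include <xf86drm.h>
	#include <xf86drmMode.h>

	static int enable_colorkey(int fd, uint32_t plane_id,
				   uint32_t prop_mode, uint32_t prop_min,
				   uint32_t prop_max,
				   uint64_t mode_transparent,
				   uint64_t key_argb16161616)
	{
		drmModeAtomicReq *req = drmModeAtomicAlloc();
		int err;

		if (!req)
			return -ENOMEM;

		/* a single color key: min == max */
		drmModeAtomicAddProperty(req, plane_id, prop_mode,
					 mode_transparent);
		drmModeAtomicAddProperty(req, plane_id, prop_min,
					 key_argb16161616);
		drmModeAtomicAddProperty(req, plane_id, prop_max,
					 key_argb16161616);

		err = drmModeAtomicCommit(fd, req, 0, NULL);
		drmModeAtomicFree(req);

		return err;
	}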
Signed-off-by: Laurent Pinchart
Signed-off-by: Dmitry Osipenko
---
 drivers/gpu/drm/drm_atomic_uapi.c |  20 ++++
 drivers/gpu/drm/drm_blend.c       | 152 ++++++++++++++++++++++++++++++
 include/drm/drm_blend.h           |   3 +
 include/drm/drm_plane.h           |  92 ++++++++++++++++++
 4 files changed, 267 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index cdd31fc78bfc0..6edff0541353e 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -592,6 +592,16 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->rotation = val;
 	} else if (property == plane->zpos_property) {
 		state->zpos = val;
+	} else if (property == plane->colorkey.plane_mask_property) {
+		state->colorkey.plane_mask = val;
+	} else if (property == plane->colorkey.mode_property) {
+		state->colorkey.mode = val;
+	} else if (property == plane->colorkey.mask_property) {
+		state->colorkey.mask = val;
+	} else if (property == plane->colorkey.min_property) {
+		state->colorkey.min = val;
+	} else if (property == plane->colorkey.max_property) {
+		state->colorkey.max = val;
 	} else if (property == plane->color_encoding_property) {
 		state->color_encoding = val;
 	} else if (property == plane->color_range_property) {
@@ -658,6 +668,16 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->rotation;
 	} else if (property == plane->zpos_property) {
 		*val = state->zpos;
+	} else if (property == plane->colorkey.plane_mask_property) {
+		*val = state->colorkey.plane_mask;
+	} else if (property == plane->colorkey.mode_property) {
+		*val = state->colorkey.mode;
+	} else if (property == plane->colorkey.mask_property) {
+		*val = state->colorkey.mask;
+	} else if (property == plane->colorkey.min_property) {
+		*val = state->colorkey.min;
+	} else if (property == plane->colorkey.max_property) {
+		*val = state->colorkey.max;
 	} else if (property == plane->color_encoding_property) {
 		*val = state->color_encoding;
 	} else if (property == plane->color_range_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index ec37cbfabb507..d011c9cd43613 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -185,6 +185,11 @@
  *	 plane does not expose the "alpha" property, then this is
  *	 assumed to be 1.0
  *
+ * colorkey:
+ *	Color keying is set up with drm_plane_create_colorkey_properties().
+ *	It adds support for actions like replacing a range of colors with a
+ *	transparent color in the plane. Color keying is disabled by default.
+ *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -616,3 +621,150 @@ int drm_plane_create_blend_mode_property(struct drm_plane *plane,
 	return 0;
 }
 EXPORT_SYMBOL(drm_plane_create_blend_mode_property);
+
+static const char * const plane_colorkey_mode_name[] = {
+	[DRM_PLANE_COLORKEY_MODE_DISABLED] = "disabled",
+	[DRM_PLANE_COLORKEY_MODE_TRANSPARENT] = "transparent",
+};
+
+/**
+ * drm_plane_create_colorkey_properties - create colorkey properties
+ * @plane: drm plane
+ * @supported_modes: bitmask of supported color keying modes
+ *
+ * This function creates the generic color keying properties and attaches them
+ * to the @plane to enable color keying control for blending operations.
+ *
+ * Glossary:
+ *
+ * Destination plane:
+ *	Plane to which the color keying properties are applied; this plane
+ *	takes the effect of the color keying operation.
The effect is determined by a + * given color keying mode. + * + * Source plane: + * Pixels of this plane are the source for color key matching operation. + * + * Color keying is controlled by these properties: + * + * colorkey.plane_mask: + * The mask property specifies which planes participate in color key + * matching process, these planes are the color key sources. It is up + * to userspace to decide what planes it wants to select. + * + * Drivers return an error from their plane atomic check if mask can't be + * handled. + * + * colorkey.mode: + * The mode is an enumerated property that controls how color keying + * operates. + * + * colorkey.mask: + * This property specifies the pixel components mask. Unmasked pixel + * components are not participating in the matching. This mask value is + * applied to colorkey.min / max values. The mask value is given in a + * 64-bit integer in ARGB16161616 format, where A is the alpha value and + * R, G and B correspond to the color components. Drivers shall convert + * ARGB16161616 value into appropriate format within planes atomic check. + * + * Drivers return an error from their plane atomic check if mask can't be + * handled. + * + * colorkey.min, colorkey.max: + * These two properties specify the colors that are treated as the color + * key. Pixel whose value is in the [min, max] range is the color key + * matching pixel. The minimum and maximum values are expressed as a + * 64-bit integer in ARGB16161616 format, where A is the alpha value and + * R, G and B correspond to the color components. Drivers shall convert + * ARGB16161616 value into appropriate format within planes atomic check. + * The converted value shall be *rounded up* to the nearest value. + * + * When a single color key is desired instead of a range, userspace shall + * set the min and max properties to the same value. + * + * Drivers return an error from their plane atomic check if range can't be + * handled. + * + * Returns: + * Zero on success, negative errno on failure. 
+ */ +int drm_plane_create_colorkey_properties(struct drm_plane *plane, + u32 supported_modes) +{ + struct drm_prop_enum_list modes_list[DRM_PLANE_COLORKEY_MODES_NUM]; + struct drm_device *dev = plane->dev; + struct drm_property *plane_mask_prop; + struct drm_property *mode_prop; + struct drm_property *mask_prop; + struct drm_property *min_prop; + struct drm_property *max_prop; + unsigned int modes_num = 0; + unsigned int i; + + /* modes are driver-specific, build the list of supported modes */ + for (i = 0; i < DRM_PLANE_COLORKEY_MODES_NUM; i++) { + if (!(supported_modes & BIT(i))) + continue; + + modes_list[modes_num].name = plane_colorkey_mode_name[i]; + modes_list[modes_num].type = i; + modes_num++; + } + + /* at least one mode should be supported */ + if (!modes_num) + return -EINVAL; + + plane_mask_prop = drm_property_create_range(dev, 0, + "colorkey.plane_mask", + 0, U64_MAX); + if (!plane_mask_prop) + return -ENOMEM; + + mode_prop = drm_property_create_enum(dev, 0, "colorkey.mode", + modes_list, modes_num); + if (!mode_prop) + goto err_destroy_plane_mask_prop; + + mask_prop = drm_property_create_range(dev, 0, "colorkey.mask", + 0, U64_MAX); + if (!mask_prop) + goto err_destroy_mode_prop; + + min_prop = drm_property_create_range(dev, 0, "colorkey.min", + 0, U64_MAX); + if (!min_prop) + goto err_destroy_mask_prop; + + max_prop = drm_property_create_range(dev, 0, "colorkey.max", + 0, U64_MAX); + if (!max_prop) + goto err_destroy_min_prop; + + drm_object_attach_property(&plane->base, plane_mask_prop, 0); + drm_object_attach_property(&plane->base, mode_prop, 0); + drm_object_attach_property(&plane->base, mask_prop, 0); + drm_object_attach_property(&plane->base, min_prop, 0); + drm_object_attach_property(&plane->base, max_prop, 0); + + plane->colorkey.plane_mask_property = plane_mask_prop; + plane->colorkey.mode_property = mode_prop; + plane->colorkey.mask_property = mask_prop; + plane->colorkey.min_property = min_prop; + plane->colorkey.max_property = max_prop; + + return 0; + +err_destroy_min_prop: + drm_property_destroy(dev, min_prop); +err_destroy_mask_prop: + drm_property_destroy(dev, mask_prop); +err_destroy_mode_prop: + drm_property_destroy(dev, mode_prop); +err_destroy_plane_mask_prop: + drm_property_destroy(dev, plane_mask_prop); + + return -ENOMEM; +} +EXPORT_SYMBOL(drm_plane_create_colorkey_properties); + diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h index 88bdfec3bd884..47e0c90d4177d 100644 --- a/include/drm/drm_blend.h +++ b/include/drm/drm_blend.h @@ -58,4 +58,7 @@ int drm_atomic_normalize_zpos(struct drm_device *dev, struct drm_atomic_state *state); int drm_plane_create_blend_mode_property(struct drm_plane *plane, unsigned int supported_modes); + +int drm_plane_create_colorkey_properties(struct drm_plane *plane, + u32 supported_modes); #endif diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 0c1102dc4d883..e9b9aa0f74f8f 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -40,6 +40,52 @@ enum drm_scaling_filter { DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; +/** + * enum drm_plane_colorkey_mode - uapi plane colorkey mode enumeration + */ +enum drm_plane_colorkey_mode { + /** + * @DRM_PLANE_COLORKEY_MODE_DISABLED: + * + * No color keying performed in this mode. 
+	 */
+	DRM_PLANE_COLORKEY_MODE_DISABLED,
+
+	/**
+	 * @DRM_PLANE_COLORKEY_MODE_TRANSPARENT:
+	 *
+	 * Destination plane pixels are completely transparent in areas
+	 * where pixels of a source plane match a given color key range;
+	 * in other cases pixels of the destination plane are unaffected.
+	 * In areas where two or more source planes overlap, the topmost
+	 * plane takes precedence.
+	 */
+	DRM_PLANE_COLORKEY_MODE_TRANSPARENT,
+
+	/**
+	 * @DRM_PLANE_COLORKEY_MODES_NUM:
+	 *
+	 * Total number of color keying modes.
+	 */
+	DRM_PLANE_COLORKEY_MODES_NUM,
+};
+
+/**
+ * struct drm_plane_colorkey_state - plane color keying state
+ * @mode: color keying mode
+ * @plane_mask: source planes that participate in color key matching
+ * @mask: color key mask (in ARGB16161616 format)
+ * @min: color key range minimum (in ARGB16161616 format)
+ * @max: color key range maximum (in ARGB16161616 format)
+ */
+struct drm_plane_colorkey_state {
+	enum drm_plane_colorkey_mode mode;
+	u32 plane_mask;
+	u64 mask;
+	u64 min;
+	u64 max;
+};
+
 /**
  * struct drm_plane_state - mutable plane state
  *
@@ -165,6 +211,13 @@ struct drm_plane_state {
 	 */
 	unsigned int normalized_zpos;
 
+	/**
+	 * @colorkey:
+	 * Color keying of the plane. See drm_plane_create_colorkey_properties()
+	 * for more details.
+	 */
+	struct drm_plane_colorkey_state colorkey;
+
 	/**
 	 * @color_encoding:
 	 *
@@ -728,6 +781,19 @@ struct drm_plane {
 	 */
 	struct drm_property *blend_mode_property;
 
+	/**
+	 * @colorkey:
+	 * Optional color keying properties for this plane. See
+	 * drm_plane_create_colorkey_properties().
+	 */
+	struct {
+		struct drm_property *plane_mask_property;
+		struct drm_property *mode_property;
+		struct drm_property *mask_property;
+		struct drm_property *min_property;
+		struct drm_property *max_property;
+	} colorkey;
+
 	/**
 	 * @color_encoding_property:
 	 *
@@ -907,4 +973,30 @@ drm_plane_get_damage_clips(const struct drm_plane_state *state);
 int drm_plane_create_scaling_filter_property(struct drm_plane *plane,
 					     unsigned int supported_filters);
 
+/**
+ * drm_colorkey_extract_component - get color key component value
+ * @ckey64: 64bit color key value
+ * @comp_name: name of 16bit color component to extract
+ * @nbits: size in bits of extracted component value
+ *
+ * Extract the 16bit color component of @ckey64 given by @comp_name (alpha,
+ * red, green or blue) and convert it to an unsigned integer that has a
+ * bit-width of @nbits (the result is rounded up).
+ */
+#define drm_colorkey_extract_component(ckey64, comp_name, nbits) \
+	__drm_ckey_extract(ckey64, __drm_ckey_ ## comp_name ## _shift, nbits)
+
+#define __drm_ckey_alpha_shift	48
+#define __drm_ckey_red_shift	32
+#define __drm_ckey_green_shift	16
+#define __drm_ckey_blue_shift	0
+
+static inline u16 __drm_ckey_extract(u64 ckey64, u8 ckey_shift, u8 nbits)
+{
+	u16 mask = (1 << (16 - nbits)) - 1;
+	u32 ret = ((u16)(ckey64 >> ckey_shift) + mask) >> (16 - nbits);
+
+	return min_t(u16, ret, (1 << nbits) - 1);
+}
+
 #endif

From 2ed8ffd0f2a9083ef65d04938c3dfdf5a261dbed Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko
Date: Mon, 21 Dec 2020 00:57:22 +0300
Subject: [PATCH 1103/1202] WIP: drm/grate: Add Host1x and DRM drivers with
 experimental changes

The new drivers and UAPI provide all features that are necessary for
implementing a proper userspace driver. The new driver takes a
different approach in regards to working with sync points by making
each submitted job take an individual sync point, which solves the
sync point recovery problem that the old driver suffered from.
The new Host1x driver and v2 UAPI are optimized for performance and minimal resources consumption. The new v2 UAPI exposes to userspace the HW cmdstream-related features that are necessary for a proper HW fencing. The staging v1 UAPI is kept functional and all of current userspace will continue to work with the new driver, note that some of the never-really-used features of v1 UAPI have been removed (like sync point increment IOCTL) and will return EPERM. The goal is to merge new features into the main driver once they are ready. Key moments of v2 UAPI: - Raw sync points are not exposed to userspace. - Job descriptors are embedded into the commands stream. Kernel driver parses the stream and patches the descriptors in-place. - Commands buffer is copied from userspace via usrptr (no BO allocation and uncached-reading overhead). - Channel is not restricted to a single client, thus 3d channel can take a multi-client job that uses 2d / 3d. - Supports explicit jobs fencing. DRM sync object and DRM scheduler are utilized to provide the fencing support. - Allows to use host1x gather opcode, which is restricted to data-upload usecase only. This allows userspace to pre-allocate a gather buffer, put shaders code / data there, and use two-word "gather" opcode to upload data from the buffer without pushing that data into the commands stream on each submission. Signed-off-by: Dmitry Osipenko --- drivers/gpu/Makefile | 1 + drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/grate/Kconfig | 38 + drivers/gpu/drm/grate/Makefile | 38 + drivers/gpu/drm/grate/channel.c | 62 + drivers/gpu/drm/grate/channel.h | 40 + drivers/gpu/drm/grate/client.c | 99 + drivers/gpu/drm/grate/client.h | 51 + drivers/gpu/drm/grate/dc.c | 3366 ++++++++++++++ drivers/gpu/drm/grate/dc.h | 827 ++++ drivers/gpu/drm/grate/dp.c | 876 ++++ drivers/gpu/drm/grate/dp.h | 177 + drivers/gpu/drm/grate/dpaux.c | 823 ++++ drivers/gpu/drm/grate/dpaux.h | 73 + drivers/gpu/drm/grate/drm.c | 738 +++ drivers/gpu/drm/grate/drm.h | 177 + drivers/gpu/drm/grate/dsi.c | 1700 +++++++ drivers/gpu/drm/grate/dsi.h | 143 + drivers/gpu/drm/grate/falcon.c | 222 + drivers/gpu/drm/grate/falcon.h | 114 + drivers/gpu/drm/grate/fb.c | 423 ++ drivers/gpu/drm/grate/gart.c | 715 +++ drivers/gpu/drm/grate/gart.h | 73 + drivers/gpu/drm/grate/gem.c | 727 +++ drivers/gpu/drm/grate/gem.h | 88 + drivers/gpu/drm/grate/gr2d.c | 494 ++ drivers/gpu/drm/grate/gr2d.h | 29 + drivers/gpu/drm/grate/gr3d.c | 749 +++ drivers/gpu/drm/grate/gr3d.h | 24 + drivers/gpu/drm/grate/hda.c | 63 + drivers/gpu/drm/grate/hda.h | 20 + drivers/gpu/drm/grate/hdmi.c | 1769 +++++++ drivers/gpu/drm/grate/hdmi.h | 557 +++ drivers/gpu/drm/grate/hub.c | 1051 +++++ drivers/gpu/drm/grate/hub.h | 98 + drivers/gpu/drm/grate/mipi-phy.c | 134 + drivers/gpu/drm/grate/mipi-phy.h | 48 + drivers/gpu/drm/grate/output.c | 265 ++ drivers/gpu/drm/grate/plane.c | 957 ++++ drivers/gpu/drm/grate/plane.h | 107 + drivers/gpu/drm/grate/rgb.c | 371 ++ drivers/gpu/drm/grate/sor.c | 4054 +++++++++++++++++ drivers/gpu/drm/grate/sor.h | 457 ++ drivers/gpu/drm/grate/trace.c | 2 + drivers/gpu/drm/grate/trace.h | 68 + drivers/gpu/drm/grate/uapi/debug.c | 45 + drivers/gpu/drm/grate/uapi/debug.h | 12 + drivers/gpu/drm/grate/uapi/job.h | 117 + drivers/gpu/drm/grate/uapi/job_v1.c | 666 +++ drivers/gpu/drm/grate/uapi/job_v2.c | 782 ++++ drivers/gpu/drm/grate/uapi/patching.c | 630 +++ drivers/gpu/drm/grate/uapi/scheduler.c | 250 + drivers/gpu/drm/grate/uapi/scheduler.h | 10 + drivers/gpu/drm/grate/uapi/uapi.c | 472 
++ drivers/gpu/drm/grate/uapi/uapi.h | 113 + drivers/gpu/drm/grate/vic.c | 248 + drivers/gpu/drm/grate/vic.h | 37 + drivers/gpu/drm/tegra/Kconfig | 5 +- drivers/gpu/drm/tegra/Makefile | 2 +- drivers/gpu/host1x-grate/Kconfig | 51 + drivers/gpu/host1x-grate/Makefile | 24 + drivers/gpu/host1x-grate/buffer_object.c | 101 + drivers/gpu/host1x-grate/bus.c | 930 ++++ drivers/gpu/host1x-grate/bus.h | 18 + drivers/gpu/host1x-grate/debug.c | 100 + drivers/gpu/host1x-grate/dma_pool.c | 123 + drivers/gpu/host1x-grate/fence.c | 104 + drivers/gpu/host1x-grate/host1x.c | 477 ++ drivers/gpu/host1x-grate/host1x.h | 81 + drivers/gpu/host1x-grate/iommu.c | 139 + drivers/gpu/host1x-grate/mipi.c | 554 +++ drivers/gpu/host1x-grate/soc/channel.c | 363 ++ drivers/gpu/host1x-grate/soc/channel_hw.c | 282 ++ drivers/gpu/host1x-grate/soc/debug.c | 168 + drivers/gpu/host1x-grate/soc/host1x01.c | 78 + drivers/gpu/host1x-grate/soc/host1x01.h | 29 + drivers/gpu/host1x-grate/soc/host1x02.c | 78 + drivers/gpu/host1x-grate/soc/host1x02.h | 30 + drivers/gpu/host1x-grate/soc/host1x04.c | 78 + drivers/gpu/host1x-grate/soc/host1x04.h | 30 + drivers/gpu/host1x-grate/soc/host1x05.c | 78 + drivers/gpu/host1x-grate/soc/host1x05.h | 30 + drivers/gpu/host1x-grate/soc/host1x06.c | 78 + drivers/gpu/host1x-grate/soc/host1x06.h | 30 + drivers/gpu/host1x-grate/soc/host1x07.c | 78 + drivers/gpu/host1x-grate/soc/host1x07.h | 30 + .../host1x-grate/soc/hw/host1x01_hardware.h | 143 + .../host1x-grate/soc/hw/host1x02_hardware.h | 142 + .../host1x-grate/soc/hw/host1x04_hardware.h | 142 + .../host1x-grate/soc/hw/host1x05_hardware.h | 142 + .../host1x-grate/soc/hw/host1x06_hardware.h | 147 + .../host1x-grate/soc/hw/host1x07_hardware.h | 147 + .../host1x-grate/soc/hw/hw_host1x01_channel.h | 123 + .../host1x-grate/soc/hw/hw_host1x01_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x01_uclass.h | 180 + .../host1x-grate/soc/hw/hw_host1x02_channel.h | 123 + .../host1x-grate/soc/hw/hw_host1x02_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x02_uclass.h | 181 + .../host1x-grate/soc/hw/hw_host1x04_channel.h | 135 + .../host1x-grate/soc/hw/hw_host1x04_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x04_uclass.h | 181 + .../host1x-grate/soc/hw/hw_host1x05_channel.h | 135 + .../host1x-grate/soc/hw/hw_host1x05_sync.h | 255 ++ .../host1x-grate/soc/hw/hw_host1x05_uclass.h | 181 + .../soc/hw/hw_host1x06_hypervisor.h | 32 + .../host1x-grate/soc/hw/hw_host1x06_uclass.h | 181 + .../gpu/host1x-grate/soc/hw/hw_host1x06_vm.h | 160 + .../soc/hw/hw_host1x07_hypervisor.h | 32 + .../host1x-grate/soc/hw/hw_host1x07_uclass.h | 181 + .../gpu/host1x-grate/soc/hw/hw_host1x07_vm.h | 159 + drivers/gpu/host1x-grate/soc/mlocks.c | 123 + drivers/gpu/host1x-grate/soc/mlocks_hw.c | 27 + drivers/gpu/host1x-grate/soc/pushbuf.c | 308 ++ drivers/gpu/host1x-grate/soc/syncpoints.c | 246 + drivers/gpu/host1x-grate/soc/syncpoints_hw.c | 261 ++ drivers/gpu/host1x/Kconfig | 1 + drivers/iommu/tegra-gart.c | 4 + drivers/video/Kconfig | 1 + include/linux/host1x-grate.h | 1841 ++++++++ include/uapi/drm/grate_drm.h | 1095 +++++ 121 files changed, 38234 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/grate/Kconfig create mode 100644 drivers/gpu/drm/grate/Makefile create mode 100644 drivers/gpu/drm/grate/channel.c create mode 100644 drivers/gpu/drm/grate/channel.h create mode 100644 drivers/gpu/drm/grate/client.c create mode 100644 drivers/gpu/drm/grate/client.h create mode 100644 drivers/gpu/drm/grate/dc.c create mode 100644 drivers/gpu/drm/grate/dc.h create mode 100644 
drivers/gpu/drm/grate/dp.c create mode 100644 drivers/gpu/drm/grate/dp.h create mode 100644 drivers/gpu/drm/grate/dpaux.c create mode 100644 drivers/gpu/drm/grate/dpaux.h create mode 100644 drivers/gpu/drm/grate/drm.c create mode 100644 drivers/gpu/drm/grate/drm.h create mode 100644 drivers/gpu/drm/grate/dsi.c create mode 100644 drivers/gpu/drm/grate/dsi.h create mode 100644 drivers/gpu/drm/grate/falcon.c create mode 100644 drivers/gpu/drm/grate/falcon.h create mode 100644 drivers/gpu/drm/grate/fb.c create mode 100644 drivers/gpu/drm/grate/gart.c create mode 100644 drivers/gpu/drm/grate/gart.h create mode 100644 drivers/gpu/drm/grate/gem.c create mode 100644 drivers/gpu/drm/grate/gem.h create mode 100644 drivers/gpu/drm/grate/gr2d.c create mode 100644 drivers/gpu/drm/grate/gr2d.h create mode 100644 drivers/gpu/drm/grate/gr3d.c create mode 100644 drivers/gpu/drm/grate/gr3d.h create mode 100644 drivers/gpu/drm/grate/hda.c create mode 100644 drivers/gpu/drm/grate/hda.h create mode 100644 drivers/gpu/drm/grate/hdmi.c create mode 100644 drivers/gpu/drm/grate/hdmi.h create mode 100644 drivers/gpu/drm/grate/hub.c create mode 100644 drivers/gpu/drm/grate/hub.h create mode 100644 drivers/gpu/drm/grate/mipi-phy.c create mode 100644 drivers/gpu/drm/grate/mipi-phy.h create mode 100644 drivers/gpu/drm/grate/output.c create mode 100644 drivers/gpu/drm/grate/plane.c create mode 100644 drivers/gpu/drm/grate/plane.h create mode 100644 drivers/gpu/drm/grate/rgb.c create mode 100644 drivers/gpu/drm/grate/sor.c create mode 100644 drivers/gpu/drm/grate/sor.h create mode 100644 drivers/gpu/drm/grate/trace.c create mode 100644 drivers/gpu/drm/grate/trace.h create mode 100644 drivers/gpu/drm/grate/uapi/debug.c create mode 100644 drivers/gpu/drm/grate/uapi/debug.h create mode 100644 drivers/gpu/drm/grate/uapi/job.h create mode 100644 drivers/gpu/drm/grate/uapi/job_v1.c create mode 100644 drivers/gpu/drm/grate/uapi/job_v2.c create mode 100644 drivers/gpu/drm/grate/uapi/patching.c create mode 100644 drivers/gpu/drm/grate/uapi/scheduler.c create mode 100644 drivers/gpu/drm/grate/uapi/scheduler.h create mode 100644 drivers/gpu/drm/grate/uapi/uapi.c create mode 100644 drivers/gpu/drm/grate/uapi/uapi.h create mode 100644 drivers/gpu/drm/grate/vic.c create mode 100644 drivers/gpu/drm/grate/vic.h create mode 100644 drivers/gpu/host1x-grate/Kconfig create mode 100644 drivers/gpu/host1x-grate/Makefile create mode 100644 drivers/gpu/host1x-grate/buffer_object.c create mode 100644 drivers/gpu/host1x-grate/bus.c create mode 100644 drivers/gpu/host1x-grate/bus.h create mode 100644 drivers/gpu/host1x-grate/debug.c create mode 100644 drivers/gpu/host1x-grate/dma_pool.c create mode 100644 drivers/gpu/host1x-grate/fence.c create mode 100644 drivers/gpu/host1x-grate/host1x.c create mode 100644 drivers/gpu/host1x-grate/host1x.h create mode 100644 drivers/gpu/host1x-grate/iommu.c create mode 100644 drivers/gpu/host1x-grate/mipi.c create mode 100644 drivers/gpu/host1x-grate/soc/channel.c create mode 100644 drivers/gpu/host1x-grate/soc/channel_hw.c create mode 100644 drivers/gpu/host1x-grate/soc/debug.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x01.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x01.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x02.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x02.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x04.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x04.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x05.c create mode 100644 
drivers/gpu/host1x-grate/soc/host1x05.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x06.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x06.h create mode 100644 drivers/gpu/host1x-grate/soc/host1x07.c create mode 100644 drivers/gpu/host1x-grate/soc/host1x07.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x01_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x02_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x04_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x05_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x06_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/host1x07_hardware.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x01_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x02_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x04_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_channel.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_sync.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x05_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_hypervisor.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x06_vm.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_hypervisor.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_uclass.h create mode 100644 drivers/gpu/host1x-grate/soc/hw/hw_host1x07_vm.h create mode 100644 drivers/gpu/host1x-grate/soc/mlocks.c create mode 100644 drivers/gpu/host1x-grate/soc/mlocks_hw.c create mode 100644 drivers/gpu/host1x-grate/soc/pushbuf.c create mode 100644 drivers/gpu/host1x-grate/soc/syncpoints.c create mode 100644 drivers/gpu/host1x-grate/soc/syncpoints_hw.c create mode 100644 include/linux/host1x-grate.h create mode 100644 include/uapi/drm/grate_drm.h diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile index 835c88318cec6..1d53cd887918b 100644 --- a/drivers/gpu/Makefile +++ b/drivers/gpu/Makefile @@ -3,6 +3,7 @@ # taken to initialize them in the correct order. Link order is the only way # to ensure this currently. 
obj-$(CONFIG_TEGRA_HOST1X) += host1x/ +obj-$(CONFIG_GRATE_HOST1X) += host1x-grate/ obj-y += drm/ vga/ obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/ obj-$(CONFIG_TRACE_GPU_MEM) += trace/ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index c08860db25204..19d5f4c4bf863 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -344,6 +344,8 @@ source "drivers/gpu/drm/msm/Kconfig" source "drivers/gpu/drm/fsl-dcu/Kconfig" +source "drivers/gpu/drm/grate/Kconfig" + source "drivers/gpu/drm/tegra/Kconfig" source "drivers/gpu/drm/stm/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 7f6eb11b6aac3..9a270db05600d 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -105,7 +105,8 @@ obj-y += tilcdc/ obj-$(CONFIG_DRM_QXL) += qxl/ obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/ obj-$(CONFIG_DRM_MSM) += msm/ -obj-$(CONFIG_DRM_TEGRA) += tegra/ +obj-$(CONFIG_DRM_TEGRA) += grate/ +obj-$(CONFIG_DRM_TEGRA_ORIG) += tegra/ obj-$(CONFIG_DRM_STM) += stm/ obj-$(CONFIG_DRM_STI) += sti/ obj-y += imx/ diff --git a/drivers/gpu/drm/grate/Kconfig b/drivers/gpu/drm/grate/Kconfig new file mode 100644 index 0000000000000..29fd0cb297a00 --- /dev/null +++ b/drivers/gpu/drm/grate/Kconfig @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only +config DRM_TEGRA + tristate "NVIDIA Tegra DRM (with experimental changes)" + depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + depends on COMMON_CLK + depends on DRM + depends on OF + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL + select DRM_SCHED + select GRATE_HOST1X + select GRATE_HOST1X_DRV + select INTERCONNECT + select IOMMU_IOVA + select CEC_CORE if CEC_NOTIFIER + help + Choose this option if you have an NVIDIA Tegra SoC. + + To compile this driver as a module, choose M here: the module + will be called tegra-drm. + +if DRM_TEGRA + +config DRM_TEGRA_DEBUG + bool "NVIDIA Tegra DRM debug support" + help + Say yes here to enable debugging support. + +config DRM_TEGRA_STAGING + bool "Enable HOST1X interface" + depends on STAGING + help + Say yes if HOST1X should be available for userspace DRM users. + + If unsure, choose N. 
+ +endif diff --git a/drivers/gpu/drm/grate/Makefile b/drivers/gpu/drm/grate/Makefile new file mode 100644 index 0000000000000..14c32a2159258 --- /dev/null +++ b/drivers/gpu/drm/grate/Makefile @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := -I $(srctree)/$(src) +ccflags-y += -I $(srctree)/$(src)/uapi +ccflags-y += -I $(srctree)/drivers/gpu/host1x-grate/soc/hw +ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG + +tegra-drm-y := \ + drm.o \ + gem.o \ + fb.o \ + dp.o \ + hub.o \ + plane.o \ + dc.o \ + output.o \ + rgb.o \ + hda.o \ + hdmi.o \ + mipi-phy.o \ + dsi.o \ + sor.o \ + dpaux.o \ + gr2d.o \ + gr3d.o \ + falcon.o \ + vic.o \ + trace.o \ + channel.o \ + client.o \ + gart.o \ + uapi/debug.o \ + uapi/job_v1.o \ + uapi/job_v2.o \ + uapi/patching.o \ + uapi/scheduler.o \ + uapi/uapi.o + +obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o diff --git a/drivers/gpu/drm/grate/channel.c b/drivers/gpu/drm/grate/channel.c new file mode 100644 index 0000000000000..1b7a7a5381a2d --- /dev/null +++ b/drivers/gpu/drm/grate/channel.c @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "channel.h" +#include "scheduler.h" + +struct tegra_drm_channel * +tegra_drm_open_channel(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client, + u64 pipes_bitmask, + unsigned int num_pushbuf_words, + unsigned int hw_jobs_limit, + unsigned int job_hang_limit, + unsigned int timeout_msecs, + const char *name) +{ + struct drm_device *drm = tegra->drm; + struct host1x *host = dev_get_drvdata(drm->dev->parent); + struct host1x_client *client = &drm_client->base; + struct tegra_drm_channel *drm_channel; + int err; + + drm_channel = kzalloc(sizeof(*drm_channel), GFP_KERNEL); + if (!drm_channel) + return ERR_PTR(-ENOMEM); + + drm_channel->channel = host1x_channel_request(host, client->dev, + num_pushbuf_words); + if (IS_ERR(drm_channel->channel)) { + err = PTR_ERR(drm_channel->channel); + goto err_free_channel; + } + + drm_channel->acceptable_pipes = pipes_bitmask; + + err = drm_sched_init(&drm_channel->sched, + &tegra_drm_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(timeout_msecs / 2), + NULL, NULL, name); + if (err) + goto err_put_channel; + + list_add_tail(&drm_channel->list, &tegra->channels); + + return drm_channel; + +err_put_channel: + host1x_channel_put(drm_channel->channel); + +err_free_channel: + kfree(drm_channel); + + return ERR_PTR(err); +} + +void tegra_drm_close_channel(struct tegra_drm_channel *drm_channel) +{ + drm_sched_fini(&drm_channel->sched); + host1x_channel_put(drm_channel->channel); + list_del(&drm_channel->list); + kfree(drm_channel); +} diff --git a/drivers/gpu/drm/grate/channel.h b/drivers/gpu/drm/grate/channel.h new file mode 100644 index 0000000000000..42eb4886beb26 --- /dev/null +++ b/drivers/gpu/drm/grate/channel.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TEGRA_DRM_CHANNEL_H +#define __TEGRA_DRM_CHANNEL_H + +#include "drm.h" + +#define TEGRA_DRM_PIPE_2D BIT(DRM_TEGRA_PIPE_ID_2D) +#define TEGRA_DRM_PIPE_3D BIT(DRM_TEGRA_PIPE_ID_3D) +#define TEGRA_DRM_PIPE_VIC BIT(DRM_TEGRA_PIPE_ID_VIC) + +struct tegra_drm_channel { + struct drm_gpu_scheduler sched; + struct host1x_channel *channel; + struct list_head list; + u64 acceptable_pipes; +}; + +static inline struct tegra_drm_channel * +to_tegra_drm_channel(struct drm_gpu_scheduler *sched) +{ + return container_of(sched, struct tegra_drm_channel, sched); +} + +struct tegra_drm; +struct tegra_drm_client; + +struct tegra_drm_channel * +tegra_drm_open_channel(struct tegra_drm *tegra, + 
struct tegra_drm_client *drm_client, + u64 pipes_bitmask, + unsigned int num_pushbuf_words, + unsigned int hw_jobs_limit, + unsigned int job_hang_limit, + unsigned int timeout_msecs, + const char *name); + +void tegra_drm_close_channel(struct tegra_drm_channel *drm_channel); + +#endif diff --git a/drivers/gpu/drm/grate/client.c b/drivers/gpu/drm/grate/client.c new file mode 100644 index 0000000000000..24103c4f3e1cd --- /dev/null +++ b/drivers/gpu/drm/grate/client.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "drm.h" + +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + +int tegra_drm_register_client(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client) +{ + struct drm_device *drm = tegra->drm; + struct host1x *host1x = dev_get_drvdata(drm->dev->parent); + struct host1x_client *client = &drm_client->base; + int err; + + drm_client->mlock = host1x_mlock_request(host1x, client->dev); + if (IS_ERR(drm_client->mlock)) { + err = PTR_ERR(drm_client->mlock); + return err; + } + + list_add_tail(&drm_client->list, &tegra->clients); + drm_client->drm = tegra; + + return 0; +} + +void tegra_drm_unregister_client(struct tegra_drm_client *drm_client) +{ + host1x_mlock_put(drm_client->mlock); + list_del(&drm_client->list); + drm_client->drm = NULL; +} + +struct iommu_group * +tegra_drm_client_iommu_attach(struct tegra_drm_client *drm_client, bool shared) +{ + struct host1x_client *client = &drm_client->base; + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + struct iommu_group *group = NULL; + int err; + + if (tegra->domain) { + group = iommu_group_get(client->dev); + if (!group) { + if (tegra->has_gart) { + dev_warn(client->dev, "failed to get IOMMU group\n"); + dev_warn(client->dev, "please update your device-tree\n"); + return 0; + } + + dev_err(client->dev, "failed to get IOMMU group\n"); + return ERR_PTR(-ENODEV); + } + + if (!shared || !tegra->group) { +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (client->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(client->dev); + arm_iommu_detach_device(client->dev); + arm_iommu_release_mapping(mapping); + } +#endif + err = iommu_attach_group(tegra->domain, group); + if (err < 0) { + iommu_group_put(group); + return ERR_PTR(err); + } + + if (shared) + tegra->group = group; + } + } + + return group; +} + +void tegra_drm_client_iommu_detach(struct tegra_drm_client *drm_client, + struct iommu_group *group, + bool shared) +{ + struct host1x_client *client = &drm_client->base; + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + if (group) { + if (!shared || group == tegra->group) { + iommu_detach_group(tegra->domain, group); + + if (group == tegra->group) + tegra->group = NULL; + } + + iommu_group_put(group); + } +} diff --git a/drivers/gpu/drm/grate/client.h b/drivers/gpu/drm/grate/client.h new file mode 100644 index 0000000000000..2a7e3ee820dc1 --- /dev/null +++ b/drivers/gpu/drm/grate/client.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TEGRA_DRM_CLIENT_H +#define __TEGRA_DRM_CLIENT_H + +#include "drm.h" + +struct tegra_drm_job; + +struct tegra_drm_client { + struct host1x_client base; + struct host1x_mlock *mlock; + struct tegra_drm *drm; + struct list_head list; + const unsigned long *addr_regs; + unsigned int num_regs; + u64 pipe; + + int (*refine_class)(struct tegra_drm_client *client, u64 pipes, + unsigned int *classid); + + int 
(*prepare_job)(struct tegra_drm_client *client, + struct tegra_drm_job *job); + + int (*unprepare_job)(struct tegra_drm_client *client, + struct tegra_drm_job *job); + + int (*reset_hw)(struct tegra_drm_client *client); +}; + +static inline struct tegra_drm_client * +to_tegra_drm_client(struct host1x_client *client) +{ + return container_of(client, struct tegra_drm_client, base); +} + +struct tegra_drm; + +int tegra_drm_register_client(struct tegra_drm *tegra, + struct tegra_drm_client *drm_client); + +void tegra_drm_unregister_client(struct tegra_drm_client *drm_client); + +struct iommu_group * +tegra_drm_client_iommu_attach(struct tegra_drm_client *drm_client, bool shared); + +void tegra_drm_client_iommu_detach(struct tegra_drm_client *drm_client, + struct iommu_group *group, + bool shared); + +#endif diff --git a/drivers/gpu/drm/grate/dc.c b/drivers/gpu/drm/grate/dc.c new file mode 100644 index 0000000000000..5bd8b83681c99 --- /dev/null +++ b/drivers/gpu/drm/grate/dc.c @@ -0,0 +1,3366 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "dc.h" +#include "drm.h" +#include "gem.h" +#include "hub.h" +#include "plane.h" + +static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state); + +static void tegra_dc_stats_reset(struct tegra_dc_stats *stats) +{ + stats->frames = 0; + stats->vblank = 0; + stats->underflow = 0; + stats->overflow = 0; +} + +/* Reads the active copy of a register. */ +static u32 tegra_dc_readl_active(struct tegra_dc *dc, unsigned long offset) +{ + u32 value; + + tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); + value = tegra_dc_readl(dc, offset); + tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS); + + return value; +} + +static inline unsigned int tegra_plane_offset(struct tegra_plane *plane, + unsigned int offset) +{ + if (offset >= 0x500 && offset <= 0x638) { + offset = 0x000 + (offset - 0x500); + return plane->offset + offset; + } + + if (offset >= 0x700 && offset <= 0x719) { + offset = 0x180 + (offset - 0x700); + return plane->offset + offset; + } + + if (offset >= 0x800 && offset <= 0x839) { + offset = 0x1c0 + (offset - 0x800); + return plane->offset + offset; + } + + dev_WARN(plane->dc->dev, "invalid offset: %x\n", offset); + + return plane->offset + offset; +} + +static inline u32 tegra_plane_readl(struct tegra_plane *plane, + unsigned int offset) +{ + return tegra_dc_readl(plane->dc, tegra_plane_offset(plane, offset)); +} + +static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value, + unsigned int offset) +{ + tegra_dc_writel(plane->dc, value, tegra_plane_offset(plane, offset)); +} + +bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev) +{ + struct device_node *np = dc->dev->of_node; + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, np, "nvidia,outputs", NULL, 0) + if (it.node == dev->of_node) + return true; + + return false; +} + +/* + * Double-buffered registers have two copies: ASSEMBLY and ACTIVE. When the + * *_ACT_REQ bits are set the ASSEMBLY copy is latched into the ACTIVE copy. + * Latching happens mmediately if the display controller is in STOP mode or + * on the next frame boundary otherwise. 
+ * + * Triple-buffered registers have three copies: ASSEMBLY, ARM and ACTIVE. The + * ASSEMBLY copy is latched into the ARM copy immediately after *_UPDATE bits + * are written. When the *_ACT_REQ bits are written, the ARM copy is latched + * into the ACTIVE copy, either immediately if the display controller is in + * STOP mode, or at the next frame boundary otherwise. + */ +void tegra_dc_commit(struct tegra_dc *dc) +{ + tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL); + tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL); +} + +static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v, + unsigned int bpp) +{ + fixed20_12 outf = dfixed_init(out); + fixed20_12 inf = dfixed_init(in); + u32 dda_inc; + int max; + + if (v) + max = 15; + else { + switch (bpp) { + case 2: + max = 8; + break; + + default: + WARN_ON_ONCE(1); + fallthrough; + case 4: + max = 4; + break; + } + } + + outf.full = max_t(u32, outf.full - dfixed_const(1), dfixed_const(1)); + inf.full -= dfixed_const(1); + + dda_inc = dfixed_div(inf, outf); + dda_inc = min_t(u32, dda_inc, dfixed_const(max)); + + return dda_inc; +} + +static inline u32 compute_initial_dda(unsigned int in) +{ + fixed20_12 inf = dfixed_init(in); + return dfixed_frac(inf); +} + +static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane) +{ + u32 background[3] = { + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + BLEND_WEIGHT1(0) | BLEND_WEIGHT0(0) | BLEND_COLOR_KEY_NONE, + }; + u32 foreground = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255) | + BLEND_COLOR_KEY_NONE; + u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255); + enum drm_plane_colorkey_mode mode; + struct tegra_plane_state *state; + u32 blending[2]; + unsigned int i; + + /* disable blending for non-overlapping case */ + tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY); + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN); + + state = to_tegra_plane_state(plane->base.state); + mode = plane->base.state->colorkey.mode; + + /* setup color keying */ + if (mode == DRM_PLANE_COLORKEY_MODE_TRANSPARENT) { + /* color key matched areas are transparent */ + foreground = background[0] | BLEND_COLOR_KEY_0; + } + + /* setup alpha blending */ + if (state->opaque) { + /* + * Since custom fix-weight blending isn't utilized and weight + * of top window is set to max, we can enforce dependent + * blending which in this case results in transparent bottom + * window if top window is opaque and if top window enables + * alpha blending, then bottom window is getting alpha value + * of 1 minus the sum of alpha components of the overlapping + * plane. + */ + background[0] |= BLEND_CONTROL_DEPENDENT; + background[1] |= BLEND_CONTROL_DEPENDENT; + + /* + * The region where three windows overlap is the intersection + * of the two regions where two windows overlap. It contributes + * to the area if all of the windows on top of it have an alpha + * component. + */ + switch (state->base.normalized_zpos) { + case 0: + if (state->blending[0].alpha && + state->blending[1].alpha) + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + + case 1: + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + } + } else { + /* + * Enable alpha blending if pixel format has an alpha + * component. + */ + foreground |= BLEND_CONTROL_ALPHA; + + /* + * If any of the windows on top of this window is opaque, it + * will completely conceal this window within that area. 
If
+		 * top window has an alpha component, it is blended over the
+		 * bottom window.
+		 */
+		for (i = 0; i < 2; i++) {
+			if (state->blending[i].alpha &&
+			    state->blending[i].top)
+				background[i] |= BLEND_CONTROL_DEPENDENT;
+		}
+
+		switch (state->base.normalized_zpos) {
+		case 0:
+			if (state->blending[0].alpha &&
+			    state->blending[1].alpha)
+				background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+
+		case 1:
+			/*
+			 * When both middle and topmost windows have an alpha,
+			 * these windows are mixed together and then the result
+			 * is blended over the bottom window.
+			 */
+			if (state->blending[0].alpha &&
+			    state->blending[0].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+
+			if (state->blending[1].alpha &&
+			    state->blending[1].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+			break;
+		}
+	}
+
+	switch (state->base.normalized_zpos) {
+	case 0:
+		tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 1:
+		/*
+		 * If window B / C is topmost, then X / Y registers are
+		 * matching the order of blending[...] state indices,
+		 * otherwise a swap is required.
+		 */
+		if (!state->blending[0].top && state->blending[1].top) {
+			blending[0] = foreground;
+			blending[1] = background[1];
+		} else {
+			blending[0] = background[0];
+			blending[1] = foreground;
+		}
+
+		tegra_plane_writel(plane, blending[0], DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, blending[1], DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 2:
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_Y);
+		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_3WIN_XY);
+		break;
+	}
+}
+
+static void tegra_plane_setup_blending(struct tegra_plane *plane,
+				       const struct tegra_dc_window *window)
+{
+	u32 value;
+
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_MATCH_SELECT);
+
+	value = BLEND_FACTOR_DST_ALPHA_ZERO | BLEND_FACTOR_SRC_ALPHA_K2 |
+		BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC |
+		BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC;
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_NOMATCH_SELECT);
+
+	value = K2(255) | K1(255) | WINDOW_LAYER_DEPTH(255 - window->zpos);
+	tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL);
+}
+
+static bool
+tegra_plane_use_horizontal_filtering(struct tegra_plane *plane,
+				     const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.w == window->dst.w)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	return true;
+}
+
+static bool
+tegra_plane_use_vertical_filtering(struct tegra_plane *plane,
+				   const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.h == window->dst.h)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	if (plane->index == 2 && dc->soc->has_win_c_without_vert_filter)
+		return false;
+
+	return true;
+}
+
+static void tegra_dc_setup_window(struct tegra_plane *plane,
+				  const struct tegra_dc_window *window)
+{
+	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
+	struct tegra_dc *dc = plane->dc;
+	bool yuv, planar;
+	u32 value;
+
+	/*
+	 * For YUV planar modes, the number of bytes per pixel takes into
+	 * 
account only the luma component and therefore is 1. + */ + yuv = tegra_plane_format_is_yuv(window->format, &planar); + if (!yuv) + bpp = window->bits_per_pixel / 8; + else + bpp = planar ? 1 : 2; + + tegra_plane_writel(plane, window->format, DC_WIN_COLOR_DEPTH); + tegra_plane_writel(plane, window->swap, DC_WIN_BYTE_SWAP); + + value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x); + tegra_plane_writel(plane, value, DC_WIN_POSITION); + + value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w); + tegra_plane_writel(plane, value, DC_WIN_SIZE); + + h_offset = window->src.x * bpp; + v_offset = window->src.y; + h_size = window->src.w * bpp; + v_size = window->src.h; + + if (window->reflect_x) + h_offset += (window->src.w - 1) * bpp; + + if (window->reflect_y) + v_offset += window->src.h - 1; + + value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size); + tegra_plane_writel(plane, value, DC_WIN_PRESCALED_SIZE); + + /* + * For DDA computations the number of bytes per pixel for YUV planar + * modes needs to take into account all Y, U and V components. + */ + if (yuv && planar) + bpp = 2; + + h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp); + v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp); + + value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda); + tegra_plane_writel(plane, value, DC_WIN_DDA_INC); + + h_dda = compute_initial_dda(window->src.x); + v_dda = compute_initial_dda(window->src.y); + + tegra_plane_writel(plane, h_dda, DC_WIN_H_INITIAL_DDA); + tegra_plane_writel(plane, v_dda, DC_WIN_V_INITIAL_DDA); + + tegra_plane_writel(plane, 0, DC_WIN_UV_BUF_STRIDE); + tegra_plane_writel(plane, 0, DC_WIN_BUF_STRIDE); + + tegra_plane_writel(plane, window->base[0], DC_WINBUF_START_ADDR); + + if (yuv && planar) { + tegra_plane_writel(plane, window->base[1], DC_WINBUF_START_ADDR_U); + tegra_plane_writel(plane, window->base[2], DC_WINBUF_START_ADDR_V); + value = window->stride[1] << 16 | window->stride[0]; + tegra_plane_writel(plane, value, DC_WIN_LINE_STRIDE); + } else { + tegra_plane_writel(plane, window->stride[0], DC_WIN_LINE_STRIDE); + } + + tegra_plane_writel(plane, h_offset, DC_WINBUF_ADDR_H_OFFSET); + tegra_plane_writel(plane, v_offset, DC_WINBUF_ADDR_V_OFFSET); + + if (dc->soc->supports_block_linear) { + unsigned long height = window->tiling.value; + + switch (window->tiling.mode) { + case TEGRA_BO_TILING_MODE_PITCH: + value = DC_WINBUF_SURFACE_KIND_PITCH; + break; + + case TEGRA_BO_TILING_MODE_TILED: + value = DC_WINBUF_SURFACE_KIND_TILED; + break; + + case TEGRA_BO_TILING_MODE_BLOCK: + value = DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(height) | + DC_WINBUF_SURFACE_KIND_BLOCK; + break; + } + + tegra_plane_writel(plane, value, DC_WINBUF_SURFACE_KIND); + } else { + switch (window->tiling.mode) { + case TEGRA_BO_TILING_MODE_PITCH: + value = DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV | + DC_WIN_BUFFER_ADDR_MODE_LINEAR; + break; + + case TEGRA_BO_TILING_MODE_TILED: + value = DC_WIN_BUFFER_ADDR_MODE_TILE_UV | + DC_WIN_BUFFER_ADDR_MODE_TILE; + break; + + case TEGRA_BO_TILING_MODE_BLOCK: + /* + * No need to handle this here because ->atomic_check + * will already have filtered it out. 
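+			 * (tegra_plane_atomic_check() rejects
+			 * TEGRA_BO_TILING_MODE_BLOCK with -EINVAL when the
+			 * SoC lacks block-linear support.)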
+ */ + break; + } + + tegra_plane_writel(plane, value, DC_WIN_BUFFER_ADDR_MODE); + } + + value = WIN_ENABLE; + + if (yuv) { + /* setup colorspace conversion coefficients */ + tegra_plane_writel(plane, window->csc.yof, DC_WIN_CSC_YOF); + tegra_plane_writel(plane, window->csc.kyrgb, DC_WIN_CSC_KYRGB); + tegra_plane_writel(plane, window->csc.kur, DC_WIN_CSC_KUR); + tegra_plane_writel(plane, window->csc.kvr, DC_WIN_CSC_KVR); + tegra_plane_writel(plane, window->csc.kug, DC_WIN_CSC_KUG); + tegra_plane_writel(plane, window->csc.kvg, DC_WIN_CSC_KVG); + tegra_plane_writel(plane, window->csc.kub, DC_WIN_CSC_KUB); + tegra_plane_writel(plane, window->csc.kvb, DC_WIN_CSC_KVB); + + value |= CSC_ENABLE; + } else if (window->bits_per_pixel < 24) { + value |= COLOR_EXPAND; + } + + if (window->reflect_x) + value |= H_DIRECTION; + + if (window->reflect_y) + value |= V_DIRECTION; + + if (tegra_plane_use_horizontal_filtering(plane, window)) { + /* + * Enable horizontal 6-tap filter and set filtering + * coefficients to the default values defined in TRM. + */ + tegra_plane_writel(plane, 0x00008000, DC_WIN_H_FILTER_P(0)); + tegra_plane_writel(plane, 0x3e087ce1, DC_WIN_H_FILTER_P(1)); + tegra_plane_writel(plane, 0x3b117ac1, DC_WIN_H_FILTER_P(2)); + tegra_plane_writel(plane, 0x591b73aa, DC_WIN_H_FILTER_P(3)); + tegra_plane_writel(plane, 0x57256d9a, DC_WIN_H_FILTER_P(4)); + tegra_plane_writel(plane, 0x552f668b, DC_WIN_H_FILTER_P(5)); + tegra_plane_writel(plane, 0x73385e8b, DC_WIN_H_FILTER_P(6)); + tegra_plane_writel(plane, 0x72435583, DC_WIN_H_FILTER_P(7)); + tegra_plane_writel(plane, 0x714c4c8b, DC_WIN_H_FILTER_P(8)); + tegra_plane_writel(plane, 0x70554393, DC_WIN_H_FILTER_P(9)); + tegra_plane_writel(plane, 0x715e389b, DC_WIN_H_FILTER_P(10)); + tegra_plane_writel(plane, 0x71662faa, DC_WIN_H_FILTER_P(11)); + tegra_plane_writel(plane, 0x536d25ba, DC_WIN_H_FILTER_P(12)); + tegra_plane_writel(plane, 0x55731bca, DC_WIN_H_FILTER_P(13)); + tegra_plane_writel(plane, 0x387a11d9, DC_WIN_H_FILTER_P(14)); + tegra_plane_writel(plane, 0x3c7c08f1, DC_WIN_H_FILTER_P(15)); + + value |= H_FILTER; + } + + if (tegra_plane_use_vertical_filtering(plane, window)) { + unsigned int i, k; + + /* + * Enable vertical 2-tap filter and set filtering + * coefficients to the default values defined in TRM. 
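+		 * The loop below programs a linearly decreasing ramp of
+		 * coefficients, 128 down to 8 in steps of 8 across the 16
+		 * filter phases, which effectively interpolates each output
+		 * line from the two nearest source lines.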
+ */ + for (i = 0, k = 128; i < 16; i++, k -= 8) + tegra_plane_writel(plane, k, DC_WIN_V_FILTER_P(i)); + + value |= V_FILTER; + } + + tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS); + + if (dc->soc->has_legacy_blending) + tegra_plane_setup_blending_legacy(plane); + else + tegra_plane_setup_blending(plane, window); +} + +static const u32 tegra20_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* non-native formats */ + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, +}; + +static const u64 tegra20_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED, + DRM_FORMAT_MOD_INVALID +}; + +static const u32 tegra114_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, +}; + +static const u32 tegra124_primary_formats[] = { + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + /* new on Tegra124 */ + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRX8888, +}; + +static const u64 tegra124_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4), + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5), + DRM_FORMAT_MOD_INVALID +}; + +static int tegra_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); + unsigned int supported_rotation = DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y; + unsigned int rotation = new_plane_state->rotation; + struct tegra_bo_tiling *tiling = &plane_state->tiling; + struct tegra_plane *tegra = to_tegra_plane(plane); + struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc); + int err; + + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + + /* no need for further checks if the plane is being disabled */ + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; + return 0; + } + + err = tegra_plane_format(new_plane_state->fb->format->format, + &plane_state->format, + &plane_state->swap); + if (err < 0) + return err; + + /* + * Tegra20 and Tegra30 are special cases here because they support + * only variants of specific formats with an alpha component, but not + * the corresponding opaque formats. However, the opaque formats can + * be emulated by disabling alpha blending for the plane. 
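+	 * XRGB8888, for example, can then be scanned out as its ARGB8888
+	 * variant with alpha blending disabled, so that the undefined
+	 * padding byte has no effect on the output.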
+ */ + if (dc->soc->has_legacy_blending) { + err = tegra_plane_setup_legacy_state(tegra, plane_state); + if (err < 0) + return err; + } + + err = tegra_fb_get_tiling(new_plane_state->fb, tiling); + if (err < 0) + return err; + + if (tiling->mode == TEGRA_BO_TILING_MODE_BLOCK && + !dc->soc->supports_block_linear) { + DRM_ERROR("hardware doesn't support block linear mode\n"); + return -EINVAL; + } + + /* + * Older userspace used custom BO flag in order to specify the Y + * reflection, while modern userspace uses the generic DRM rotation + * property in order to achieve the same result. The legacy BO flag + * duplicates the DRM rotation property when both are set. + */ + if (tegra_fb_is_bottom_up(new_plane_state->fb)) + rotation |= DRM_MODE_REFLECT_Y; + + rotation = drm_rotation_simplify(rotation, supported_rotation); + + if (rotation & DRM_MODE_REFLECT_X) + plane_state->reflect_x = true; + else + plane_state->reflect_x = false; + + if (rotation & DRM_MODE_REFLECT_Y) + plane_state->reflect_y = true; + else + plane_state->reflect_y = false; + + /* + * Tegra doesn't support different strides for U and V planes so we + * error out if the user tries to display a framebuffer with such a + * configuration. + */ + if (new_plane_state->fb->format->num_planes > 2) { + if (new_plane_state->fb->pitches[2] != new_plane_state->fb->pitches[1]) { + DRM_ERROR("unsupported UV-plane configuration\n"); + return -EINVAL; + } + } + + err = tegra_plane_state_add(tegra, new_plane_state); + if (err < 0) + return err; + + return 0; +} + +static void tegra_plane_atomic_disable(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, + plane); + struct tegra_plane *p = to_tegra_plane(plane); + u32 value; + + /* rien ne va plus */ + if (!old_state || !old_state->crtc) + return; + + value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS); + value &= ~WIN_ENABLE; + tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS); +} + +static void tegra_plane_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); + struct drm_framebuffer *fb = new_state->fb; + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc_window window; + const struct drm_tegra_plane_csc_blob *csc; + unsigned int i; + + /* rien ne va plus */ + if (!new_state->crtc || !new_state->fb) + return; + + if (!new_state->visible) + return tegra_plane_atomic_disable(plane, state); + + memset(&window, 0, sizeof(window)); + window.src.x = new_state->src.x1 >> 16; + window.src.y = new_state->src.y1 >> 16; + window.src.w = drm_rect_width(&new_state->src) >> 16; + window.src.h = drm_rect_height(&new_state->src) >> 16; + window.dst.x = new_state->dst.x1; + window.dst.y = new_state->dst.y1; + window.dst.w = drm_rect_width(&new_state->dst); + window.dst.h = drm_rect_height(&new_state->dst); + window.bits_per_pixel = fb->format->cpp[0] * 8; + window.reflect_x = tegra_plane_state->reflect_x; + window.reflect_y = tegra_plane_state->reflect_y; + + /* copy from state */ + window.zpos = new_state->normalized_zpos; + window.tiling = tegra_plane_state->tiling; + window.format = tegra_plane_state->format; + window.swap = tegra_plane_state->swap; + + for (i = 0; i < fb->format->num_planes; i++) { + window.base[i] = tegra_plane_state->iova[i] + fb->offsets[i]; + + /* + * Tegra uses a shared stride for UV planes. 
Framebuffers are + * already checked for this in the tegra_plane_atomic_check() + * function, so it's safe to ignore the V-plane pitch here. + */ + if (i < 2) + window.stride[i] = fb->pitches[i]; + + /* + * There are two ways to set tiling mode on Tegra: + * + * 1. New: using DRM modifiers + * 2. Old: using Tegra BO flags + * + * Older userspace doesn't support ADDFB2 IOCTL. Assume that + * legacy userspace is used if BO flag is set and FB modifier + * isn't set to maintain userspace compatibility. + */ + if (i == 0 && + window.tiling.mode == TEGRA_BO_TILING_MODE_PITCH && + window.tiling.value == 0) { + struct tegra_bo *bo = tegra_fb_get_plane(fb, i); + window.tiling.mode = bo->tiling.mode; + } + } + + if (tegra_plane_state->csc_blob) { + csc = tegra_plane_state->csc_blob->data; + + window.csc.yof = csc->yof; + window.csc.kyrgb = csc->kyrgb; + window.csc.kur = csc->kur; + window.csc.kvr = csc->kvr; + window.csc.kug = csc->kug; + window.csc.kvg = csc->kvg; + window.csc.kub = csc->kub; + window.csc.kvb = csc->kvb; + } else { + window.csc.yof = 0x00f0; + window.csc.kyrgb = 0x012a; + window.csc.kur = 0x0000; + window.csc.kvr = 0x0198; + window.csc.kug = 0x039b; + window.csc.kvg = 0x032f; + window.csc.kub = 0x0204; + window.csc.kvb = 0x0000; + } + + tegra_dc_setup_window(p, &window); +} + +static int tegra_plane_atomic_async_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state; + int err; + + /* + * It is not obvious whether it's fine for framebuffer to disappear + * while display controller could be accessing it or hardware could + * cope with that somehow. Let's just disallow that to happen. + */ + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + if (!new_plane_state->fb) + return -EINVAL; + + /* the rest should be fine to change asynchronously */ + err = tegra_plane_atomic_check(plane, state); + if (err) + return err; + + return 0; +} + +static inline void tegra_plane_clear_latching(struct drm_plane *plane) +{ + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc = p->dc; + + /* clear pending latching request from the previous async update */ + tegra_dc_writel(dc, 0, DC_CMD_STATE_CONTROL); +} + +static inline void tegra_plane_atomic_flush(struct drm_plane *plane) +{ + struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc = p->dc; + + /* latch updated registers and activate the new state */ + tegra_dc_writel(dc, 1 << (p->index + 9), DC_CMD_STATE_CONTROL); + tegra_dc_writel(dc, 1 << (p->index + 1), DC_CMD_STATE_CONTROL); +} + +static void tegra_plane_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + tegra_plane_clear_latching(plane); + tegra_plane_copy_state(plane, new_plane_state); + tegra_plane_atomic_update(plane, state); + tegra_plane_atomic_flush(plane); +} + +static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = { + .prepare_fb = tegra_plane_prepare_fb, + .cleanup_fb = tegra_plane_cleanup_fb, + .atomic_check = tegra_plane_atomic_check, + .atomic_disable = tegra_plane_atomic_disable, + .atomic_update = tegra_plane_atomic_update, + .atomic_async_check = tegra_plane_atomic_async_check, + .atomic_async_update = tegra_plane_atomic_async_update, +}; + +static unsigned long tegra_plane_get_possible_crtcs(struct drm_device *drm) +{ + /* + * Ideally this would use drm_crtc_mask(), but that would require the + * CRTC to already be in 
the mode_config's list of CRTCs. However, it + * will only be added to that list in the drm_crtc_init_with_planes() + * (in tegra_dc_init()), which in turn requires registration of these + * planes. So we have ourselves a nice little chicken and egg problem + * here. + * + * We work around this by manually creating the mask from the number + * of CRTCs that have been registered, and should therefore always be + * the same as drm_crtc_index() after registration. + */ + return 1 << drm->mode_config.num_crtc; +} + +static void tegra_plane_create_csc_property(struct tegra_plane *plane) +{ + /* set default colorspace conversion coefficients to ITU-R BT.601 */ + struct drm_tegra_plane_csc_blob csc_bt601 = { + .yof = 0x00f0, + .kyrgb = 0x012a, + .kur = 0x0000, + .kvr = 0x0198, + .kug = 0x039b, + .kvg = 0x032f, + .kub = 0x0204, + .kvb = 0x0000, + }; + struct drm_property_blob *blob; + + blob = drm_property_create_blob(plane->base.dev, sizeof(csc_bt601), + &csc_bt601); + if (!blob) { + dev_err(plane->dc->dev, "failed to create CSC BLOB\n"); + return; + } + + plane->props.csc_blob = drm_property_create( + plane->base.dev, DRM_MODE_PROP_BLOB, "YUV to RGB CSC", 0); + + if (!plane->props.csc_blob) { + dev_err(plane->dc->dev, "failed to create CSC property\n"); + drm_property_blob_put(blob); + return; + } + + drm_object_attach_property(&plane->base.base, plane->props.csc_blob, 0); + + plane->csc_default = blob; +} + +static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, + struct tegra_dc *dc) +{ + unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm); + enum drm_plane_type type = DRM_PLANE_TYPE_PRIMARY; + struct tegra_plane *plane; + unsigned int num_formats; + const u64 *modifiers; + const u32 *formats; + int err; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + /* Always use window A as primary window */ + plane->offset = 0xa00; + plane->index = 0; + plane->dc = dc; + + num_formats = dc->soc->num_primary_formats; + formats = dc->soc->primary_formats; + modifiers = dc->soc->modifiers; + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, + &tegra_plane_funcs, formats, + num_formats, modifiers, type, NULL); + if (err < 0) { + kfree(plane); + return ERR_PTR(err); + } + + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); + + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); + + if (dc->soc->has_legacy_blending) + drm_plane_create_colorkey_properties(&plane->base, + BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) | + BIT(DRM_PLANE_COLORKEY_MODE_TRANSPARENT)); + + if (dc->soc->has_win_a_csc) + tegra_plane_create_csc_property(plane); + + return &plane->base; +} + +static const u32 tegra_legacy_cursor_plane_formats[] = { + DRM_FORMAT_RGBA8888, +}; + +static const u32 tegra_cursor_plane_formats[] = { + DRM_FORMAT_ARGB8888, +}; + +static int tegra_cursor_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state); + struct tegra_plane *tegra = 
to_tegra_plane(plane); + int err; + + plane_state->peak_memory_bandwidth = 0; + plane_state->avg_memory_bandwidth = 0; + + /* no need for further checks if the plane is being disabled */ + if (!new_plane_state->crtc) { + plane_state->total_peak_memory_bandwidth = 0; + return 0; + } + + /* scaling not supported for cursor */ + if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) || + (new_plane_state->src_h >> 16 != new_plane_state->crtc_h)) + return -EINVAL; + + /* only square cursors supported */ + if (new_plane_state->src_w != new_plane_state->src_h) + return -EINVAL; + + if (new_plane_state->crtc_w != 32 && new_plane_state->crtc_w != 64 && + new_plane_state->crtc_w != 128 && new_plane_state->crtc_w != 256) + return -EINVAL; + + err = tegra_plane_state_add(tegra, new_plane_state); + if (err < 0) + return err; + + return 0; +} + +static void tegra_cursor_atomic_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state); + struct tegra_dc *dc = to_tegra_dc(new_state->crtc); + struct tegra_drm *tegra = plane->dev->dev_private; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u64 dma_mask = *dc->dev->dma_mask; +#endif + unsigned int x, y; + u32 value = 0; + + /* rien ne va plus */ + if (!new_state->crtc || !new_state->fb) + return; + + /* + * Legacy display supports hardware clipping of the cursor, but + * nvdisplay relies on software to clip the cursor to the screen. + */ + if (!dc->soc->has_nvdisplay) + value |= CURSOR_CLIP_DISPLAY; + + switch (new_state->crtc_w) { + case 32: + value |= CURSOR_SIZE_32x32; + break; + + case 64: + value |= CURSOR_SIZE_64x64; + break; + + case 128: + value |= CURSOR_SIZE_128x128; + break; + + case 256: + value |= CURSOR_SIZE_256x256; + break; + + default: + WARN(1, "cursor size %ux%u not supported\n", + new_state->crtc_w, new_state->crtc_h); + return; + } + + value |= (tegra_plane_state->iova[0] >> 10) & 0x3fffff; + tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + value = (tegra_plane_state->iova[0] >> 32) & (dma_mask >> 32); + tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI); +#endif + + /* enable cursor and set blend mode */ + value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS); + value |= CURSOR_ENABLE; + tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS); + + value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL); + value &= ~CURSOR_DST_BLEND_MASK; + value &= ~CURSOR_SRC_BLEND_MASK; + + if (dc->soc->has_nvdisplay) + value &= ~CURSOR_COMPOSITION_MODE_XOR; + else + value |= CURSOR_MODE_NORMAL; + + value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC; + value |= CURSOR_SRC_BLEND_K1_TIMES_SRC; + value |= CURSOR_ALPHA; + tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL); + + /* nvdisplay relies on software for clipping */ + if (dc->soc->has_nvdisplay) { + struct drm_rect src; + + x = new_state->dst.x1; + y = new_state->dst.y1; + + drm_rect_fp_to_int(&src, &new_state->src); + + value = (src.y1 & tegra->vmask) << 16 | (src.x1 & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR); + + value = (drm_rect_height(&src) & tegra->vmask) << 16 | + (drm_rect_width(&src) & tegra->hmask); + tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR); + } else { + x = new_state->crtc_x; + y = new_state->crtc_y; + } + + /* position the cursor */ + value = ((y & tegra->vmask) << 16) | (x & 
tegra->hmask);
+	tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
+}
+
+static void tegra_cursor_atomic_disable(struct drm_plane *plane,
+					struct drm_atomic_state *state)
+{
+	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+									   plane);
+	struct tegra_dc *dc;
+	u32 value;
+
+	/* rien ne va plus */
+	if (!old_state || !old_state->crtc)
+		return;
+
+	dc = to_tegra_dc(old_state->crtc);
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value &= ~CURSOR_ENABLE;
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+}
+
+static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
+	.atomic_check = tegra_cursor_atomic_check,
+	.atomic_update = tegra_cursor_atomic_update,
+	.atomic_disable = tegra_cursor_atomic_disable,
+};
+
+static const uint64_t linear_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
+static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
+						      struct tegra_dc *dc)
+{
+	unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm);
+	struct tegra_plane *plane;
+	unsigned int num_formats;
+	const u32 *formats;
+	int err;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * This index is kind of fake. The cursor isn't a regular plane, but
+	 * its update and activation request bits in DC_CMD_STATE_CONTROL do
+	 * use the same programming. Setting this fake index here allows the
+	 * code in tegra_add_plane_state() to do the right thing without the
+	 * need to special-case the cursor plane.
+	 */
+	plane->index = 6;
+	plane->dc = dc;
+
+	if (!dc->soc->has_nvdisplay) {
+		num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats);
+		formats = tegra_legacy_cursor_plane_formats;
+	} else {
+		num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
+		formats = tegra_cursor_plane_formats;
+	}
+
+	err = tegra_plane_interconnect_init(plane);
+	if (err) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
+				       &tegra_plane_funcs, formats,
+				       num_formats, linear_modifiers,
+				       DRM_PLANE_TYPE_CURSOR, NULL);
+	if (err < 0) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	drm_plane_helper_add(&plane->base, &tegra_cursor_plane_helper_funcs);
+	drm_plane_create_zpos_immutable_property(&plane->base, 255);
+
+	return &plane->base;
+}
+
+static const u32 tegra20_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* non-native formats */
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
+	/* planar formats */
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
+};
+
+static const u32 tegra114_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGBA5551,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	/* new on Tegra114 */
+	DRM_FORMAT_ABGR4444,
+	DRM_FORMAT_ABGR1555,
+	DRM_FORMAT_BGRA5551,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGBX5551,
+	DRM_FORMAT_XBGR1555,
+	DRM_FORMAT_BGRX5551,
+	DRM_FORMAT_BGR565,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	/* planar formats */
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YUV422,
+};
+
+static const u32 tegra124_overlay_formats[] = {
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_ARGB1555,
+	
DRM_FORMAT_RGB565, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + /* new on Tegra114 */ + DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGR565, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + /* new on Tegra124 */ + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRX8888, + /* planar formats */ + DRM_FORMAT_UYVY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, +}; + +static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, + struct tegra_dc *dc, + unsigned int index, + bool cursor) +{ + unsigned long possible_crtcs = tegra_plane_get_possible_crtcs(drm); + struct tegra_plane *plane; + unsigned int num_formats; + enum drm_plane_type type; + const u32 *formats; + int err; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + plane->offset = 0xa00 + 0x200 * index; + plane->index = index; + plane->dc = dc; + + num_formats = dc->soc->num_overlay_formats; + formats = dc->soc->overlay_formats; + + err = tegra_plane_interconnect_init(plane); + if (err) { + kfree(plane); + return ERR_PTR(err); + } + + if (!cursor) + type = DRM_PLANE_TYPE_OVERLAY; + else + type = DRM_PLANE_TYPE_CURSOR; + + err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, + &tegra_plane_funcs, formats, + num_formats, linear_modifiers, + type, NULL); + if (err < 0) { + kfree(plane); + return ERR_PTR(err); + } + + drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); + tegra_plane_create_csc_property(plane); + + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_ROTATE_180 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); + + if (dc->soc->has_legacy_blending) + drm_plane_create_colorkey_properties(&plane->base, + BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) | + BIT(DRM_PLANE_COLORKEY_MODE_TRANSPARENT)); + + return &plane->base; +} + +static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm, + struct tegra_dc *dc) +{ + struct drm_plane *plane, *primary = NULL; + unsigned int i, j; + + for (i = 0; i < dc->soc->num_wgrps; i++) { + const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i]; + + if (wgrp->dc == dc->pipe) { + for (j = 0; j < wgrp->num_windows; j++) { + unsigned int index = wgrp->windows[j]; + + plane = tegra_shared_plane_create(drm, dc, + wgrp->index, + index); + if (IS_ERR(plane)) + return plane; + + /* + * Choose the first shared plane owned by this + * head as the primary plane. 
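+				 * The remaining windows in the group keep
+				 * the plane type assigned by
+				 * tegra_shared_plane_create().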
+				 */
+				if (!primary) {
+					plane->type = DRM_PLANE_TYPE_PRIMARY;
+					primary = plane;
+				}
+			}
+		}
+	}
+
+	return primary;
+}
+
+static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm,
+					     struct tegra_dc *dc)
+{
+	struct drm_plane *planes[2], *primary;
+	unsigned int planes_num;
+	unsigned int i;
+	int err;
+
+	primary = tegra_primary_plane_create(drm, dc);
+	if (IS_ERR(primary))
+		return primary;
+
+	if (dc->soc->supports_cursor)
+		planes_num = 2;
+	else
+		planes_num = 1;
+
+	for (i = 0; i < planes_num; i++) {
+		planes[i] = tegra_dc_overlay_plane_create(drm, dc, 1 + i,
+							  false);
+		if (IS_ERR(planes[i])) {
+			err = PTR_ERR(planes[i]);
+
+			while (i--)
+				tegra_plane_funcs.destroy(planes[i]);
+
+			tegra_plane_funcs.destroy(primary);
+			return ERR_PTR(err);
+		}
+	}
+
+	return primary;
+}
+
+static void tegra_dc_destroy(struct drm_crtc *crtc)
+{
+	drm_crtc_cleanup(crtc);
+}
+
+static void tegra_crtc_reset(struct drm_crtc *crtc)
+{
+	struct tegra_dc_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (crtc->state)
+		tegra_crtc_atomic_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
+}
+
+static struct drm_crtc_state *
+tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
+{
+	struct tegra_dc_state *state = to_dc_state(crtc->state);
+	struct tegra_dc_state *copy;
+
+	copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &copy->base);
+	copy->ckey = state->ckey;
+	copy->clk = state->clk;
+	copy->pclk = state->pclk;
+	copy->div = state->div;
+	copy->planes = state->planes;
+
+	return &copy->base;
+}
+
+static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state)
+{
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(state);
+}
+
+#define DEBUGFS_REG32(_name) { .name = #_name, .offset = _name }
+
+static const struct debugfs_reg32 tegra_dc_regs[] = {
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_GENERAL_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_A_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_B_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL),
+	DEBUGFS_REG32(DC_CMD_WIN_C_INCR_SYNCPT_ERROR),
+	DEBUGFS_REG32(DC_CMD_CONT_SYNCPT_VSYNC),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND_OPTION0),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_COMMAND),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_POWER_CONTROL),
+	DEBUGFS_REG32(DC_CMD_INT_STATUS),
+	DEBUGFS_REG32(DC_CMD_INT_MASK),
+	DEBUGFS_REG32(DC_CMD_INT_ENABLE),
+	DEBUGFS_REG32(DC_CMD_INT_TYPE),
+	DEBUGFS_REG32(DC_CMD_INT_POLARITY),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE1),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE2),
+	DEBUGFS_REG32(DC_CMD_SIGNAL_RAISE3),
+	DEBUGFS_REG32(DC_CMD_STATE_ACCESS),
+	DEBUGFS_REG32(DC_CMD_STATE_CONTROL),
+	DEBUGFS_REG32(DC_CMD_DISPLAY_WINDOW_HEADER),
+	DEBUGFS_REG32(DC_CMD_REG_ACT_CONTROL),
+	DEBUGFS_REG32(DC_COM_CRC_CONTROL),
+	DEBUGFS_REG32(DC_COM_CRC_CHECKSUM),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(1)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(2)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_ENABLE(3)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(0)),
+	DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(1)),
+	
DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_POLARITY(3)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_DATA(3)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(0)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(1)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(2)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_ENABLE(3)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(0)), + DEBUGFS_REG32(DC_COM_PIN_INPUT_DATA(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(0)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(1)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(2)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(3)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(4)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(5)), + DEBUGFS_REG32(DC_COM_PIN_OUTPUT_SELECT(6)), + DEBUGFS_REG32(DC_COM_PIN_MISC_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM0_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM0_DUTY_CYCLE), + DEBUGFS_REG32(DC_COM_PIN_PM1_CONTROL), + DEBUGFS_REG32(DC_COM_PIN_PM1_DUTY_CYCLE), + DEBUGFS_REG32(DC_COM_SPI_CONTROL), + DEBUGFS_REG32(DC_COM_SPI_START_BYTE), + DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_AB), + DEBUGFS_REG32(DC_COM_HSPI_WRITE_DATA_CD), + DEBUGFS_REG32(DC_COM_HSPI_CS_DC), + DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_A), + DEBUGFS_REG32(DC_COM_SCRATCH_REGISTER_B), + DEBUGFS_REG32(DC_COM_GPIO_CTRL), + DEBUGFS_REG32(DC_COM_GPIO_DEBOUNCE_COUNTER), + DEBUGFS_REG32(DC_COM_CRC_CHECKSUM_LATCHED), + DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS0), + DEBUGFS_REG32(DC_DISP_DISP_SIGNAL_OPTIONS1), + DEBUGFS_REG32(DC_DISP_DISP_WIN_OPTIONS), + DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY), + DEBUGFS_REG32(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER), + DEBUGFS_REG32(DC_DISP_DISP_TIMING_OPTIONS), + DEBUGFS_REG32(DC_DISP_REF_TO_SYNC), + DEBUGFS_REG32(DC_DISP_SYNC_WIDTH), + DEBUGFS_REG32(DC_DISP_BACK_PORCH), + DEBUGFS_REG32(DC_DISP_ACTIVE), + DEBUGFS_REG32(DC_DISP_FRONT_PORCH), + DEBUGFS_REG32(DC_DISP_H_PULSE0_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE0_POSITION_D), + DEBUGFS_REG32(DC_DISP_H_PULSE1_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE1_POSITION_D), + DEBUGFS_REG32(DC_DISP_H_PULSE2_CONTROL), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_A), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_B), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_C), + DEBUGFS_REG32(DC_DISP_H_PULSE2_POSITION_D), + DEBUGFS_REG32(DC_DISP_V_PULSE0_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_B), + DEBUGFS_REG32(DC_DISP_V_PULSE0_POSITION_C), + DEBUGFS_REG32(DC_DISP_V_PULSE1_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_B), + DEBUGFS_REG32(DC_DISP_V_PULSE1_POSITION_C), + DEBUGFS_REG32(DC_DISP_V_PULSE2_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE2_POSITION_A), + DEBUGFS_REG32(DC_DISP_V_PULSE3_CONTROL), + DEBUGFS_REG32(DC_DISP_V_PULSE3_POSITION_A), + DEBUGFS_REG32(DC_DISP_M0_CONTROL), + DEBUGFS_REG32(DC_DISP_M1_CONTROL), + DEBUGFS_REG32(DC_DISP_DI_CONTROL), + DEBUGFS_REG32(DC_DISP_PP_CONTROL), + DEBUGFS_REG32(DC_DISP_PP_SELECT_A), + DEBUGFS_REG32(DC_DISP_PP_SELECT_B), + DEBUGFS_REG32(DC_DISP_PP_SELECT_C), + DEBUGFS_REG32(DC_DISP_PP_SELECT_D), + DEBUGFS_REG32(DC_DISP_DISP_CLOCK_CONTROL), + 
DEBUGFS_REG32(DC_DISP_DISP_INTERFACE_CONTROL), + DEBUGFS_REG32(DC_DISP_DISP_COLOR_CONTROL), + DEBUGFS_REG32(DC_DISP_SHIFT_CLOCK_OPTIONS), + DEBUGFS_REG32(DC_DISP_DATA_ENABLE_OPTIONS), + DEBUGFS_REG32(DC_DISP_SERIAL_INTERFACE_OPTIONS), + DEBUGFS_REG32(DC_DISP_LCD_SPI_OPTIONS), + DEBUGFS_REG32(DC_DISP_BORDER_COLOR), + DEBUGFS_REG32(DC_DISP_COLOR_KEY0_LOWER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY0_UPPER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY1_LOWER), + DEBUGFS_REG32(DC_DISP_COLOR_KEY1_UPPER), + DEBUGFS_REG32(DC_DISP_CURSOR_FOREGROUND), + DEBUGFS_REG32(DC_DISP_CURSOR_BACKGROUND), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_NS), + DEBUGFS_REG32(DC_DISP_CURSOR_POSITION), + DEBUGFS_REG32(DC_DISP_CURSOR_POSITION_NS), + DEBUGFS_REG32(DC_DISP_INIT_SEQ_CONTROL), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_A), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_B), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_C), + DEBUGFS_REG32(DC_DISP_SPI_INIT_SEQ_DATA_D), + DEBUGFS_REG32(DC_DISP_DC_MCCIF_FIFOCTRL), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0A_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY0B_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1A_HYST), + DEBUGFS_REG32(DC_DISP_MCCIF_DISPLAY1B_HYST), + DEBUGFS_REG32(DC_DISP_DAC_CRT_CTRL), + DEBUGFS_REG32(DC_DISP_DISP_MISC_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_CSC_COEFF), + DEBUGFS_REG32(DC_DISP_SD_LUT(0)), + DEBUGFS_REG32(DC_DISP_SD_LUT(1)), + DEBUGFS_REG32(DC_DISP_SD_LUT(2)), + DEBUGFS_REG32(DC_DISP_SD_LUT(3)), + DEBUGFS_REG32(DC_DISP_SD_LUT(4)), + DEBUGFS_REG32(DC_DISP_SD_LUT(5)), + DEBUGFS_REG32(DC_DISP_SD_LUT(6)), + DEBUGFS_REG32(DC_DISP_SD_LUT(7)), + DEBUGFS_REG32(DC_DISP_SD_LUT(8)), + DEBUGFS_REG32(DC_DISP_SD_FLICKER_CONTROL), + DEBUGFS_REG32(DC_DISP_DC_PIXEL_COUNT), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(0)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(1)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(2)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(3)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(4)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(5)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(6)), + DEBUGFS_REG32(DC_DISP_SD_HISTOGRAM(7)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(0)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(1)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(2)), + DEBUGFS_REG32(DC_DISP_SD_BL_TF(3)), + DEBUGFS_REG32(DC_DISP_SD_BL_CONTROL), + DEBUGFS_REG32(DC_DISP_SD_HW_K_VALUES), + DEBUGFS_REG32(DC_DISP_SD_MAN_K_VALUES), + DEBUGFS_REG32(DC_DISP_CURSOR_START_ADDR_HI), + DEBUGFS_REG32(DC_DISP_BLEND_CURSOR_CONTROL), + DEBUGFS_REG32(DC_WIN_WIN_OPTIONS), + DEBUGFS_REG32(DC_WIN_BYTE_SWAP), + DEBUGFS_REG32(DC_WIN_BUFFER_CONTROL), + DEBUGFS_REG32(DC_WIN_COLOR_DEPTH), + DEBUGFS_REG32(DC_WIN_POSITION), + DEBUGFS_REG32(DC_WIN_SIZE), + DEBUGFS_REG32(DC_WIN_PRESCALED_SIZE), + DEBUGFS_REG32(DC_WIN_H_INITIAL_DDA), + DEBUGFS_REG32(DC_WIN_V_INITIAL_DDA), + DEBUGFS_REG32(DC_WIN_DDA_INC), + DEBUGFS_REG32(DC_WIN_LINE_STRIDE), + DEBUGFS_REG32(DC_WIN_BUF_STRIDE), + DEBUGFS_REG32(DC_WIN_UV_BUF_STRIDE), + DEBUGFS_REG32(DC_WIN_BUFFER_ADDR_MODE), + DEBUGFS_REG32(DC_WIN_DV_CONTROL), + DEBUGFS_REG32(DC_WIN_BLEND_NOKEY), + DEBUGFS_REG32(DC_WIN_BLEND_1WIN), + DEBUGFS_REG32(DC_WIN_BLEND_2WIN_X), + DEBUGFS_REG32(DC_WIN_BLEND_2WIN_Y), + DEBUGFS_REG32(DC_WIN_BLEND_3WIN_XY), + DEBUGFS_REG32(DC_WIN_HP_FETCH_CONTROL), + DEBUGFS_REG32(DC_WINBUF_START_ADDR), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_NS), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_U), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_U_NS), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_V), + DEBUGFS_REG32(DC_WINBUF_START_ADDR_V_NS), + 
DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET), + DEBUGFS_REG32(DC_WINBUF_ADDR_H_OFFSET_NS), + DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET), + DEBUGFS_REG32(DC_WINBUF_ADDR_V_OFFSET_NS), + DEBUGFS_REG32(DC_WINBUF_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_AD_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_BD_UFLOW_STATUS), + DEBUGFS_REG32(DC_WINBUF_CD_UFLOW_STATUS), +}; + +static int tegra_dc_show_regs(struct seq_file *s, void *data) +{ + struct drm_info_node *node = s->private; + struct tegra_dc *dc = node->info_ent->data; + unsigned int i; + int err = 0; + + drm_modeset_lock(&dc->base.mutex, NULL); + + if (!dc->base.state->active) { + err = -EBUSY; + goto unlock; + } + + for (i = 0; i < ARRAY_SIZE(tegra_dc_regs); i++) { + unsigned int offset = tegra_dc_regs[i].offset; + + seq_printf(s, "%-40s %#05x %08x\n", tegra_dc_regs[i].name, + offset, tegra_dc_readl(dc, offset)); + } + +unlock: + drm_modeset_unlock(&dc->base.mutex); + return err; +} + +static int tegra_dc_show_crc(struct seq_file *s, void *data) +{ + struct drm_info_node *node = s->private; + struct tegra_dc *dc = node->info_ent->data; + int err = 0; + u32 value; + + drm_modeset_lock(&dc->base.mutex, NULL); + + if (!dc->base.state->active) { + err = -EBUSY; + goto unlock; + } + + value = DC_COM_CRC_CONTROL_ACTIVE_DATA | DC_COM_CRC_CONTROL_ENABLE; + tegra_dc_writel(dc, value, DC_COM_CRC_CONTROL); + tegra_dc_commit(dc); + + drm_crtc_wait_one_vblank(&dc->base); + drm_crtc_wait_one_vblank(&dc->base); + + value = tegra_dc_readl(dc, DC_COM_CRC_CHECKSUM); + seq_printf(s, "%08x\n", value); + + tegra_dc_writel(dc, 0, DC_COM_CRC_CONTROL); + +unlock: + drm_modeset_unlock(&dc->base.mutex); + return err; +} + +static int tegra_dc_show_stats(struct seq_file *s, void *data) +{ + struct drm_info_node *node = s->private; + struct tegra_dc *dc = node->info_ent->data; + + seq_printf(s, "frames: %lu\n", dc->stats.frames); + seq_printf(s, "vblank: %lu\n", dc->stats.vblank); + seq_printf(s, "underflow: %lu\n", dc->stats.underflow); + seq_printf(s, "overflow: %lu\n", dc->stats.overflow); + + seq_printf(s, "frames total: %lu\n", dc->stats.frames_total); + seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total); + seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total); + seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total); + + return 0; +} + +static struct drm_info_list debugfs_files[] = { + { "regs", tegra_dc_show_regs, 0, NULL }, + { "crc", tegra_dc_show_crc, 0, NULL }, + { "stats", tegra_dc_show_stats, 0, NULL }, +}; + +static int tegra_dc_late_register(struct drm_crtc *crtc) +{ + unsigned int i, count = ARRAY_SIZE(debugfs_files); + struct drm_minor *minor = crtc->dev->primary; + struct dentry *root; + struct tegra_dc *dc = to_tegra_dc(crtc); + +#ifdef CONFIG_DEBUG_FS + root = crtc->debugfs_entry; +#else + root = NULL; +#endif + + dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files), + GFP_KERNEL); + if (!dc->debugfs_files) + return -ENOMEM; + + for (i = 0; i < count; i++) + dc->debugfs_files[i].data = dc; + + drm_debugfs_create_files(dc->debugfs_files, count, root, minor); + + return 0; +} + +static void tegra_dc_early_unregister(struct drm_crtc *crtc) +{ + unsigned int count = ARRAY_SIZE(debugfs_files); + struct drm_minor *minor = crtc->dev->primary; + struct tegra_dc *dc = to_tegra_dc(crtc); + + drm_debugfs_remove_files(dc->debugfs_files, count, minor); + kfree(dc->debugfs_files); + dc->debugfs_files = NULL; +} + +static u32 tegra_dc_get_vblank_counter(struct drm_crtc *crtc) +{ + struct tegra_dc *dc = to_tegra_dc(crtc); + + /* XXX 
vblank syncpoints don't work with nvdisplay yet */ + if (dc->syncpt && !dc->soc->has_nvdisplay) + return host1x_syncpt_read(dc->syncpt); + + /* fallback to software emulated VBLANK counter */ + return (u32)drm_crtc_vblank_count(&dc->base); +} + +static int tegra_dc_enable_vblank(struct drm_crtc *crtc) +{ + struct tegra_dc *dc = to_tegra_dc(crtc); + u32 value; + + value = tegra_dc_readl(dc, DC_CMD_INT_MASK); + value |= VBLANK_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); + + return 0; +} + +static void tegra_dc_disable_vblank(struct drm_crtc *crtc) +{ + struct tegra_dc *dc = to_tegra_dc(crtc); + u32 value; + + value = tegra_dc_readl(dc, DC_CMD_INT_MASK); + value &= ~VBLANK_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); +} + +static const struct drm_crtc_funcs tegra_crtc_funcs = { + .page_flip = drm_atomic_helper_page_flip, + .set_config = drm_atomic_helper_set_config, + .destroy = tegra_dc_destroy, + .reset = tegra_crtc_reset, + .atomic_duplicate_state = tegra_crtc_atomic_duplicate_state, + .atomic_destroy_state = tegra_crtc_atomic_destroy_state, + .late_register = tegra_dc_late_register, + .early_unregister = tegra_dc_early_unregister, + .get_vblank_counter = tegra_dc_get_vblank_counter, + .enable_vblank = tegra_dc_enable_vblank, + .disable_vblank = tegra_dc_disable_vblank, +}; + +static int tegra_dc_set_timings(struct tegra_dc *dc, + struct drm_display_mode *mode) +{ + unsigned int h_ref_to_sync = 1; + unsigned int v_ref_to_sync = 1; + unsigned long value; + + if (!dc->soc->has_nvdisplay) { + tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS); + + value = (v_ref_to_sync << 16) | h_ref_to_sync; + tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC); + } + + value = ((mode->vsync_end - mode->vsync_start) << 16) | + ((mode->hsync_end - mode->hsync_start) << 0); + tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH); + + value = ((mode->vtotal - mode->vsync_end) << 16) | + ((mode->htotal - mode->hsync_end) << 0); + tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH); + + value = ((mode->vsync_start - mode->vdisplay) << 16) | + ((mode->hsync_start - mode->hdisplay) << 0); + tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH); + + value = (mode->vdisplay << 16) | mode->hdisplay; + tegra_dc_writel(dc, value, DC_DISP_ACTIVE); + + return 0; +} + +/** + * tegra_dc_state_setup_clock - check clock settings and store them in atomic + * state + * @dc: display controller + * @crtc_state: CRTC atomic state + * @clk: parent clock for display controller + * @pclk: pixel clock + * @div: shift clock divider + * + * Returns: + * 0 on success or a negative error-code on failure. 
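+ *
+ * -EINVAL is returned when @clk is not a possible parent of the display
+ * controller clock. A typical caller is an output driver's atomic check,
+ * e.g. (illustrative sketch, not a verbatim caller):
+ *
+ *	err = tegra_dc_state_setup_clock(dc, crtc_state, parent, pclk, div);
+ *	if (err < 0)
+ *		return err;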
+ */ +int tegra_dc_state_setup_clock(struct tegra_dc *dc, + struct drm_crtc_state *crtc_state, + struct clk *clk, unsigned long pclk, + unsigned int div) +{ + struct tegra_dc_state *state = to_dc_state(crtc_state); + + if (!clk_has_parent(dc->clk, clk)) + return -EINVAL; + + state->clk = clk; + state->pclk = pclk; + state->div = div; + + return 0; +} + +static void tegra_dc_update_voltage_state(struct tegra_dc *dc, + struct tegra_dc_state *state) +{ + unsigned long rate, pstate; + struct dev_pm_opp *opp; + int err; + + if (!dc->has_opp_table) + return; + + /* calculate actual pixel clock rate which depends on internal divider */ + rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2); + + /* find suitable OPP for the rate */ + opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate); + + if (opp == ERR_PTR(-ERANGE)) + opp = dev_pm_opp_find_freq_floor(dc->dev, &rate); + + if (IS_ERR(opp)) { + dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n", + rate, opp); + return; + } + + pstate = dev_pm_opp_get_required_pstate(opp, 0); + dev_pm_opp_put(opp); + + /* + * The minimum core voltage depends on the pixel clock rate (which + * depends on internal clock divider of the CRTC) and not on the + * rate of the display controller clock. This is why we're not using + * dev_pm_opp_set_rate() API and instead controlling the power domain + * directly. + */ + err = dev_pm_genpd_set_performance_state(dc->dev, pstate); + if (err) + dev_err(dc->dev, "failed to set power domain state to %lu: %d\n", + pstate, err); +} + +static void tegra_dc_set_clock_rate(struct tegra_dc *dc, + struct tegra_dc_state *state) +{ + int err; + + err = clk_set_parent(dc->clk, state->clk); + if (err < 0) + dev_err(dc->dev, "failed to set parent clock: %d\n", err); + + /* + * Outputs may not want to change the parent clock rate. This is only + * relevant to Tegra20 where only a single display PLL is available. + * Since that PLL would typically be used for HDMI, an internal LVDS + * panel would need to be driven by some other clock such as PLL_P + * which is shared with other peripherals. Changing the clock rate + * should therefore be avoided. 
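+	 * Outputs opt out of rate changes by leaving the pixel clock at
+	 * zero, in which case only the parent mux is switched above.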
+ */ + if (state->pclk > 0) { + err = clk_set_rate(state->clk, state->pclk); + if (err < 0) + dev_err(dc->dev, + "failed to set clock rate to %lu Hz\n", + state->pclk); + + err = clk_set_rate(dc->clk, state->pclk); + if (err < 0) + dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n", + dc->clk, state->pclk, err); + } + + DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk), + state->div); + DRM_DEBUG_KMS("pclk: %lu\n", state->pclk); + + tegra_dc_update_voltage_state(dc, state); +} + +static void tegra_dc_stop(struct tegra_dc *dc) +{ + u32 value; + + /* stop the display controller */ + value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND); + value &= ~DISP_CTRL_MODE_MASK; + tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND); + + tegra_dc_commit(dc); +} + +static bool tegra_dc_idle(struct tegra_dc *dc) +{ + u32 value; + + value = tegra_dc_readl_active(dc, DC_CMD_DISPLAY_COMMAND); + + return (value & DISP_CTRL_MODE_MASK) == 0; +} + +static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout) +{ + timeout = jiffies + msecs_to_jiffies(timeout); + + while (time_before(jiffies, timeout)) { + if (tegra_dc_idle(dc)) + return 0; + + usleep_range(1000, 2000); + } + + dev_dbg(dc->dev, "timeout waiting for DC to become idle\n"); + return -ETIMEDOUT; +} + +static void +tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc, + struct drm_atomic_state *state, + bool prepare_bandwidth_transition) +{ + const struct tegra_plane_state *old_tegra_state, *new_tegra_state; + u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw; + const struct drm_plane_state *old_plane_state; + const struct drm_crtc_state *old_crtc_state; + struct tegra_dc_window window, old_window; + struct tegra_dc *dc = to_tegra_dc(crtc); + struct tegra_plane *tegra; + struct drm_plane *plane; + + if (dc->soc->has_nvdisplay) + return; + + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + + if (!crtc->state->active) { + if (!old_crtc_state->active) + return; + + /* + * When CRTC is disabled on DPMS, the state of attached planes + * is kept unchanged. Hence we need to enforce removal of the + * bandwidths from the ICC paths. + */ + drm_atomic_crtc_for_each_plane(plane, crtc) { + tegra = to_tegra_plane(plane); + + icc_set_bw(tegra->icc_mem, 0, 0); + icc_set_bw(tegra->icc_mem_vfilter, 0, 0); + } + + return; + } + + for_each_old_plane_in_state(old_crtc_state->state, plane, + old_plane_state, i) { + old_tegra_state = to_const_tegra_plane_state(old_plane_state); + new_tegra_state = to_const_tegra_plane_state(plane->state); + tegra = to_tegra_plane(plane); + + /* + * We're iterating over the global atomic state and it contains + * planes from another CRTC, hence we need to filter out the + * planes unrelated to this CRTC. + */ + if (tegra->dc != dc) + continue; + + new_avg_bw = new_tegra_state->avg_memory_bandwidth; + old_avg_bw = old_tegra_state->avg_memory_bandwidth; + + new_peak_bw = new_tegra_state->total_peak_memory_bandwidth; + old_peak_bw = old_tegra_state->total_peak_memory_bandwidth; + + /* + * See the comment related to !crtc->state->active above, + * which explains why bandwidths need to be updated when + * CRTC is turning ON. 
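+		 * Hence the old_crtc_state->active check below: if the CRTC
+		 * was previously inactive, the bandwidth update is applied
+		 * even when the requested values are unchanged.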
+		 */
+		if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
+		    old_crtc_state->active)
+			continue;
+
+		window.src.h = drm_rect_height(&plane->state->src) >> 16;
+		window.dst.h = drm_rect_height(&plane->state->dst);
+
+		old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
+		old_window.dst.h = drm_rect_height(&old_plane_state->dst);
+
+		/*
+		 * During the preparation phase (atomic_begin), the memory
+		 * freq should go high before the DC changes are committed
+		 * if bandwidth requirement goes up, otherwise memory freq
+		 * should stay high if BW requirement goes down. The
+		 * opposite applies to the completion phase (post_commit).
+		 */
+		if (prepare_bandwidth_transition) {
+			new_avg_bw = max(old_avg_bw, new_avg_bw);
+			new_peak_bw = max(old_peak_bw, new_peak_bw);
+
+			if (tegra_plane_use_vertical_filtering(tegra, &old_window))
+				window = old_window;
+		}
+
+		icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
+
+		if (tegra_plane_use_vertical_filtering(tegra, &window))
+			icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw);
+		else
+			icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+	}
+}
+
+static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
+				      struct drm_atomic_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value;
+	int err;
+
+	if (!tegra_dc_idle(dc)) {
+		tegra_dc_stop(dc);
+
+		/*
+		 * Ignore the return value, there isn't anything useful to do
+		 * in case this fails.
+		 */
+		tegra_dc_wait_idle(dc, 100);
+	}
+
+	/*
+	 * This should really be part of the RGB encoder driver, but clearing
+	 * these bits has the side-effect of stopping the display controller.
+	 * When that happens no VBLANK interrupts will be raised. At the same
+	 * time the encoder is disabled before the display controller, so the
+	 * above code is always going to time out waiting for the controller
+	 * to go idle.
+	 *
+	 * Given the close coupling between the RGB encoder and the display
+	 * controller doing it here is still kind of okay. None of the other
+	 * encoder drivers require these bits to be cleared.
+	 *
+	 * XXX: Perhaps given that the display controller is switched off at
+	 * this point anyway maybe clearing these bits isn't even useful for
+	 * the RGB encoder?
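+	 *
+	 * Note that tegra_crtc_atomic_enable() sets the same power-control
+	 * bits again on non-nvdisplay hardware.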
+ */ + if (dc->rgb) { + value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL); + value &= ~(PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE | + PW4_ENABLE | PM0_ENABLE | PM1_ENABLE); + tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL); + } + + tegra_dc_stats_reset(&dc->stats); + drm_crtc_vblank_off(crtc); + + spin_lock_irq(&crtc->dev->event_lock); + + if (crtc->state->event) { + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + } + + spin_unlock_irq(&crtc->dev->event_lock); + + err = host1x_client_suspend(&dc->client); + if (err < 0) + dev_err(dc->dev, "failed to suspend: %d\n", err); + + dev_pm_genpd_set_performance_state(dc->dev, 0); +} + +static void tegra_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + struct tegra_dc_state *crtc_state = to_dc_state(crtc->state); + struct tegra_dc *dc = to_tegra_dc(crtc); + u32 value; + int err; + + /* apply PLL changes */ + tegra_dc_set_clock_rate(dc, crtc_state); + + err = host1x_client_resume(&dc->client); + if (err < 0) { + dev_err(dc->dev, "failed to resume: %d\n", err); + return; + } + + /* initialize display controller */ + if (dc->syncpt) { + u32 syncpt = dc->syncpt->id, enable; + + if (dc->soc->has_nvdisplay) + enable = 1 << 31; + else + enable = 1 << 8; + + value = SYNCPT_CNTRL_NO_STALL; + tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL); + + value = enable | syncpt; + tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC); + } + + if (dc->soc->has_nvdisplay) { + value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT | + DSC_OBUF_UF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_TYPE); + + value = DSC_TO_UF_INT | DSC_BBUF_UF_INT | DSC_RBUF_UF_INT | + DSC_OBUF_UF_INT | SD3_BUCKET_WALK_DONE_INT | + HEAD_UF_INT | MSF_INT | REG_TMOUT_INT | + REGION_CRC_INT | V_PULSE2_INT | V_PULSE3_INT | + VBLANK_INT | FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY); + + value = SD3_BUCKET_WALK_DONE_INT | HEAD_UF_INT | VBLANK_INT | + FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE); + + value = HEAD_UF_INT | REG_TMOUT_INT | FRAME_END_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); + + tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS); + } else { + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_TYPE); + + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY); + + /* initialize timer */ + value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) | + WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20); + tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY); + + value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) | + WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1); + tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER); + + value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE); + + value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | + WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT; + tegra_dc_writel(dc, value, DC_CMD_INT_MASK); + } + + if (dc->soc->supports_background_color) + tegra_dc_writel(dc, 0, DC_DISP_BLEND_BACKGROUND_COLOR); + else + tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR); + + /* apply pixel clock changes */ + if (!dc->soc->has_nvdisplay) { + value = 
SHIFT_CLK_DIVIDER(crtc_state->div) | PIXEL_CLK_DIVIDER_PCD1;
+ tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+ }
+
+ /* program display mode */
+ tegra_dc_set_timings(dc, mode);
+
+ /* interlacing isn't supported yet, so disable it */
+ if (dc->soc->supports_interlacing) {
+ value = tegra_dc_readl(dc, DC_DISP_INTERLACE_CONTROL);
+ value &= ~INTERLACE_ENABLE;
+ tegra_dc_writel(dc, value, DC_DISP_INTERLACE_CONTROL);
+ }
+
+ value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND);
+ value &= ~DISP_CTRL_MODE_MASK;
+ value |= DISP_CTRL_MODE_C_DISPLAY;
+ tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
+
+ if (!dc->soc->has_nvdisplay) {
+ value = tegra_dc_readl(dc, DC_CMD_DISPLAY_POWER_CONTROL);
+ value |= PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
+ PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
+ tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
+ }
+
+ /* enable underflow reporting and display red for missing pixels */
+ if (dc->soc->has_nvdisplay) {
+ value = UNDERFLOW_MODE_RED | UNDERFLOW_REPORT_ENABLE;
+ tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW);
+ }
+
+ /*
+ * The TC358768 DPI-to-DSI bridge, used on the Asus TF700T, requires
+ * a usable PCLK output before the encoder is enabled, because the
+ * bridge is clocked by PCLK and is programmed before the encoder is
+ * enabled. Hence the PCLK clock shifter must be programmed here,
+ * otherwise the output clock is not usable and the bridge hangs
+ * because of it.
+ */
+ if (dc->rgb) {
+ /* XXX: parameterize? */
+ value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
+ tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
+ }
+
+ tegra_dc_commit(dc);
+
+ drm_crtc_vblank_on(crtc);
+}
+
+static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ unsigned long flags;
+
+ tegra_crtc_update_memory_bandwidth(crtc, state, true);
+
+ if (crtc->state->event) {
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+ if (drm_crtc_vblank_get(crtc) != 0)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ else
+ drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ crtc->state->event = NULL;
+ }
+}
+
+static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
+ crtc);
+ struct tegra_dc_state *dc_state = to_dc_state(crtc_state);
+ struct tegra_dc *dc = to_tegra_dc(crtc);
+ u32 value;
+
+ if (dc->soc->has_legacy_blending) {
+ tegra_dc_writel(dc, dc_state->ckey.min, DC_DISP_COLOR_KEY0_LOWER);
+ tegra_dc_writel(dc, dc_state->ckey.max, DC_DISP_COLOR_KEY0_UPPER);
+ }
+
+ value = dc_state->planes << 8 | GENERAL_UPDATE;
+ tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+ value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+ value = dc_state->planes | GENERAL_ACT_REQ;
+ tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+ value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+}
+
+static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
+{
+ const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
+ const struct drm_format_info *fmt = state->fb->format;
+ unsigned int src_w = drm_rect_width(&state->src) >> 16;
+ unsigned int dst_w = drm_rect_width(&state->dst);
+
+ if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
+ return false;
+
+ if (soc->supports_cursor)
+ return true;
+
+ if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
+ return false;
+
+ return true;
+}
+
+static unsigned long
+tegra_plane_overlap_mask(struct drm_crtc_state *state,
+ const struct drm_plane_state *plane_state)
+{
+ const struct drm_plane_state *other_state;
+ const struct tegra_plane *tegra;
+ unsigned long overlap_mask = 0;
+ struct drm_plane *plane;
+ struct drm_rect rect;
+
+ if (!plane_state->visible || !plane_state->fb)
+ return 0;
+
+ /*
+ * The data-prefetch FIFO will easily help to overcome temporal memory
+ * pressure if another plane overlaps with the cursor plane.
+ */
+ if (tegra_plane_is_cursor(plane_state))
+ return 0;
+
+ drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
+ rect = plane_state->dst;
+
+ tegra = to_tegra_plane(other_state->plane);
+
+ if (!other_state->visible || !other_state->fb)
+ continue;
+
+ /*
+ * Ignore cursor plane overlaps because it's not practical to
+ * assume that it contributes to the bandwidth in the
+ * overlapping area if the window width is small.
+ */
+ if (tegra_plane_is_cursor(other_state))
+ continue;
+
+ if (drm_rect_intersect(&rect, &other_state->dst))
+ overlap_mask |= BIT(tegra->index);
+ }
+
+ return overlap_mask;
+}
+
+static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
+ u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
+ bool all_planes_overlap_simultaneously = true;
+ const struct tegra_plane_state *tegra_state;
+ const struct drm_plane_state *plane_state;
+ struct tegra_dc *dc = to_tegra_dc(crtc);
+ const struct drm_crtc_state *old_state;
+ struct drm_crtc_state *new_state;
+ struct tegra_plane *tegra;
+ struct drm_plane *plane;
+
+ /*
+ * The nv-display uses shared planes. The algorithm below assumes
+ * a maximum of 3 planes per CRTC; this assumption isn't applicable
+ * to the nv-display. Note that T124 has additional windows, but
+ * currently they aren't supported by the driver.
+ */
+ if (dc->soc->has_nvdisplay)
+ return 0;
+
+ new_state = drm_atomic_get_new_crtc_state(state, crtc);
+ old_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * For overlapping planes, pixel data is fetched for each plane at
+ * the same time, hence the bandwidths are accumulated in this case.
+ * This needs to be taken into account when calculating the total
+ * bandwidth consumed by all planes.
+ *
+ * Here we get the overlapping state of each plane, which is a
+ * bitmask of plane indices telling which planes it overlaps with.
+ * Note that bitmask[plane] includes BIT(plane) in order to make
+ * the further code nicer and simpler.
+ */
+ drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+ tegra_state = to_const_tegra_plane_state(plane_state);
+ tegra = to_tegra_plane(plane);
+
+ if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
+ return -EINVAL;
+
+ plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth;
+ mask = tegra_plane_overlap_mask(new_state, plane_state);
+ overlap_mask[tegra->index] = mask;
+
+ if (hweight_long(mask) != 3)
+ all_planes_overlap_simultaneously = false;
+ }
+
+ /*
+ * Then we calculate the maximum bandwidth of each plane state.
+ * The bandwidth includes the plane BW plus the BW of the
+ * "simultaneously" overlapping planes, where "simultaneously"
+ * means areas where the DC fetches from the planes simultaneously
+ * during the scan-out process.
+ *
+ * For example, if plane A overlaps with planes B and C, but B and C
+ * don't overlap, then the peak bandwidth will be either in the area
+ * where A-and-B or A-and-C planes overlap.
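+ *
+ * Illustrative numbers (hypothetical): with peak bandwidths of
+ * A = 100, B = 60 and C = 40 units, the A-and-B area needs
+ * 100 + 60 = 160 units and the A-and-C area 100 + 40 = 140
+ * units, so A's total peak bandwidth below becomes
+ * 100 + max(60, 40) = 160 units. Only if all three planes
+ * overlapped simultaneously would the full sum
+ * 100 + 60 + 40 = 200 units be used instead.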
+ *
+ * The plane_peak_bw[] array contains the peak memory bandwidth
+ * value of each plane; this information is needed by the
+ * interconnect provider in order to set up latency allowance
+ * based on the peak BW, see tegra_crtc_update_memory_bandwidth().
+ */
+ drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+ u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0;
+
+ /*
+ * Note that a plane's atomic check doesn't touch the
+ * total_peak_memory_bandwidth of an enabled plane, hence the
+ * current state contains the old bandwidth state from the
+ * previous CRTC commit.
+ */
+ tegra_state = to_const_tegra_plane_state(plane_state);
+ tegra = to_tegra_plane(plane);
+
+ for_each_set_bit(i, &overlap_mask[tegra->index], 3) {
+ if (i == tegra->index)
+ continue;
+
+ if (all_planes_overlap_simultaneously)
+ overlap_bw += plane_peak_bw[i];
+ else
+ overlap_bw = max(overlap_bw, plane_peak_bw[i]);
+ }
+
+ new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw;
+ old_peak_bw = tegra_state->total_peak_memory_bandwidth;
+
+ /*
+ * If the plane's peak bandwidth changed (for example the plane
+ * isn't overlapped anymore) and the plane isn't yet in the
+ * atomic state, then add the plane to the state so that its
+ * bandwidth gets updated.
+ */
+ if (old_peak_bw != new_peak_bw) {
+ struct tegra_plane_state *new_tegra_state;
+ struct drm_plane_state *new_plane_state;
+
+ new_plane_state = drm_atomic_get_plane_state(state, plane);
+ if (IS_ERR(new_plane_state))
+ return PTR_ERR(new_plane_state);
+
+ new_tegra_state = to_tegra_plane_state(new_plane_state);
+ new_tegra_state->total_peak_memory_bandwidth = new_peak_bw;
+ }
+ }
+
+ return 0;
+}
+
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ int err;
+
+ err = tegra_crtc_calculate_memory_bandwidth(crtc, state);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+ struct drm_atomic_state *state)
+{
+ /*
+ * Display bandwidth is allowed to go down only once the hardware
+ * state is known to be armed, i.e. the state was committed and the
+ * VBLANK event was received.
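+ *
+ * The bandwidth-raising counterpart runs from
+ * tegra_crtc_atomic_begin(), which calls
+ * tegra_crtc_update_memory_bandwidth() with
+ * prepare_bandwidth_transition set to true, so the two calls
+ * bracket every commit.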
+ */ + tegra_crtc_update_memory_bandwidth(crtc, state, false); +} + +static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = { + .atomic_check = tegra_crtc_atomic_check, + .atomic_begin = tegra_crtc_atomic_begin, + .atomic_flush = tegra_crtc_atomic_flush, + .atomic_enable = tegra_crtc_atomic_enable, + .atomic_disable = tegra_crtc_atomic_disable, +}; + +static irqreturn_t tegra_dc_irq(int irq, void *data) +{ + struct tegra_dc *dc = data; + unsigned long status; + + status = tegra_dc_readl(dc, DC_CMD_INT_STATUS); + tegra_dc_writel(dc, status, DC_CMD_INT_STATUS); + + if (status & FRAME_END_INT) { + /* + dev_dbg(dc->dev, "%s(): frame end\n", __func__); + */ + dc->stats.frames_total++; + dc->stats.frames++; + } + + if (status & VBLANK_INT) { + /* + dev_dbg(dc->dev, "%s(): vertical blank\n", __func__); + */ + drm_crtc_handle_vblank(&dc->base); + dc->stats.vblank_total++; + dc->stats.vblank++; + } + + if (status & (WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT)) { + /* + dev_dbg(dc->dev, "%s(): underflow\n", __func__); + */ + dc->stats.underflow_total++; + dc->stats.underflow++; + } + + if (status & (WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT)) { + /* + dev_dbg(dc->dev, "%s(): overflow\n", __func__); + */ + dc->stats.overflow_total++; + dc->stats.overflow++; + } + + if (status & HEAD_UF_INT) { + dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__); + dc->stats.underflow_total++; + dc->stats.underflow++; + } + + return IRQ_HANDLED; +} + +static bool tegra_dc_has_window_groups(struct tegra_dc *dc) +{ + unsigned int i; + + if (!dc->soc->wgrps) + return true; + + for (i = 0; i < dc->soc->num_wgrps; i++) { + const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i]; + + if (wgrp->dc == dc->pipe && wgrp->num_windows > 0) + return true; + } + + return false; +} + +static int tegra_dc_early_init(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs++; + + return 0; +} + +static int tegra_dc_init(struct host1x_client *client) +{ + struct tegra_drm_client *drm_client = to_tegra_drm_client(client); + struct drm_device *drm = dev_get_drvdata(client->host); + struct host1x *host = dev_get_drvdata(drm->dev->parent); + struct tegra_dc *dc = host1x_client_to_dc(client); + struct tegra_drm *tegra = drm->dev_private; + struct drm_plane *primary = NULL; + struct drm_plane *cursor = NULL; + int err; + + /* + * XXX do not register DCs with no window groups because we cannot + * assign a primary plane to them, which in turn will cause KMS to + * crash. + */ + if (!tegra_dc_has_window_groups(dc)) + return 0; + + /* + * Set the display hub as the host1x client parent for the display + * controller. This is needed for the runtime reference counting that + * ensures the display hub is always powered when any of the display + * controllers are. 
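+ *
+ * Note that tegra_dc_couple() sets up an analogous parent link
+ * between DC1 and DC0 on Tegra20, where the two controllers are
+ * power-coupled instead of hanging off a display hub.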
+ */ + if (dc->soc->has_nvdisplay) + client->parent = &tegra->hub->client; + + dc->syncpt = host1x_syncpt_request(host); + if (IS_ERR(dc->syncpt)) { + dev_warn(dc->dev, "failed to allocate syncpoint %ld\n", + PTR_ERR(dc->syncpt)); + dc->syncpt = NULL; + } else { + host1x_syncpt_associate_device(dc->syncpt, dc->dev); + host1x_syncpt_reset(dc->syncpt, 0); + } + + dc->group = tegra_drm_client_iommu_attach(drm_client, true); + if (IS_ERR(dc->group)) { + err = PTR_ERR(dc->group); + dev_err(client->dev, "failed to attach to domain: %d\n", err); + return err; + } + + if (dc->soc->wgrps) + primary = tegra_dc_add_shared_planes(drm, dc); + else + primary = tegra_dc_add_planes(drm, dc); + + if (IS_ERR(primary)) { + err = PTR_ERR(primary); + goto cleanup; + } + + if (dc->soc->supports_cursor) { + cursor = tegra_dc_cursor_plane_create(drm, dc); + if (IS_ERR(cursor)) { + err = PTR_ERR(cursor); + goto cleanup; + } + } else { + /* dedicate one overlay to mouse cursor */ + cursor = tegra_dc_overlay_plane_create(drm, dc, 2, true); + if (IS_ERR(cursor)) { + err = PTR_ERR(cursor); + goto cleanup; + } + } + + err = drm_crtc_init_with_planes(drm, &dc->base, primary, cursor, + &tegra_crtc_funcs, NULL); + if (err < 0) + goto cleanup; + + drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs); + + /* + * Keep track of the minimum pitch alignment across all display + * controllers. + */ + if (dc->soc->pitch_align > tegra->pitch_align) + tegra->pitch_align = dc->soc->pitch_align; + + /* track maximum resolution */ + if (dc->soc->has_nvdisplay) + drm->mode_config.max_width = drm->mode_config.max_height = 16384; + else + drm->mode_config.max_width = drm->mode_config.max_height = 4096; + + err = tegra_dc_rgb_init(drm, dc); + if (err < 0 && err != -ENODEV) { + dev_err(dc->dev, "failed to initialize RGB output: %d\n", err); + goto cleanup; + } + + err = devm_request_irq(dc->dev, dc->irq, tegra_dc_irq, 0, + dev_name(dc->dev), dc); + if (err < 0) { + dev_err(dc->dev, "failed to request IRQ#%u: %d\n", dc->irq, + err); + goto cleanup; + } + + /* + * Inherit the DMA parameters (such as maximum segment size) from the + * parent host1x device. 
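+ *
+ * Note that this shares the parent's dma_parms structure by
+ * pointer rather than copying it; the pointer is cleared again
+ * in tegra_dc_exit() so that it isn't left dangling.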
+ */ + client->dev->dma_parms = client->host->dma_parms; + + return 0; + +cleanup: + if (!IS_ERR_OR_NULL(cursor)) + drm_plane_cleanup(cursor); + + if (!IS_ERR(primary)) + drm_plane_cleanup(primary); + + tegra_drm_client_iommu_detach(drm_client, dc->group, true); + host1x_syncpt_put(dc->syncpt); + + return err; +} + +static int tegra_dc_exit(struct host1x_client *client) +{ + struct tegra_drm_client *drm_client = to_tegra_drm_client(client); + struct tegra_dc *dc = host1x_client_to_dc(client); + int err; + + if (!tegra_dc_has_window_groups(dc)) + return 0; + + /* avoid a dangling pointer just in case this disappears */ + client->dev->dma_parms = NULL; + + devm_free_irq(dc->dev, dc->irq, dc); + + err = tegra_dc_rgb_exit(dc); + if (err) { + dev_err(dc->dev, "failed to shutdown RGB output: %d\n", err); + return err; + } + + tegra_drm_client_iommu_detach(drm_client, dc->group, true); + host1x_syncpt_put(dc->syncpt); + + return 0; +} + +static int tegra_dc_late_exit(struct host1x_client *client) +{ + struct drm_device *drm = dev_get_drvdata(client->host); + struct tegra_drm *tegra = drm->dev_private; + + tegra->num_crtcs--; + + return 0; +} + +static int tegra_dc_runtime_suspend(struct host1x_client *client) +{ + struct tegra_dc *dc = host1x_client_to_dc(client); + struct device *dev = client->dev; + int err; + + err = reset_control_assert(dc->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + + if (dc->soc->has_powergate) + tegra_powergate_power_off(dc->powergate); + + clk_disable_unprepare(dc->clk); + pm_runtime_put_sync(dev); + + return 0; +} + +static int tegra_dc_runtime_resume(struct host1x_client *client) +{ + struct tegra_dc *dc = host1x_client_to_dc(client); + struct device *dev = client->dev; + int err; + + err = pm_runtime_resume_and_get(dev); + if (err < 0) { + dev_err(dev, "failed to get runtime PM: %d\n", err); + return err; + } + + if (dc->soc->has_powergate) { + err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk, + dc->rst); + if (err < 0) { + dev_err(dev, "failed to power partition: %d\n", err); + goto put_rpm; + } + } else { + err = clk_prepare_enable(dc->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto put_rpm; + } + + err = reset_control_deassert(dc->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + } + + return 0; + +disable_clk: + clk_disable_unprepare(dc->clk); +put_rpm: + pm_runtime_put_sync(dev); + return err; +} + +static const struct host1x_client_ops dc_client_ops = { + .early_init = tegra_dc_early_init, + .init = tegra_dc_init, + .exit = tegra_dc_exit, + .late_exit = tegra_dc_late_exit, + .suspend = tegra_dc_runtime_suspend, + .resume = tegra_dc_runtime_resume, +}; + +static const struct tegra_dc_soc_info tegra20_dc_soc_info = { + .has_win_a_csc = false, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 8, + .has_powergate = false, + .coupled_pm = true, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra20_primary_formats), + .primary_formats = tegra20_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), + .overlay_formats = tegra20_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = true, + .has_win_b_vfilter_mem_client = true, + .has_win_c_without_vert_filter = true, + 
.plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, +}; + +static const struct tegra_dc_soc_info tegra30_dc_soc_info = { + .has_win_a_csc = false, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 8, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra20_primary_formats), + .primary_formats = tegra20_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), + .overlay_formats = tegra20_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = true, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, +}; + +static const struct tegra_dc_soc_info tegra114_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = false, + .supports_interlacing = false, + .supports_cursor = false, + .supports_block_linear = false, + .supports_sector_layout = false, + .has_legacy_blending = true, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra114_primary_formats), + .primary_formats = tegra114_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), + .overlay_formats = tegra114_overlay_formats, + .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = true, + .has_pll_d2_out0 = true, +}; + +static const struct tegra_dc_soc_info tegra124_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra124_primary_formats), + .primary_formats = tegra124_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats), + .overlay_formats = tegra124_overlay_formats, + .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, +}; + +static const struct tegra_dc_soc_info tegra210_dc_soc_info = { + .has_win_a_csc = true, + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = true, + .coupled_pm = false, + .has_nvdisplay = false, + .num_primary_formats = ARRAY_SIZE(tegra114_primary_formats), + .primary_formats = tegra114_primary_formats, + .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), + .overlay_formats = tegra114_overlay_formats, + .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_b_vfilter_mem_client = false, + .has_win_c_without_vert_filter = false, + .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = true, +}; + +static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { + { + .index = 0, + .dc = 0, + .windows = (const unsigned int[]) { 0 }, + 
.num_windows = 1, + }, { + .index = 1, + .dc = 1, + .windows = (const unsigned int[]) { 1 }, + .num_windows = 1, + }, { + .index = 2, + .dc = 1, + .windows = (const unsigned int[]) { 2 }, + .num_windows = 1, + }, { + .index = 3, + .dc = 2, + .windows = (const unsigned int[]) { 3 }, + .num_windows = 1, + }, { + .index = 4, + .dc = 2, + .windows = (const unsigned int[]) { 4 }, + .num_windows = 1, + }, { + .index = 5, + .dc = 2, + .windows = (const unsigned int[]) { 5 }, + .num_windows = 1, + }, +}; + +static const struct tegra_dc_soc_info tegra186_dc_soc_info = { + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = false, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay = true, + .wgrps = tegra186_dc_wgrps, + .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, +}; + +static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = { + { + .index = 0, + .dc = 0, + .windows = (const unsigned int[]) { 0 }, + .num_windows = 1, + }, { + .index = 1, + .dc = 1, + .windows = (const unsigned int[]) { 1 }, + .num_windows = 1, + }, { + .index = 2, + .dc = 1, + .windows = (const unsigned int[]) { 2 }, + .num_windows = 1, + }, { + .index = 3, + .dc = 2, + .windows = (const unsigned int[]) { 3 }, + .num_windows = 1, + }, { + .index = 4, + .dc = 2, + .windows = (const unsigned int[]) { 4 }, + .num_windows = 1, + }, { + .index = 5, + .dc = 2, + .windows = (const unsigned int[]) { 5 }, + .num_windows = 1, + }, +}; + +static const struct tegra_dc_soc_info tegra194_dc_soc_info = { + .supports_background_color = true, + .supports_interlacing = true, + .supports_cursor = true, + .supports_block_linear = true, + .supports_sector_layout = true, + .has_legacy_blending = false, + .pitch_align = 64, + .has_powergate = false, + .coupled_pm = false, + .has_nvdisplay = true, + .wgrps = tegra194_dc_wgrps, + .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps), + .plane_tiled_memory_bandwidth_x2 = false, + .has_pll_d2_out0 = false, +}; + +static const struct of_device_id tegra_dc_of_match[] = { + { + .compatible = "nvidia,tegra194-dc", + .data = &tegra194_dc_soc_info, + }, { + .compatible = "nvidia,tegra186-dc", + .data = &tegra186_dc_soc_info, + }, { + .compatible = "nvidia,tegra210-dc", + .data = &tegra210_dc_soc_info, + }, { + .compatible = "nvidia,tegra124-dc", + .data = &tegra124_dc_soc_info, + }, { + .compatible = "nvidia,tegra114-dc", + .data = &tegra114_dc_soc_info, + }, { + .compatible = "nvidia,tegra30-dc", + .data = &tegra30_dc_soc_info, + }, { + .compatible = "nvidia,tegra20-dc", + .data = &tegra20_dc_soc_info, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, tegra_dc_of_match); + +static int tegra_dc_parse_dt(struct tegra_dc *dc) +{ + struct device_node *np; + u32 value = 0; + int err; + + err = of_property_read_u32(dc->dev->of_node, "nvidia,head", &value); + if (err < 0) { + dev_err(dc->dev, "missing \"nvidia,head\" property\n"); + + /* + * If the nvidia,head property isn't present, try to find the + * correct head number by looking up the position of this + * display controller's node within the device tree. Assuming + * that the nodes are ordered properly in the DTS file and + * that the translation into a flattened device tree blob + * preserves that ordering this will actually yield the right + * head number. 
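+ *
+ * For example (illustrative node names): with two controllers
+ * described as dc@54200000 followed by dc@54240000 and neither
+ * carrying an "nvidia,head" property, the first node resolves
+ * to head 0 and the second to head 1.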
+ * + * If those assumptions don't hold, this will still work for + * cases where only a single display controller is used. + */ + for_each_matching_node(np, tegra_dc_of_match) { + if (np == dc->dev->of_node) { + of_node_put(np); + break; + } + + value++; + } + } + + dc->pipe = value; + + return 0; +} + +static int tegra_dc_match_by_pipe(struct device *dev, const void *data) +{ + struct tegra_dc *dc = dev_get_drvdata(dev); + unsigned int pipe = (unsigned long)(void *)data; + + return dc->pipe == pipe; +} + +static int tegra_dc_couple(struct tegra_dc *dc) +{ + /* + * On Tegra20, DC1 requires DC0 to be taken out of reset in order to + * be enabled, otherwise CPU hangs on writing to CMD_DISPLAY_COMMAND / + * POWER_CONTROL registers during CRTC enabling. + */ + if (dc->soc->coupled_pm && dc->pipe == 1) { + struct device *companion; + struct tegra_dc *parent; + + companion = driver_find_device(dc->dev->driver, NULL, (const void *)0, + tegra_dc_match_by_pipe); + if (!companion) + return -EPROBE_DEFER; + + parent = dev_get_drvdata(companion); + dc->client.parent = &parent->client; + + dev_dbg(dc->dev, "coupled to %s\n", dev_name(companion)); + } + + return 0; +} + +static int tegra_dc_init_opp_table(struct tegra_dc *dc) +{ + struct tegra_core_opp_params opp_params = {}; + int err; + + err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params); + if (err && err != -ENODEV) + return err; + + if (err) + dc->has_opp_table = false; + else + dc->has_opp_table = true; + + return 0; +} + +static int tegra_dc_probe(struct platform_device *pdev) +{ + u64 dma_mask = dma_get_mask(pdev->dev.parent); + struct tegra_dc *dc; + int err; + + err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask); + if (err < 0) { + dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL); + if (!dc) + return -ENOMEM; + + dc->soc = of_device_get_match_data(&pdev->dev); + + INIT_LIST_HEAD(&dc->list); + dc->dev = &pdev->dev; + + err = tegra_dc_parse_dt(dc); + if (err < 0) + return err; + + err = tegra_dc_couple(dc); + if (err < 0) + return err; + + dc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dc->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + return PTR_ERR(dc->clk); + } + + dc->rst = devm_reset_control_get(&pdev->dev, "dc"); + if (IS_ERR(dc->rst)) { + dev_err(&pdev->dev, "failed to get reset\n"); + return PTR_ERR(dc->rst); + } + + /* assert reset and disable clock */ + err = clk_prepare_enable(dc->clk); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + err = reset_control_assert(dc->rst); + if (err < 0) + return err; + + usleep_range(2000, 4000); + + clk_disable_unprepare(dc->clk); + + if (dc->soc->has_powergate) { + if (dc->pipe == 0) + dc->powergate = TEGRA_POWERGATE_DIS; + else + dc->powergate = TEGRA_POWERGATE_DISB; + + tegra_powergate_power_off(dc->powergate); + } + + err = tegra_dc_init_opp_table(dc); + if (err < 0) + return err; + + dc->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(dc->regs)) + return PTR_ERR(dc->regs); + + dc->irq = platform_get_irq(pdev, 0); + if (dc->irq < 0) + return -ENXIO; + + err = tegra_dc_rgb_probe(dc); + if (err < 0 && err != -ENODEV) + return dev_err_probe(&pdev->dev, err, + "failed to probe RGB output\n"); + + platform_set_drvdata(pdev, dc); + pm_runtime_enable(&pdev->dev); + + INIT_LIST_HEAD(&dc->client.list); + dc->client.ops = &dc_client_ops; + dc->client.dev = &pdev->dev; + + err = host1x_client_register(&dc->client); + if (err < 0) { + dev_err(&pdev->dev, 
"failed to register host1x client: %d\n", + err); + goto disable_pm; + } + + return 0; + +disable_pm: + pm_runtime_disable(&pdev->dev); + tegra_dc_rgb_remove(dc); + + return err; +} + +static int tegra_dc_remove(struct platform_device *pdev) +{ + struct tegra_dc *dc = platform_get_drvdata(pdev); + int err; + + err = host1x_client_unregister(&dc->client); + if (err < 0) { + dev_err(&pdev->dev, "failed to unregister host1x client: %d\n", + err); + return err; + } + + err = tegra_dc_rgb_remove(dc); + if (err < 0) { + dev_err(&pdev->dev, "failed to remove RGB output: %d\n", err); + return err; + } + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +struct platform_driver tegra_dc_driver = { + .driver = { + .name = "tegra-dc", + .of_match_table = tegra_dc_of_match, + }, + .probe = tegra_dc_probe, + .remove = tegra_dc_remove, +}; diff --git a/drivers/gpu/drm/grate/dc.h b/drivers/gpu/drm/grate/dc.h new file mode 100644 index 0000000000000..0418a4e0ad780 --- /dev/null +++ b/drivers/gpu/drm/grate/dc.h @@ -0,0 +1,827 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved. + */ + +#ifndef TEGRA_DC_H +#define TEGRA_DC_H 1 + +#include + +#include + +#include "drm.h" + +struct tegra_output; + +#define TEGRA_DC_LEGACY_PLANES_NUM 7 + +struct tegra_dc_color_key_state { + u32 min; + u32 max; +}; + +struct tegra_dc_state { + struct drm_crtc_state base; + + struct clk *clk; + unsigned long pclk; + unsigned int div; + + struct tegra_dc_color_key_state ckey; + + u32 planes; +}; + +static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state) +{ + if (state) + return container_of(state, struct tegra_dc_state, base); + + return NULL; +} + +struct tegra_dc_stats { + unsigned long frames; + unsigned long vblank; + unsigned long underflow; + unsigned long overflow; + + unsigned long frames_total; + unsigned long vblank_total; + unsigned long underflow_total; + unsigned long overflow_total; +}; + +struct tegra_windowgroup_soc { + unsigned int index; + unsigned int dc; + const unsigned int *windows; + unsigned int num_windows; +}; + +struct tegra_dc_soc_info { + bool has_win_a_csc; + bool supports_background_color; + bool supports_interlacing; + bool supports_cursor; + bool supports_block_linear; + bool supports_sector_layout; + bool has_legacy_blending; + unsigned int pitch_align; + bool has_powergate; + bool coupled_pm; + bool has_nvdisplay; + const struct tegra_windowgroup_soc *wgrps; + unsigned int num_wgrps; + const u32 *primary_formats; + unsigned int num_primary_formats; + const u32 *overlay_formats; + unsigned int num_overlay_formats; + const u64 *modifiers; + bool has_win_a_without_filters; + bool has_win_b_vfilter_mem_client; + bool has_win_c_without_vert_filter; + bool plane_tiled_memory_bandwidth_x2; + bool has_pll_d2_out0; +}; + +struct tegra_dc { + struct host1x_client client; + struct host1x_syncpt *syncpt; + struct device *dev; + + struct iommu_group *group; + + struct drm_crtc base; + unsigned int powergate; + int pipe; + + struct clk *clk; + struct reset_control *rst; + void __iomem *regs; + int irq; + + struct tegra_output *rgb; + + struct tegra_dc_stats stats; + struct list_head list; + + struct drm_info_list *debugfs_files; + + const struct tegra_dc_soc_info *soc; + + bool has_opp_table; +}; + +static inline struct tegra_dc * +host1x_client_to_dc(struct host1x_client *client) +{ + return container_of(client, struct tegra_dc, client); +} + +static inline struct tegra_dc 
*to_tegra_dc(struct drm_crtc *crtc) +{ + return crtc ? container_of(crtc, struct tegra_dc, base) : NULL; +} + +static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value, + unsigned int offset) +{ + trace_dc_writel(dc->dev, offset, value); + writel(value, dc->regs + (offset << 2)); +} + +static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned int offset) +{ + u32 value = readl(dc->regs + (offset << 2)); + + trace_dc_readl(dc->dev, offset, value); + + return value; +} + +struct tegra_dc_window { + struct { + unsigned int yof; + unsigned int kyrgb; + unsigned int kur; + unsigned int kvr; + unsigned int kug; + unsigned int kvg; + unsigned int kub; + unsigned int kvb; + } csc; + struct { + unsigned int x; + unsigned int y; + unsigned int w; + unsigned int h; + } src; + struct { + unsigned int x; + unsigned int y; + unsigned int w; + unsigned int h; + } dst; + unsigned int bits_per_pixel; + unsigned int stride[2]; + unsigned long base[3]; + unsigned int zpos; + bool reflect_x; + bool reflect_y; + + struct tegra_bo_tiling tiling; + u32 format; + u32 swap; +}; + +/* from dc.c */ +bool tegra_dc_has_output(struct tegra_dc *dc, struct device *dev); +void tegra_dc_commit(struct tegra_dc *dc); +int tegra_dc_state_setup_clock(struct tegra_dc *dc, + struct drm_crtc_state *crtc_state, + struct clk *clk, unsigned long pclk, + unsigned int div); +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc, + struct drm_atomic_state *state); + +/* from rgb.c */ +int tegra_dc_rgb_probe(struct tegra_dc *dc); +int tegra_dc_rgb_remove(struct tegra_dc *dc); +int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc); +int tegra_dc_rgb_exit(struct tegra_dc *dc); + +#define DC_CMD_GENERAL_INCR_SYNCPT 0x000 +#define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL 0x001 +#define SYNCPT_CNTRL_NO_STALL (1 << 8) +#define SYNCPT_CNTRL_SOFT_RESET (1 << 0) +#define DC_CMD_GENERAL_INCR_SYNCPT_ERROR 0x002 +#define DC_CMD_WIN_A_INCR_SYNCPT 0x008 +#define DC_CMD_WIN_A_INCR_SYNCPT_CNTRL 0x009 +#define DC_CMD_WIN_A_INCR_SYNCPT_ERROR 0x00a +#define DC_CMD_WIN_B_INCR_SYNCPT 0x010 +#define DC_CMD_WIN_B_INCR_SYNCPT_CNTRL 0x011 +#define DC_CMD_WIN_B_INCR_SYNCPT_ERROR 0x012 +#define DC_CMD_WIN_C_INCR_SYNCPT 0x018 +#define DC_CMD_WIN_C_INCR_SYNCPT_CNTRL 0x019 +#define DC_CMD_WIN_C_INCR_SYNCPT_ERROR 0x01a +#define DC_CMD_CONT_SYNCPT_VSYNC 0x028 +#define SYNCPT_VSYNC_ENABLE (1 << 8) +#define DC_CMD_DISPLAY_COMMAND_OPTION0 0x031 +#define DC_CMD_DISPLAY_COMMAND 0x032 +#define DISP_CTRL_MODE_STOP (0 << 5) +#define DISP_CTRL_MODE_C_DISPLAY (1 << 5) +#define DISP_CTRL_MODE_NC_DISPLAY (2 << 5) +#define DISP_CTRL_MODE_MASK (3 << 5) +#define DC_CMD_SIGNAL_RAISE 0x033 +#define DC_CMD_DISPLAY_POWER_CONTROL 0x036 +#define PW0_ENABLE (1 << 0) +#define PW1_ENABLE (1 << 2) +#define PW2_ENABLE (1 << 4) +#define PW3_ENABLE (1 << 6) +#define PW4_ENABLE (1 << 8) +#define PM0_ENABLE (1 << 16) +#define PM1_ENABLE (1 << 18) + +#define DC_CMD_INT_STATUS 0x037 +#define DC_CMD_INT_MASK 0x038 +#define DC_CMD_INT_ENABLE 0x039 +#define DC_CMD_INT_TYPE 0x03a +#define DC_CMD_INT_POLARITY 0x03b +#define CTXSW_INT (1 << 0) +#define FRAME_END_INT (1 << 1) +#define VBLANK_INT (1 << 2) +#define V_PULSE3_INT (1 << 4) +#define V_PULSE2_INT (1 << 5) +#define REGION_CRC_INT (1 << 6) +#define REG_TMOUT_INT (1 << 7) +#define WIN_A_UF_INT (1 << 8) +#define WIN_B_UF_INT (1 << 9) +#define WIN_C_UF_INT (1 << 10) +#define MSF_INT (1 << 12) +#define WIN_A_OF_INT (1 << 14) +#define WIN_B_OF_INT (1 << 15) +#define WIN_C_OF_INT (1 << 16) +#define HEAD_UF_INT (1 << 23) +#define 
SD3_BUCKET_WALK_DONE_INT (1 << 24) +#define DSC_OBUF_UF_INT (1 << 26) +#define DSC_RBUF_UF_INT (1 << 27) +#define DSC_BBUF_UF_INT (1 << 28) +#define DSC_TO_UF_INT (1 << 29) + +#define DC_CMD_SIGNAL_RAISE1 0x03c +#define DC_CMD_SIGNAL_RAISE2 0x03d +#define DC_CMD_SIGNAL_RAISE3 0x03e + +#define DC_CMD_STATE_ACCESS 0x040 +#define READ_MUX (1 << 0) +#define WRITE_MUX (1 << 2) + +#define DC_CMD_STATE_CONTROL 0x041 +#define GENERAL_ACT_REQ (1 << 0) +#define WIN_A_ACT_REQ (1 << 1) +#define WIN_B_ACT_REQ (1 << 2) +#define WIN_C_ACT_REQ (1 << 3) +#define CURSOR_ACT_REQ (1 << 7) +#define GENERAL_UPDATE (1 << 8) +#define WIN_A_UPDATE (1 << 9) +#define WIN_B_UPDATE (1 << 10) +#define WIN_C_UPDATE (1 << 11) +#define CURSOR_UPDATE (1 << 15) +#define COMMON_ACTREQ (1 << 16) +#define COMMON_UPDATE (1 << 17) +#define NC_HOST_TRIG (1 << 24) + +#define DC_CMD_DISPLAY_WINDOW_HEADER 0x042 +#define WINDOW_A_SELECT (1 << 4) +#define WINDOW_B_SELECT (1 << 5) +#define WINDOW_C_SELECT (1 << 6) + +#define DC_CMD_REG_ACT_CONTROL 0x043 + +#define DC_COM_CRC_CONTROL 0x300 +#define DC_COM_CRC_CONTROL_ALWAYS (1 << 3) +#define DC_COM_CRC_CONTROL_FULL_FRAME (0 << 2) +#define DC_COM_CRC_CONTROL_ACTIVE_DATA (1 << 2) +#define DC_COM_CRC_CONTROL_WAIT (1 << 1) +#define DC_COM_CRC_CONTROL_ENABLE (1 << 0) +#define DC_COM_CRC_CHECKSUM 0x301 +#define DC_COM_PIN_OUTPUT_ENABLE(x) (0x302 + (x)) +#define DC_COM_PIN_OUTPUT_POLARITY(x) (0x306 + (x)) +#define LVS_OUTPUT_POLARITY_LOW (1 << 28) +#define LHS_OUTPUT_POLARITY_LOW (1 << 30) +#define DC_COM_PIN_OUTPUT_DATA(x) (0x30a + (x)) +#define DC_COM_PIN_INPUT_ENABLE(x) (0x30e + (x)) +#define DC_COM_PIN_INPUT_DATA(x) (0x312 + (x)) +#define DC_COM_PIN_OUTPUT_SELECT(x) (0x314 + (x)) + +#define DC_COM_PIN_MISC_CONTROL 0x31b +#define DC_COM_PIN_PM0_CONTROL 0x31c +#define DC_COM_PIN_PM0_DUTY_CYCLE 0x31d +#define DC_COM_PIN_PM1_CONTROL 0x31e +#define DC_COM_PIN_PM1_DUTY_CYCLE 0x31f + +#define DC_COM_SPI_CONTROL 0x320 +#define DC_COM_SPI_START_BYTE 0x321 +#define DC_COM_HSPI_WRITE_DATA_AB 0x322 +#define DC_COM_HSPI_WRITE_DATA_CD 0x323 +#define DC_COM_HSPI_CS_DC 0x324 +#define DC_COM_SCRATCH_REGISTER_A 0x325 +#define DC_COM_SCRATCH_REGISTER_B 0x326 +#define DC_COM_GPIO_CTRL 0x327 +#define DC_COM_GPIO_DEBOUNCE_COUNTER 0x328 +#define DC_COM_CRC_CHECKSUM_LATCHED 0x329 + +#define DC_COM_RG_UNDERFLOW 0x365 +#define UNDERFLOW_MODE_RED (1 << 8) +#define UNDERFLOW_REPORT_ENABLE (1 << 0) + +#define DC_DISP_DISP_SIGNAL_OPTIONS0 0x400 +#define H_PULSE0_ENABLE (1 << 8) +#define H_PULSE1_ENABLE (1 << 10) +#define H_PULSE2_ENABLE (1 << 12) + +#define DC_DISP_DISP_SIGNAL_OPTIONS1 0x401 + +#define DC_DISP_DISP_WIN_OPTIONS 0x402 +#define HDMI_ENABLE (1 << 30) +#define DSI_ENABLE (1 << 29) +#define SOR1_TIMING_CYA (1 << 27) +#define CURSOR_ENABLE (1 << 16) + +#define SOR_ENABLE(x) (1 << (25 + (((x) > 1) ? 
((x) + 1) : (x)))) + +#define DC_DISP_DISP_MEM_HIGH_PRIORITY 0x403 +#define CURSOR_THRESHOLD(x) (((x) & 0x03) << 24) +#define WINDOW_A_THRESHOLD(x) (((x) & 0x7f) << 16) +#define WINDOW_B_THRESHOLD(x) (((x) & 0x7f) << 8) +#define WINDOW_C_THRESHOLD(x) (((x) & 0xff) << 0) + +#define DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER 0x404 +#define CURSOR_DELAY(x) (((x) & 0x3f) << 24) +#define WINDOW_A_DELAY(x) (((x) & 0x3f) << 16) +#define WINDOW_B_DELAY(x) (((x) & 0x3f) << 8) +#define WINDOW_C_DELAY(x) (((x) & 0x3f) << 0) + +#define DC_DISP_DISP_TIMING_OPTIONS 0x405 +#define VSYNC_H_POSITION(x) ((x) & 0xfff) + +#define DC_DISP_REF_TO_SYNC 0x406 +#define DC_DISP_SYNC_WIDTH 0x407 +#define DC_DISP_BACK_PORCH 0x408 +#define DC_DISP_ACTIVE 0x409 +#define DC_DISP_FRONT_PORCH 0x40a +#define DC_DISP_H_PULSE0_CONTROL 0x40b +#define DC_DISP_H_PULSE0_POSITION_A 0x40c +#define DC_DISP_H_PULSE0_POSITION_B 0x40d +#define DC_DISP_H_PULSE0_POSITION_C 0x40e +#define DC_DISP_H_PULSE0_POSITION_D 0x40f +#define DC_DISP_H_PULSE1_CONTROL 0x410 +#define DC_DISP_H_PULSE1_POSITION_A 0x411 +#define DC_DISP_H_PULSE1_POSITION_B 0x412 +#define DC_DISP_H_PULSE1_POSITION_C 0x413 +#define DC_DISP_H_PULSE1_POSITION_D 0x414 +#define DC_DISP_H_PULSE2_CONTROL 0x415 +#define DC_DISP_H_PULSE2_POSITION_A 0x416 +#define DC_DISP_H_PULSE2_POSITION_B 0x417 +#define DC_DISP_H_PULSE2_POSITION_C 0x418 +#define DC_DISP_H_PULSE2_POSITION_D 0x419 +#define DC_DISP_V_PULSE0_CONTROL 0x41a +#define DC_DISP_V_PULSE0_POSITION_A 0x41b +#define DC_DISP_V_PULSE0_POSITION_B 0x41c +#define DC_DISP_V_PULSE0_POSITION_C 0x41d +#define DC_DISP_V_PULSE1_CONTROL 0x41e +#define DC_DISP_V_PULSE1_POSITION_A 0x41f +#define DC_DISP_V_PULSE1_POSITION_B 0x420 +#define DC_DISP_V_PULSE1_POSITION_C 0x421 +#define DC_DISP_V_PULSE2_CONTROL 0x422 +#define DC_DISP_V_PULSE2_POSITION_A 0x423 +#define DC_DISP_V_PULSE3_CONTROL 0x424 +#define DC_DISP_V_PULSE3_POSITION_A 0x425 +#define DC_DISP_M0_CONTROL 0x426 +#define DC_DISP_M1_CONTROL 0x427 +#define DC_DISP_DI_CONTROL 0x428 +#define DC_DISP_PP_CONTROL 0x429 +#define DC_DISP_PP_SELECT_A 0x42a +#define DC_DISP_PP_SELECT_B 0x42b +#define DC_DISP_PP_SELECT_C 0x42c +#define DC_DISP_PP_SELECT_D 0x42d + +#define PULSE_MODE_NORMAL (0 << 3) +#define PULSE_MODE_ONE_CLOCK (1 << 3) +#define PULSE_POLARITY_HIGH (0 << 4) +#define PULSE_POLARITY_LOW (1 << 4) +#define PULSE_QUAL_ALWAYS (0 << 6) +#define PULSE_QUAL_VACTIVE (2 << 6) +#define PULSE_QUAL_VACTIVE1 (3 << 6) +#define PULSE_LAST_START_A (0 << 8) +#define PULSE_LAST_END_A (1 << 8) +#define PULSE_LAST_START_B (2 << 8) +#define PULSE_LAST_END_B (3 << 8) +#define PULSE_LAST_START_C (4 << 8) +#define PULSE_LAST_END_C (5 << 8) +#define PULSE_LAST_START_D (6 << 8) +#define PULSE_LAST_END_D (7 << 8) + +#define PULSE_START(x) (((x) & 0xfff) << 0) +#define PULSE_END(x) (((x) & 0xfff) << 16) + +#define DC_DISP_DISP_CLOCK_CONTROL 0x42e +#define PIXEL_CLK_DIVIDER_PCD1 (0 << 8) +#define PIXEL_CLK_DIVIDER_PCD1H (1 << 8) +#define PIXEL_CLK_DIVIDER_PCD2 (2 << 8) +#define PIXEL_CLK_DIVIDER_PCD3 (3 << 8) +#define PIXEL_CLK_DIVIDER_PCD4 (4 << 8) +#define PIXEL_CLK_DIVIDER_PCD6 (5 << 8) +#define PIXEL_CLK_DIVIDER_PCD8 (6 << 8) +#define PIXEL_CLK_DIVIDER_PCD9 (7 << 8) +#define PIXEL_CLK_DIVIDER_PCD12 (8 << 8) +#define PIXEL_CLK_DIVIDER_PCD16 (9 << 8) +#define PIXEL_CLK_DIVIDER_PCD18 (10 << 8) +#define PIXEL_CLK_DIVIDER_PCD24 (11 << 8) +#define PIXEL_CLK_DIVIDER_PCD13 (12 << 8) +#define SHIFT_CLK_DIVIDER(x) ((x) & 0xff) + +#define DC_DISP_DISP_INTERFACE_CONTROL 0x42f +#define DISP_DATA_FORMAT_DF1P1C (0 << 0) 
+#define DISP_DATA_FORMAT_DF1P2C24B (1 << 0) +#define DISP_DATA_FORMAT_DF1P2C18B (2 << 0) +#define DISP_DATA_FORMAT_DF1P2C16B (3 << 0) +#define DISP_DATA_FORMAT_DF2S (4 << 0) +#define DISP_DATA_FORMAT_DF3S (5 << 0) +#define DISP_DATA_FORMAT_DFSPI (6 << 0) +#define DISP_DATA_FORMAT_DF1P3C24B (7 << 0) +#define DISP_DATA_FORMAT_DF1P3C18B (8 << 0) +#define DISP_ALIGNMENT_MSB (0 << 8) +#define DISP_ALIGNMENT_LSB (1 << 8) +#define DISP_ORDER_RED_BLUE (0 << 9) +#define DISP_ORDER_BLUE_RED (1 << 9) + +#define DC_DISP_DISP_COLOR_CONTROL 0x430 +#define BASE_COLOR_SIZE666 ( 0 << 0) +#define BASE_COLOR_SIZE111 ( 1 << 0) +#define BASE_COLOR_SIZE222 ( 2 << 0) +#define BASE_COLOR_SIZE333 ( 3 << 0) +#define BASE_COLOR_SIZE444 ( 4 << 0) +#define BASE_COLOR_SIZE555 ( 5 << 0) +#define BASE_COLOR_SIZE565 ( 6 << 0) +#define BASE_COLOR_SIZE332 ( 7 << 0) +#define BASE_COLOR_SIZE888 ( 8 << 0) +#define BASE_COLOR_SIZE101010 (10 << 0) +#define BASE_COLOR_SIZE121212 (12 << 0) +#define DITHER_CONTROL_MASK (3 << 8) +#define DITHER_CONTROL_DISABLE (0 << 8) +#define DITHER_CONTROL_ORDERED (2 << 8) +#define DITHER_CONTROL_ERRDIFF (3 << 8) +#define BASE_COLOR_SIZE_MASK (0xf << 0) +#define BASE_COLOR_SIZE_666 ( 0 << 0) +#define BASE_COLOR_SIZE_111 ( 1 << 0) +#define BASE_COLOR_SIZE_222 ( 2 << 0) +#define BASE_COLOR_SIZE_333 ( 3 << 0) +#define BASE_COLOR_SIZE_444 ( 4 << 0) +#define BASE_COLOR_SIZE_555 ( 5 << 0) +#define BASE_COLOR_SIZE_565 ( 6 << 0) +#define BASE_COLOR_SIZE_332 ( 7 << 0) +#define BASE_COLOR_SIZE_888 ( 8 << 0) +#define BASE_COLOR_SIZE_101010 ( 10 << 0) +#define BASE_COLOR_SIZE_121212 ( 12 << 0) + +#define DC_DISP_SHIFT_CLOCK_OPTIONS 0x431 +#define SC1_H_QUALIFIER_NONE (1 << 16) +#define SC0_H_QUALIFIER_NONE (1 << 0) + +#define DC_DISP_DATA_ENABLE_OPTIONS 0x432 +#define DE_SELECT_ACTIVE_BLANK (0 << 0) +#define DE_SELECT_ACTIVE (1 << 0) +#define DE_SELECT_ACTIVE_IS (2 << 0) +#define DE_CONTROL_ONECLK (0 << 2) +#define DE_CONTROL_NORMAL (1 << 2) +#define DE_CONTROL_EARLY_EXT (2 << 2) +#define DE_CONTROL_EARLY (3 << 2) +#define DE_CONTROL_ACTIVE_BLANK (4 << 2) + +#define DC_DISP_SERIAL_INTERFACE_OPTIONS 0x433 +#define DC_DISP_LCD_SPI_OPTIONS 0x434 +#define DC_DISP_BORDER_COLOR 0x435 +#define DC_DISP_COLOR_KEY0_LOWER 0x436 +#define DC_DISP_COLOR_KEY0_UPPER 0x437 +#define DC_DISP_COLOR_KEY1_LOWER 0x438 +#define DC_DISP_COLOR_KEY1_UPPER 0x439 + +#define DC_DISP_CURSOR_FOREGROUND 0x43c +#define DC_DISP_CURSOR_BACKGROUND 0x43d + +#define DC_DISP_CURSOR_START_ADDR 0x43e +#define CURSOR_CLIP_DISPLAY (0 << 28) +#define CURSOR_CLIP_WIN_A (1 << 28) +#define CURSOR_CLIP_WIN_B (2 << 28) +#define CURSOR_CLIP_WIN_C (3 << 28) +#define CURSOR_SIZE_32x32 (0 << 24) +#define CURSOR_SIZE_64x64 (1 << 24) +#define CURSOR_SIZE_128x128 (2 << 24) +#define CURSOR_SIZE_256x256 (3 << 24) +#define DC_DISP_CURSOR_START_ADDR_NS 0x43f + +#define DC_DISP_CURSOR_POSITION 0x440 +#define DC_DISP_CURSOR_POSITION_NS 0x441 + +#define DC_DISP_INIT_SEQ_CONTROL 0x442 +#define DC_DISP_SPI_INIT_SEQ_DATA_A 0x443 +#define DC_DISP_SPI_INIT_SEQ_DATA_B 0x444 +#define DC_DISP_SPI_INIT_SEQ_DATA_C 0x445 +#define DC_DISP_SPI_INIT_SEQ_DATA_D 0x446 + +#define DC_DISP_DC_MCCIF_FIFOCTRL 0x480 +#define DC_DISP_MCCIF_DISPLAY0A_HYST 0x481 +#define DC_DISP_MCCIF_DISPLAY0B_HYST 0x482 +#define DC_DISP_MCCIF_DISPLAY1A_HYST 0x483 +#define DC_DISP_MCCIF_DISPLAY1B_HYST 0x484 + +#define DC_DISP_DAC_CRT_CTRL 0x4c0 +#define DC_DISP_DISP_MISC_CONTROL 0x4c1 +#define DC_DISP_SD_CONTROL 0x4c2 +#define DC_DISP_SD_CSC_COEFF 0x4c3 +#define DC_DISP_SD_LUT(x) (0x4c4 + (x)) +#define 
DC_DISP_SD_FLICKER_CONTROL 0x4cd +#define DC_DISP_DC_PIXEL_COUNT 0x4ce +#define DC_DISP_SD_HISTOGRAM(x) (0x4cf + (x)) +#define DC_DISP_SD_BL_PARAMETERS 0x4d7 +#define DC_DISP_SD_BL_TF(x) (0x4d8 + (x)) +#define DC_DISP_SD_BL_CONTROL 0x4dc +#define DC_DISP_SD_HW_K_VALUES 0x4dd +#define DC_DISP_SD_MAN_K_VALUES 0x4de + +#define DC_DISP_BLEND_BACKGROUND_COLOR 0x4e4 +#define BACKGROUND_COLOR_ALPHA(x) (((x) & 0xff) << 24) +#define BACKGROUND_COLOR_BLUE(x) (((x) & 0xff) << 16) +#define BACKGROUND_COLOR_GREEN(x) (((x) & 0xff) << 8) +#define BACKGROUND_COLOR_RED(x) (((x) & 0xff) << 0) + +#define DC_DISP_INTERLACE_CONTROL 0x4e5 +#define INTERLACE_STATUS (1 << 2) +#define INTERLACE_START (1 << 1) +#define INTERLACE_ENABLE (1 << 0) + +#define DC_DISP_CURSOR_START_ADDR_HI 0x4ec +#define DC_DISP_BLEND_CURSOR_CONTROL 0x4f1 +#define CURSOR_COMPOSITION_MODE_BLEND (0 << 25) +#define CURSOR_COMPOSITION_MODE_XOR (1 << 25) +#define CURSOR_MODE_LEGACY (0 << 24) +#define CURSOR_MODE_NORMAL (1 << 24) +#define CURSOR_DST_BLEND_ZERO (0 << 16) +#define CURSOR_DST_BLEND_K1 (1 << 16) +#define CURSOR_DST_BLEND_NEG_K1_TIMES_SRC (2 << 16) +#define CURSOR_DST_BLEND_MASK (3 << 16) +#define CURSOR_SRC_BLEND_K1 (0 << 8) +#define CURSOR_SRC_BLEND_K1_TIMES_SRC (1 << 8) +#define CURSOR_SRC_BLEND_MASK (3 << 8) +#define CURSOR_ALPHA 0xff + +#define DC_WIN_CORE_ACT_CONTROL 0x50e +#define VCOUNTER (0 << 0) +#define HCOUNTER (1 << 0) + +#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLA 0x543 +#define LATENCY_CTL_MODE_ENABLE (1 << 2) + +#define DC_WIN_CORE_IHUB_WGRP_LATENCY_CTLB 0x544 +#define WATERMARK_MASK 0x1fffffff + +#define DC_WIN_CORE_PRECOMP_WGRP_PIPE_METER 0x560 +#define PIPE_METER_INT(x) (((x) & 0xff) << 8) +#define PIPE_METER_FRAC(x) (((x) & 0xff) << 0) + +#define DC_WIN_CORE_IHUB_WGRP_POOL_CONFIG 0x561 +#define MEMPOOL_ENTRIES(x) (((x) & 0xffff) << 0) + +#define DC_WIN_CORE_IHUB_WGRP_FETCH_METER 0x562 +#define SLOTS(x) (((x) & 0xff) << 0) + +#define DC_WIN_CORE_IHUB_LINEBUF_CONFIG 0x563 +#define MODE_TWO_LINES (0 << 14) +#define MODE_FOUR_LINES (1 << 14) + +#define DC_WIN_CORE_IHUB_THREAD_GROUP 0x568 +#define THREAD_NUM_MASK (0x1f << 1) +#define THREAD_NUM(x) (((x) & 0x1f) << 1) +#define THREAD_GROUP_ENABLE (1 << 0) + +#define DC_WIN_H_FILTER_P(p) (0x601 + (p)) +#define DC_WIN_V_FILTER_P(p) (0x619 + (p)) + +#define DC_WIN_CSC_YOF 0x611 +#define DC_WIN_CSC_KYRGB 0x612 +#define DC_WIN_CSC_KUR 0x613 +#define DC_WIN_CSC_KVR 0x614 +#define DC_WIN_CSC_KUG 0x615 +#define DC_WIN_CSC_KVG 0x616 +#define DC_WIN_CSC_KUB 0x617 +#define DC_WIN_CSC_KVB 0x618 + +#define DC_WIN_WIN_OPTIONS 0x700 +#define H_DIRECTION (1 << 0) +#define V_DIRECTION (1 << 2) +#define COLOR_EXPAND (1 << 6) +#define H_FILTER (1 << 8) +#define V_FILTER (1 << 10) +#define CSC_ENABLE (1 << 18) +#define WIN_ENABLE (1 << 30) + +#define DC_WIN_BYTE_SWAP 0x701 +#define BYTE_SWAP_NOSWAP (0 << 0) +#define BYTE_SWAP_SWAP2 (1 << 0) +#define BYTE_SWAP_SWAP4 (2 << 0) +#define BYTE_SWAP_SWAP4HW (3 << 0) + +#define DC_WIN_BUFFER_CONTROL 0x702 +#define BUFFER_CONTROL_HOST (0 << 0) +#define BUFFER_CONTROL_VI (1 << 0) +#define BUFFER_CONTROL_EPP (2 << 0) +#define BUFFER_CONTROL_MPEGE (3 << 0) +#define BUFFER_CONTROL_SB2D (4 << 0) + +#define DC_WIN_COLOR_DEPTH 0x703 +#define WIN_COLOR_DEPTH_P1 0 +#define WIN_COLOR_DEPTH_P2 1 +#define WIN_COLOR_DEPTH_P4 2 +#define WIN_COLOR_DEPTH_P8 3 +#define WIN_COLOR_DEPTH_B4G4R4A4 4 +#define WIN_COLOR_DEPTH_B5G5R5A1 5 +#define WIN_COLOR_DEPTH_B5G6R5 6 +#define WIN_COLOR_DEPTH_A1B5G5R5 7 +#define WIN_COLOR_DEPTH_B8G8R8A8 12 +#define 
WIN_COLOR_DEPTH_R8G8B8A8 13 +#define WIN_COLOR_DEPTH_B6x2G6x2R6x2A8 14 +#define WIN_COLOR_DEPTH_R6x2G6x2B6x2A8 15 +#define WIN_COLOR_DEPTH_YCbCr422 16 +#define WIN_COLOR_DEPTH_YUV422 17 +#define WIN_COLOR_DEPTH_YCbCr420P 18 +#define WIN_COLOR_DEPTH_YUV420P 19 +#define WIN_COLOR_DEPTH_YCbCr422P 20 +#define WIN_COLOR_DEPTH_YUV422P 21 +#define WIN_COLOR_DEPTH_YCbCr422R 22 +#define WIN_COLOR_DEPTH_YUV422R 23 +#define WIN_COLOR_DEPTH_YCbCr422RA 24 +#define WIN_COLOR_DEPTH_YUV422RA 25 +#define WIN_COLOR_DEPTH_R4G4B4A4 27 +#define WIN_COLOR_DEPTH_R5G5B5A 28 +#define WIN_COLOR_DEPTH_AR5G5B5 29 +#define WIN_COLOR_DEPTH_B5G5R5X1 30 +#define WIN_COLOR_DEPTH_X1B5G5R5 31 +#define WIN_COLOR_DEPTH_R5G5B5X1 32 +#define WIN_COLOR_DEPTH_X1R5G5B5 33 +#define WIN_COLOR_DEPTH_R5G6B5 34 +#define WIN_COLOR_DEPTH_A8R8G8B8 35 +#define WIN_COLOR_DEPTH_A8B8G8R8 36 +#define WIN_COLOR_DEPTH_B8G8R8X8 37 +#define WIN_COLOR_DEPTH_R8G8B8X8 38 +#define WIN_COLOR_DEPTH_X8B8G8R8 65 +#define WIN_COLOR_DEPTH_X8R8G8B8 66 + +#define DC_WIN_POSITION 0x704 +#define H_POSITION(x) (((x) & 0x1fff) << 0) /* XXX 0x7fff on Tegra186 */ +#define V_POSITION(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_SIZE 0x705 +#define H_SIZE(x) (((x) & 0x1fff) << 0) /* XXX 0x7fff on Tegra186 */ +#define V_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_PRESCALED_SIZE 0x706 +#define H_PRESCALED_SIZE(x) (((x) & 0x7fff) << 0) +#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16) /* XXX 0x7fff on Tegra186 */ + +#define DC_WIN_H_INITIAL_DDA 0x707 +#define DC_WIN_V_INITIAL_DDA 0x708 +#define DC_WIN_DDA_INC 0x709 +#define H_DDA_INC(x) (((x) & 0xffff) << 0) +#define V_DDA_INC(x) (((x) & 0xffff) << 16) + +#define DC_WIN_LINE_STRIDE 0x70a +#define DC_WIN_BUF_STRIDE 0x70b +#define DC_WIN_UV_BUF_STRIDE 0x70c +#define DC_WIN_BUFFER_ADDR_MODE 0x70d +#define DC_WIN_BUFFER_ADDR_MODE_LINEAR (0 << 0) +#define DC_WIN_BUFFER_ADDR_MODE_TILE (1 << 0) +#define DC_WIN_BUFFER_ADDR_MODE_LINEAR_UV (0 << 16) +#define DC_WIN_BUFFER_ADDR_MODE_TILE_UV (1 << 16) + +#define DC_WIN_DV_CONTROL 0x70e + +#define DC_WIN_BLEND_NOKEY 0x70f +#define BLEND_WEIGHT1(x) (((x) & 0xff) << 16) +#define BLEND_WEIGHT0(x) (((x) & 0xff) << 8) + +#define DC_WIN_BLEND_1WIN 0x710 +#define BLEND_CONTROL_FIX (0 << 2) +#define BLEND_CONTROL_ALPHA (1 << 2) +#define BLEND_COLOR_KEY_NONE (0 << 0) +#define BLEND_COLOR_KEY_0 (1 << 0) +#define BLEND_COLOR_KEY_1 (2 << 0) +#define BLEND_COLOR_KEY_BOTH (3 << 0) + +#define DC_WIN_BLEND_2WIN_X 0x711 +#define BLEND_CONTROL_DEPENDENT (2 << 2) + +#define DC_WIN_BLEND_2WIN_Y 0x712 +#define DC_WIN_BLEND_3WIN_XY 0x713 + +#define DC_WIN_HP_FETCH_CONTROL 0x714 + +#define DC_WINBUF_START_ADDR 0x800 +#define DC_WINBUF_START_ADDR_NS 0x801 +#define DC_WINBUF_START_ADDR_U 0x802 +#define DC_WINBUF_START_ADDR_U_NS 0x803 +#define DC_WINBUF_START_ADDR_V 0x804 +#define DC_WINBUF_START_ADDR_V_NS 0x805 + +#define DC_WINBUF_ADDR_H_OFFSET 0x806 +#define DC_WINBUF_ADDR_H_OFFSET_NS 0x807 +#define DC_WINBUF_ADDR_V_OFFSET 0x808 +#define DC_WINBUF_ADDR_V_OFFSET_NS 0x809 + +#define DC_WINBUF_UFLOW_STATUS 0x80a +#define DC_WINBUF_SURFACE_KIND 0x80b +#define DC_WINBUF_SURFACE_KIND_PITCH (0 << 0) +#define DC_WINBUF_SURFACE_KIND_TILED (1 << 0) +#define DC_WINBUF_SURFACE_KIND_BLOCK (2 << 0) +#define DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(x) (((x) & 0x7) << 4) + +#define DC_WINBUF_START_ADDR_HI 0x80d + +#define DC_WINBUF_CDE_CONTROL 0x82f +#define ENABLE_SURFACE (1 << 0) + +#define DC_WINBUF_AD_UFLOW_STATUS 0xbca +#define DC_WINBUF_BD_UFLOW_STATUS 
0xdca
+#define DC_WINBUF_CD_UFLOW_STATUS 0xfca
+
+/* Tegra186 and later */
+#define DC_DISP_CORE_SOR_SET_CONTROL(x) (0x403 + (x))
+#define PROTOCOL_MASK (0xf << 8)
+#define PROTOCOL_SINGLE_TMDS_A (0x1 << 8)
+
+#define DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR 0x442
+#define DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR 0x446
+
+#define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL 0x702
+#define OWNER_MASK (0xf << 0)
+#define OWNER(x) (((x) & 0xf) << 0)
+
+#define DC_WIN_CROPPED_SIZE 0x706
+
+#define DC_WIN_PLANAR_STORAGE 0x709
+#define PITCH(x) (((x) >> 6) & 0x1fff)
+
+#define DC_WIN_SET_PARAMS 0x70d
+#define CLAMP_BEFORE_BLEND (1 << 15)
+#define DEGAMMA_NONE (0 << 13)
+#define DEGAMMA_SRGB (1 << 13)
+#define DEGAMMA_YUV8_10 (2 << 13)
+#define DEGAMMA_YUV12 (3 << 13)
+#define INPUT_RANGE_BYPASS (0 << 10)
+#define INPUT_RANGE_LIMITED (1 << 10)
+#define INPUT_RANGE_FULL (2 << 10)
+#define COLOR_SPACE_RGB (0 << 8)
+#define COLOR_SPACE_YUV_601 (1 << 8)
+#define COLOR_SPACE_YUV_709 (2 << 8)
+#define COLOR_SPACE_YUV_2020 (3 << 8)
+
+#define DC_WIN_WINDOWGROUP_SET_CONTROL_INPUT_SCALER 0x70e
+#define HORIZONTAL_TAPS_2 (1 << 3)
+#define HORIZONTAL_TAPS_5 (4 << 3)
+#define VERTICAL_TAPS_2 (1 << 0)
+#define VERTICAL_TAPS_5 (4 << 0)
+
+#define DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE 0x711
+#define INPUT_SCALER_USE422 (1 << 2)
+#define INPUT_SCALER_VBYPASS (1 << 1)
+#define INPUT_SCALER_HBYPASS (1 << 0)
+
+#define DC_WIN_BLEND_LAYER_CONTROL 0x716
+#define COLOR_KEY_NONE (0 << 25)
+#define COLOR_KEY_SRC (1 << 25)
+#define COLOR_KEY_DST (2 << 25)
+#define BLEND_BYPASS (1 << 24)
+#define K2(x) (((x) & 0xff) << 16)
+#define K1(x) (((x) & 0xff) << 8)
+#define WINDOW_LAYER_DEPTH(x) (((x) & 0xff) << 0)
+
+#define DC_WIN_BLEND_MATCH_SELECT 0x717
+#define BLEND_FACTOR_DST_ALPHA_ZERO (0 << 12)
+#define BLEND_FACTOR_DST_ALPHA_ONE (1 << 12)
+#define BLEND_FACTOR_DST_ALPHA_NEG_K1_TIMES_SRC (2 << 12)
+#define BLEND_FACTOR_DST_ALPHA_K2 (3 << 12)
+#define BLEND_FACTOR_SRC_ALPHA_ZERO (0 << 8)
+#define BLEND_FACTOR_SRC_ALPHA_K1 (1 << 8)
+#define BLEND_FACTOR_SRC_ALPHA_K2 (2 << 8)
+#define BLEND_FACTOR_SRC_ALPHA_NEG_K1_TIMES_DST (3 << 8)
+#define BLEND_FACTOR_DST_COLOR_ZERO (0 << 4)
+#define BLEND_FACTOR_DST_COLOR_ONE (1 << 4)
+#define BLEND_FACTOR_DST_COLOR_K1 (2 << 4)
+#define BLEND_FACTOR_DST_COLOR_K2 (3 << 4)
+#define BLEND_FACTOR_DST_COLOR_K1_TIMES_DST (4 << 4)
+#define BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_DST (5 << 4)
+#define BLEND_FACTOR_DST_COLOR_NEG_K1_TIMES_SRC (6 << 4)
+#define BLEND_FACTOR_DST_COLOR_NEG_K1 (7 << 4)
+#define BLEND_FACTOR_SRC_COLOR_ZERO (0 << 0)
+#define BLEND_FACTOR_SRC_COLOR_ONE (1 << 0)
+#define BLEND_FACTOR_SRC_COLOR_K1 (2 << 0)
+#define BLEND_FACTOR_SRC_COLOR_K1_TIMES_DST (3 << 0)
+#define BLEND_FACTOR_SRC_COLOR_NEG_K1_TIMES_DST (4 << 0)
+#define BLEND_FACTOR_SRC_COLOR_K1_TIMES_SRC (5 << 0)
+
+#define DC_WIN_BLEND_NOMATCH_SELECT 0x718
+
+#define DC_WIN_PRECOMP_WGRP_PARAMS 0x724
+#define SWAP_UV (1 << 0)
+
+#define DC_WIN_WINDOW_SET_CONTROL 0x730
+#define CONTROL_CSC_ENABLE (1 << 5)
+
+#define DC_WINBUF_CROPPED_POINT 0x806
+#define OFFSET_Y(x) (((x) & 0xffff) << 16)
+#define OFFSET_X(x) (((x) & 0xffff) << 0)
+
+#endif /* TEGRA_DC_H */
diff --git a/drivers/gpu/drm/grate/dp.c b/drivers/gpu/drm/grate/dp.c
new file mode 100644
index 0000000000000..70dfb7d1dec55
--- /dev/null
+++ b/drivers/gpu/drm/grate/dp.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2013-2019 NVIDIA Corporation
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_print.h>
+
+#include "dp.h"
+
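+/*
+ * DP_EDP_DPCD_REV holds an index rather than a BCD version number: 0
+ * stands for eDP 1.1, 1 for eDP 1.2 and so on. This table translates
+ * that index into the BCD form (0x11, 0x12, ...) used throughout this
+ * file; see the lookup in drm_dp_link_probe().
+ */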
+static const u8 drm_dp_edp_revisions[] = { 0x11, 0x12, 0x13, 0x14 };
+
+static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
+{
+	caps->enhanced_framing = false;
+	caps->tps3_supported = false;
+	caps->fast_training = false;
+	caps->channel_coding = false;
+	caps->alternate_scrambler_reset = false;
+}
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src)
+{
+	dest->enhanced_framing = src->enhanced_framing;
+	dest->tps3_supported = src->tps3_supported;
+	dest->fast_training = src->fast_training;
+	dest->channel_coding = src->channel_coding;
+	dest->alternate_scrambler_reset = src->alternate_scrambler_reset;
+}
+
+static void drm_dp_link_reset(struct drm_dp_link *link)
+{
+	unsigned int i;
+
+	if (!link)
+		return;
+
+	link->revision = 0;
+	link->max_rate = 0;
+	link->max_lanes = 0;
+
+	drm_dp_link_caps_reset(&link->caps);
+	link->aux_rd_interval.cr = 0;
+	link->aux_rd_interval.ce = 0;
+	link->edp = 0;
+
+	link->rate = 0;
+	link->lanes = 0;
+
+	for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = 0;
+}
+
+/**
+ * drm_dp_link_add_rate() - add a rate to the list of supported rates
+ * @link: the link to add the rate to
+ * @rate: the rate to add
+ *
+ * Add a link rate to the list of supported link rates.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - ENOSPC if the maximum number of supported rates has been reached
+ * - EEXIST if the link already supports this rate
+ *
+ * See also:
+ * drm_dp_link_remove_rate()
+ */
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i, pivot;
+
+	if (link->num_rates == DP_MAX_SUPPORTED_RATES)
+		return -ENOSPC;
+
+	for (pivot = 0; pivot < link->num_rates; pivot++)
+		if (rate <= link->rates[pivot])
+			break;
+
+	if (pivot != link->num_rates && rate == link->rates[pivot])
+		return -EEXIST;
+
+	for (i = link->num_rates; i > pivot; i--)
+		link->rates[i] = link->rates[i - 1];
+
+	link->rates[pivot] = rate;
+	link->num_rates++;
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_remove_rate() - remove a rate from the list of supported rates
+ * @link: the link from which to remove the rate
+ * @rate: the rate to remove
+ *
+ * Removes a link rate from the list of supported link rates.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - EINVAL if the specified rate is not among the supported rates
+ *
+ * See also:
+ * drm_dp_link_add_rate()
+ */
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i;
+
+	for (i = 0; i < link->num_rates; i++)
+		if (rate == link->rates[i])
+			break;
+
+	if (i == link->num_rates)
+		return -EINVAL;
+
+	link->num_rates--;
+
+	while (i < link->num_rates) {
+		link->rates[i] = link->rates[i + 1];
+		i++;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_update_rates() - normalize the supported link rates array
+ * @link: the link for which to normalize the supported link rates
+ *
+ * Users should call this function after they've manually modified the array
+ * of supported link rates. This function removes any stale entries, compacts
+ * the array and updates the supported link rate count. Note that calling the
+ * drm_dp_link_remove_rate() function already does this janitorial work.
+ *
+ * See also:
+ * drm_dp_link_add_rate(), drm_dp_link_remove_rate()
+ */
+void drm_dp_link_update_rates(struct drm_dp_link *link)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < link->num_rates; i++) {
+		if (link->rates[i] != 0)
+			link->rates[count++] = link->rates[i];
+	}
+
+	for (i = count; i < link->num_rates; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = count;
+}
+
+/**
+ * drm_dp_link_probe() - probe a DisplayPort link for capabilities
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to structure in which to return link capabilities
+ *
+ * The structure filled in by this function can usually be passed directly
+ * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and
+ * configure the link based on the link's capabilities.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 dpcd[DP_RECEIVER_CAP_SIZE], value;
+	unsigned int rd_interval;
+	int err;
+
+	drm_dp_link_reset(link);
+
+	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));
+	if (err < 0)
+		return err;
+
+	link->revision = dpcd[DP_DPCD_REV];
+	link->max_rate = drm_dp_max_link_rate(dpcd);
+	link->max_lanes = drm_dp_max_lane_count(dpcd);
+
+	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
+	link->caps.tps3_supported = drm_dp_tps3_supported(dpcd);
+	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
+	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
+
+	if (drm_dp_alternate_scrambler_reset_cap(dpcd)) {
+		link->caps.alternate_scrambler_reset = true;
+
+		err = drm_dp_dpcd_readb(aux, DP_EDP_DPCD_REV, &value);
+		if (err < 0)
+			return err;
+
+		if (value >= ARRAY_SIZE(drm_dp_edp_revisions))
+			DRM_ERROR("unsupported eDP version: %02x\n", value);
+		else
+			link->edp = drm_dp_edp_revisions[value];
+	}
+
+	/*
+	 * The DPCD stores the AUX read interval in units of 4 ms. There are
+	 * two special cases:
+	 *
+	 *   1) if the TRAINING_AUX_RD_INTERVAL field is 0, the clock recovery
+	 *      and channel equalization should use 100 us or 400 us AUX read
+	 *      intervals, respectively
+	 *
+	 *   2) for DP v1.4 and above, clock recovery should always use 100 us
+	 *      AUX read intervals
+	 */
+	rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+		      DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4) {
+		DRM_DEBUG_KMS("AUX interval %u out of range (max. 4)\n",
+			      rd_interval);
+		rd_interval = 4;
+	}
+
+	rd_interval *= 4 * USEC_PER_MSEC;
+
+	if (rd_interval == 0 || link->revision >= DP_DPCD_REV_14)
+		link->aux_rd_interval.cr = 100;
+	else
+		link->aux_rd_interval.cr = rd_interval;
+
+	if (rd_interval == 0)
+		link->aux_rd_interval.ce = 400;
+	else
+		link->aux_rd_interval.ce = rd_interval;
+
+	link->rate = link->max_rate;
+	link->lanes = link->max_lanes;
+
+	/* Parse SUPPORTED_LINK_RATES from eDP 1.4 */
+	if (link->edp >= 0x14) {
+		u8 supported_rates[DP_MAX_SUPPORTED_RATES * 2];
+		unsigned int i;
+		u16 rate;
+
+		err = drm_dp_dpcd_read(aux, DP_SUPPORTED_LINK_RATES,
+				       supported_rates,
+				       sizeof(supported_rates));
+		if (err < 0)
+			return err;
+
+		for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) {
+			/* each entry is a little-endian rate in 200 kHz units */
+			rate = supported_rates[i * 2 + 1] << 8 |
+			       supported_rates[i * 2 + 0];
+
+			drm_dp_link_add_rate(link, rate * 200);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_power_up() - power up a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 value;
+	int err;
+
+	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
+	if (link->revision < 0x11)
+		return 0;
+
+	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
+	if (err < 0)
+		return err;
+
+	value &= ~DP_SET_POWER_MASK;
+	value |= DP_SET_POWER_D0;
+
+	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
+	if (err < 0)
+		return err;
+
+	/*
+	 * According to the DP 1.1 specification, a "Sink Device must exit the
+	 * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink
+	 * Control Field", register 0x600).
+	 */
+	usleep_range(1000, 2000);
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_power_down() - power down a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 value;
+	int err;
+
+	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
+	if (link->revision < 0x11)
+		return 0;
+
+	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
+	if (err < 0)
+		return err;
+
+	value &= ~DP_SET_POWER_MASK;
+	value |= DP_SET_POWER_D3;
+
+	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_configure() - configure a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 values[2], value;
+	int err;
+
+	if (link->ops && link->ops->configure) {
+		err = link->ops->configure(link);
+		if (err < 0) {
+			DRM_ERROR("failed to configure DP link: %d\n", err);
+			return err;
+		}
+	}
+
+	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
+	values[1] = link->lanes;
+
+	if (link->caps.enhanced_framing)
+		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values));
+	if (err < 0)
+		return err;
+
+	if (link->caps.channel_coding)
+		value = DP_SET_ANSI_8B10B;
+	else
+		value = 0;
+
+	err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, value);
+	if (err < 0)
+		return err;
+
+	if (link->caps.alternate_scrambler_reset) {
+		err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET,
+					 DP_ALTERNATE_SCRAMBLER_RESET_ENABLE);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_choose() - choose the lowest possible configuration for a mode
+ * @link: DRM DP link object
+ * @mode: DRM display mode
+ * @info: DRM display information
+ *
+ * According to the eDP specification, a source should select a configuration
+ * with the lowest number of lanes and the lowest possible link rate that can
+ * match the bitrate requirements of a video mode. However, it must not exceed
+ * the capabilities of the sink.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */ +int drm_dp_link_choose(struct drm_dp_link *link, + const struct drm_display_mode *mode, + const struct drm_display_info *info) +{ + /* available link symbol clock rates */ + static const unsigned int rates[3] = { 162000, 270000, 540000 }; + /* available number of lanes */ + static const unsigned int lanes[3] = { 1, 2, 4 }; + unsigned long requirement, capacity; + unsigned int rate = link->max_rate; + unsigned int i, j; + + /* bandwidth requirement */ + requirement = mode->clock * info->bpc * 3; + + for (i = 0; i < ARRAY_SIZE(lanes) && lanes[i] <= link->max_lanes; i++) { + for (j = 0; j < ARRAY_SIZE(rates) && rates[j] <= rate; j++) { + /* + * Capacity for this combination of lanes and rate, + * factoring in the ANSI 8B/10B encoding. + * + * Link rates in the DRM DP helpers are really link + * symbol frequencies, so a tenth of the actual rate + * of the link. + */ + capacity = lanes[i] * (rates[j] * 10) * 8 / 10; + + if (capacity >= requirement) { + DRM_DEBUG_KMS("using %u lanes at %u kHz (%lu/%lu kbps)\n", + lanes[i], rates[j], requirement, + capacity); + link->lanes = lanes[i]; + link->rate = rates[j]; + return 0; + } + } + } + + return -ERANGE; +} + +/** + * DOC: Link training + * + * These functions contain common logic and helpers to implement DisplayPort + * link training. + */ + +/** + * drm_dp_link_train_init() - initialize DisplayPort link training state + * @train: DisplayPort link training state + */ +void drm_dp_link_train_init(struct drm_dp_link_train *train) +{ + struct drm_dp_link_train_set *request = &train->request; + struct drm_dp_link_train_set *adjust = &train->adjust; + unsigned int i; + + for (i = 0; i < 4; i++) { + request->voltage_swing[i] = 0; + adjust->voltage_swing[i] = 0; + + request->pre_emphasis[i] = 0; + adjust->pre_emphasis[i] = 0; + + request->post_cursor[i] = 0; + adjust->post_cursor[i] = 0; + } + + train->pattern = DP_TRAINING_PATTERN_DISABLE; + train->clock_recovered = false; + train->channel_equalized = false; +} + +static bool drm_dp_link_train_valid(const struct drm_dp_link_train *train) +{ + return train->clock_recovered && train->channel_equalized; +} + +static int drm_dp_link_apply_training(struct drm_dp_link *link) +{ + struct drm_dp_link_train_set *request = &link->train.request; + unsigned int lanes = link->lanes, *vs, *pe, *pc, i; + struct drm_dp_aux *aux = link->aux; + u8 values[4], pattern = 0; + int err; + + err = link->ops->apply_training(link); + if (err < 0) { + DRM_ERROR("failed to apply link training: %d\n", err); + return err; + } + + vs = request->voltage_swing; + pe = request->pre_emphasis; + pc = request->post_cursor; + + /* write currently selected voltage-swing and pre-emphasis levels */ + for (i = 0; i < lanes; i++) + values[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL(vs[i]) | + DP_TRAIN_PRE_EMPHASIS_LEVEL(pe[i]); + + err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, lanes); + if (err < 0) { + DRM_ERROR("failed to set training parameters: %d\n", err); + return err; + } + + /* write currently selected post-cursor level (if supported) */ + if (link->revision >= 0x12 && link->rate == 540000) { + values[0] = values[1] = 0; + + for (i = 0; i < lanes; i++) + values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]); + + err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values, + DIV_ROUND_UP(lanes, 2)); + if (err < 0) { + DRM_ERROR("failed to set post-cursor: %d\n", err); + return err; + } + } + + /* write link pattern */ + if (link->train.pattern != DP_TRAINING_PATTERN_DISABLE) + pattern |= DP_LINK_SCRAMBLING_DISABLE; + + pattern |= 
link->train.pattern; + + err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern); + if (err < 0) { + DRM_ERROR("failed to set training pattern: %d\n", err); + return err; + } + + return 0; +} + +static void drm_dp_link_train_wait(struct drm_dp_link *link) +{ + unsigned long min = 0; + + switch (link->train.pattern) { + case DP_TRAINING_PATTERN_1: + min = link->aux_rd_interval.cr; + break; + + case DP_TRAINING_PATTERN_2: + case DP_TRAINING_PATTERN_3: + min = link->aux_rd_interval.ce; + break; + + default: + break; + } + + if (min > 0) + usleep_range(min, 2 * min); +} + +static void drm_dp_link_get_adjustments(struct drm_dp_link *link, + u8 status[DP_LINK_STATUS_SIZE]) +{ + struct drm_dp_link_train_set *adjust = &link->train.adjust; + unsigned int i; + + for (i = 0; i < link->lanes; i++) { + adjust->voltage_swing[i] = + drm_dp_get_adjust_request_voltage(status, i) >> + DP_TRAIN_VOLTAGE_SWING_SHIFT; + + adjust->pre_emphasis[i] = + drm_dp_get_adjust_request_pre_emphasis(status, i) >> + DP_TRAIN_PRE_EMPHASIS_SHIFT; + + adjust->post_cursor[i] = + drm_dp_get_adjust_request_post_cursor(status, i); + } +} + +static void drm_dp_link_train_adjust(struct drm_dp_link_train *train) +{ + struct drm_dp_link_train_set *request = &train->request; + struct drm_dp_link_train_set *adjust = &train->adjust; + unsigned int i; + + for (i = 0; i < 4; i++) + if (request->voltage_swing[i] != adjust->voltage_swing[i]) + request->voltage_swing[i] = adjust->voltage_swing[i]; + + for (i = 0; i < 4; i++) + if (request->pre_emphasis[i] != adjust->pre_emphasis[i]) + request->pre_emphasis[i] = adjust->pre_emphasis[i]; + + for (i = 0; i < 4; i++) + if (request->post_cursor[i] != adjust->post_cursor[i]) + request->post_cursor[i] = adjust->post_cursor[i]; +} + +static int drm_dp_link_recover_clock(struct drm_dp_link *link) +{ + u8 status[DP_LINK_STATUS_SIZE]; + int err; + + err = drm_dp_link_apply_training(link); + if (err < 0) + return err; + + drm_dp_link_train_wait(link); + + err = drm_dp_dpcd_read_link_status(link->aux, status); + if (err < 0) { + DRM_ERROR("failed to read link status: %d\n", err); + return err; + } + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) + drm_dp_link_get_adjustments(link, status); + else + link->train.clock_recovered = true; + + return 0; +} + +static int drm_dp_link_clock_recovery(struct drm_dp_link *link) +{ + unsigned int repeat; + int err; + + /* start clock recovery using training pattern 1 */ + link->train.pattern = DP_TRAINING_PATTERN_1; + + for (repeat = 1; repeat < 5; repeat++) { + err = drm_dp_link_recover_clock(link); + if (err < 0) { + DRM_ERROR("failed to recover clock: %d\n", err); + return err; + } + + if (link->train.clock_recovered) + break; + + drm_dp_link_train_adjust(&link->train); + } + + return 0; +} + +static int drm_dp_link_equalize_channel(struct drm_dp_link *link) +{ + struct drm_dp_aux *aux = link->aux; + u8 status[DP_LINK_STATUS_SIZE]; + int err; + + err = drm_dp_link_apply_training(link); + if (err < 0) + return err; + + drm_dp_link_train_wait(link); + + err = drm_dp_dpcd_read_link_status(aux, status); + if (err < 0) { + DRM_ERROR("failed to read link status: %d\n", err); + return err; + } + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + DRM_ERROR("clock recovery lost while equalizing channel\n"); + link->train.clock_recovered = false; + return 0; + } + + if (!drm_dp_channel_eq_ok(status, link->lanes)) + drm_dp_link_get_adjustments(link, status); + else + link->train.channel_equalized = true; + + return 0; +} + +static int 
drm_dp_link_channel_equalization(struct drm_dp_link *link) +{ + unsigned int repeat; + int err; + + /* start channel equalization using pattern 2 or 3 */ + if (link->caps.tps3_supported) + link->train.pattern = DP_TRAINING_PATTERN_3; + else + link->train.pattern = DP_TRAINING_PATTERN_2; + + for (repeat = 1; repeat < 5; repeat++) { + err = drm_dp_link_equalize_channel(link); + if (err < 0) { + DRM_ERROR("failed to equalize channel: %d\n", err); + return err; + } + + if (link->train.channel_equalized) + break; + + drm_dp_link_train_adjust(&link->train); + } + + return 0; +} + +static int drm_dp_link_downgrade(struct drm_dp_link *link) +{ + switch (link->rate) { + case 162000: + return -EINVAL; + + case 270000: + link->rate = 162000; + break; + + case 540000: + link->rate = 270000; + return 0; + } + + return 0; +} + +static void drm_dp_link_train_disable(struct drm_dp_link *link) +{ + int err; + + link->train.pattern = DP_TRAINING_PATTERN_DISABLE; + + err = drm_dp_link_apply_training(link); + if (err < 0) + DRM_ERROR("failed to disable link training: %d\n", err); +} + +static int drm_dp_link_train_full(struct drm_dp_link *link) +{ + int err; + +retry: + DRM_DEBUG_KMS("full-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? "s" : "", + link->rate / 100); + + err = drm_dp_link_configure(link->aux, link); + if (err < 0) { + DRM_ERROR("failed to configure DP link: %d\n", err); + return err; + } + + err = drm_dp_link_clock_recovery(link); + if (err < 0) { + DRM_ERROR("clock recovery failed: %d\n", err); + goto out; + } + + if (!link->train.clock_recovered) { + DRM_ERROR("clock recovery failed, downgrading link\n"); + + err = drm_dp_link_downgrade(link); + if (err < 0) + goto out; + + goto retry; + } + + DRM_DEBUG_KMS("clock recovery succeeded\n"); + + err = drm_dp_link_channel_equalization(link); + if (err < 0) { + DRM_ERROR("channel equalization failed: %d\n", err); + goto out; + } + + if (!link->train.channel_equalized) { + DRM_ERROR("channel equalization failed, downgrading link\n"); + + err = drm_dp_link_downgrade(link); + if (err < 0) + goto out; + + goto retry; + } + + DRM_DEBUG_KMS("channel equalization succeeded\n"); + +out: + drm_dp_link_train_disable(link); + return err; +} + +static int drm_dp_link_train_fast(struct drm_dp_link *link) +{ + u8 status[DP_LINK_STATUS_SIZE]; + int err; + + DRM_DEBUG_KMS("fast-training link: %u lane%s at %u MHz\n", + link->lanes, (link->lanes > 1) ? 
"s" : "", + link->rate / 100); + + err = drm_dp_link_configure(link->aux, link); + if (err < 0) { + DRM_ERROR("failed to configure DP link: %d\n", err); + return err; + } + + /* transmit training pattern 1 for 500 microseconds */ + link->train.pattern = DP_TRAINING_PATTERN_1; + + err = drm_dp_link_apply_training(link); + if (err < 0) + goto out; + + usleep_range(500, 1000); + + /* transmit training pattern 2 or 3 for 500 microseconds */ + if (link->caps.tps3_supported) + link->train.pattern = DP_TRAINING_PATTERN_3; + else + link->train.pattern = DP_TRAINING_PATTERN_2; + + err = drm_dp_link_apply_training(link); + if (err < 0) + goto out; + + usleep_range(500, 1000); + + err = drm_dp_dpcd_read_link_status(link->aux, status); + if (err < 0) { + DRM_ERROR("failed to read link status: %d\n", err); + goto out; + } + + if (!drm_dp_clock_recovery_ok(status, link->lanes)) { + DRM_ERROR("clock recovery failed\n"); + err = -EIO; + } + + if (!drm_dp_channel_eq_ok(status, link->lanes)) { + DRM_ERROR("channel equalization failed\n"); + err = -EIO; + } + +out: + drm_dp_link_train_disable(link); + return err; +} + +/** + * drm_dp_link_train() - perform DisplayPort link training + * @link: a DP link object + * + * Uses the context stored in the DP link object to perform link training. It + * is expected that drivers will call drm_dp_link_probe() to obtain the link + * capabilities before performing link training. + * + * If the sink supports fast link training (no AUX CH handshake) and valid + * training settings are available, this function will try to perform fast + * link training and fall back to full link training on failure. + * + * Returns: 0 on success or a negative error code on failure. + */ +int drm_dp_link_train(struct drm_dp_link *link) +{ + int err; + + drm_dp_link_train_init(&link->train); + + if (link->caps.fast_training) { + if (drm_dp_link_train_valid(&link->train)) { + err = drm_dp_link_train_fast(link); + if (err < 0) + DRM_ERROR("fast link training failed: %d\n", + err); + else + return 0; + } else { + DRM_DEBUG_KMS("training parameters not available\n"); + } + } else { + DRM_DEBUG_KMS("fast link training not supported\n"); + } + + err = drm_dp_link_train_full(link); + if (err < 0) + DRM_ERROR("full link training failed: %d\n", err); + + return err; +} diff --git a/drivers/gpu/drm/grate/dp.h b/drivers/gpu/drm/grate/dp.h new file mode 100644 index 0000000000000..cb12ed0c54e7d --- /dev/null +++ b/drivers/gpu/drm/grate/dp.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2013-2019 NVIDIA Corporation. 
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#ifndef DRM_TEGRA_DP_H
+#define DRM_TEGRA_DP_H 1
+
+#include 
+
+struct drm_display_info;
+struct drm_display_mode;
+struct drm_dp_aux;
+struct drm_dp_link;
+
+/**
+ * struct drm_dp_link_caps - DP link capabilities
+ */
+struct drm_dp_link_caps {
+	/**
+	 * @enhanced_framing:
+	 *
+	 * enhanced framing capability (mandatory as of DP 1.2)
+	 */
+	bool enhanced_framing;
+
+	/**
+	 * @tps3_supported:
+	 *
+	 * training pattern sequence 3 supported for equalization
+	 */
+	bool tps3_supported;
+
+	/**
+	 * @fast_training:
+	 *
+	 * AUX CH handshake not required for link training
+	 */
+	bool fast_training;
+
+	/**
+	 * @channel_coding:
+	 *
+	 * ANSI 8B/10B channel coding capability
+	 */
+	bool channel_coding;
+
+	/**
+	 * @alternate_scrambler_reset:
+	 *
+	 * eDP alternate scrambler reset capability
+	 */
+	bool alternate_scrambler_reset;
+};
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src);
+
+/**
+ * struct drm_dp_link_ops - DP link operations
+ */
+struct drm_dp_link_ops {
+	/**
+	 * @apply_training:
+	 */
+	int (*apply_training)(struct drm_dp_link *link);
+
+	/**
+	 * @configure:
+	 */
+	int (*configure)(struct drm_dp_link *link);
+};
+
+#define DP_TRAIN_VOLTAGE_SWING_LEVEL(x) ((x) << 0)
+#define DP_TRAIN_PRE_EMPHASIS_LEVEL(x) ((x) << 3)
+#define DP_LANE_POST_CURSOR(i, x) (((x) & 0x3) << (((i) & 1) << 2))
+
+/**
+ * struct drm_dp_link_train_set - link training settings
+ * @voltage_swing: per-lane voltage swing
+ * @pre_emphasis: per-lane pre-emphasis
+ * @post_cursor: per-lane post-cursor
+ */
+struct drm_dp_link_train_set {
+	unsigned int voltage_swing[4];
+	unsigned int pre_emphasis[4];
+	unsigned int post_cursor[4];
+};
+
+/**
+ * struct drm_dp_link_train - link training state information
+ * @request: currently requested settings
+ * @adjust: adjustments requested by sink
+ * @pattern: currently requested training pattern
+ * @clock_recovered: flag to track if clock recovery has completed
+ * @channel_equalized: flag to track if channel equalization has completed
+ */
+struct drm_dp_link_train {
+	struct drm_dp_link_train_set request;
+	struct drm_dp_link_train_set adjust;
+
+	unsigned int pattern;
+
+	bool clock_recovered;
+	bool channel_equalized;
+};
+
+/**
+ * struct drm_dp_link - DP link capabilities and configuration
+ * @revision: DP specification revision supported on the link
+ * @max_rate: maximum clock rate supported on the link
+ * @max_lanes: maximum number of lanes supported on the link
+ * @caps: capabilities supported on the link (see &drm_dp_link_caps)
+ * @aux_rd_interval: AUX read interval to use for training (in microseconds)
+ * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
+ * @rate: currently configured link rate + * @lanes: currently configured number of lanes + * @rates: additional supported link rates in kHz (eDP 1.4) + * @num_rates: number of additional supported link rates (eDP 1.4) + */ +struct drm_dp_link { + unsigned char revision; + unsigned int max_rate; + unsigned int max_lanes; + + struct drm_dp_link_caps caps; + + /** + * @cr: clock recovery read interval + * @ce: channel equalization read interval + */ + struct { + unsigned int cr; + unsigned int ce; + } aux_rd_interval; + + unsigned char edp; + + unsigned int rate; + unsigned int lanes; + + unsigned long rates[DP_MAX_SUPPORTED_RATES]; + unsigned int num_rates; + + /** + * @ops: DP link operations + */ + const struct drm_dp_link_ops *ops; + + /** + * @aux: DP AUX channel + */ + struct drm_dp_aux *aux; + + /** + * @train: DP link training state + */ + struct drm_dp_link_train train; +}; + +int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate); +int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate); +void drm_dp_link_update_rates(struct drm_dp_link *link); + +int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link); +int drm_dp_link_choose(struct drm_dp_link *link, + const struct drm_display_mode *mode, + const struct drm_display_info *info); + +void drm_dp_link_train_init(struct drm_dp_link_train *train); +int drm_dp_link_train(struct drm_dp_link *link); + +#endif diff --git a/drivers/gpu/drm/grate/dpaux.c b/drivers/gpu/drm/grate/dpaux.c new file mode 100644 index 0000000000000..7d7cc90b6fc94 --- /dev/null +++ b/drivers/gpu/drm/grate/dpaux.c @@ -0,0 +1,823 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "dp.h" +#include "dpaux.h" +#include "drm.h" +#include "trace.h" + +static DEFINE_MUTEX(dpaux_lock); +static LIST_HEAD(dpaux_list); + +struct tegra_dpaux_soc { + unsigned int cmh; + unsigned int drvz; + unsigned int drvi; +}; + +struct tegra_dpaux { + struct drm_dp_aux aux; + struct device *dev; + + const struct tegra_dpaux_soc *soc; + + void __iomem *regs; + int irq; + + struct tegra_output *output; + + struct reset_control *rst; + struct clk *clk_parent; + struct clk *clk; + + struct regulator *vdd; + + struct completion complete; + struct work_struct work; + struct list_head list; + +#ifdef CONFIG_GENERIC_PINCONF + struct pinctrl_dev *pinctrl; + struct pinctrl_desc desc; +#endif +}; + +static inline struct tegra_dpaux *to_dpaux(struct drm_dp_aux *aux) +{ + return container_of(aux, struct tegra_dpaux, aux); +} + +static inline struct tegra_dpaux *work_to_dpaux(struct work_struct *work) +{ + return container_of(work, struct tegra_dpaux, work); +} + +static inline u32 tegra_dpaux_readl(struct tegra_dpaux *dpaux, + unsigned int offset) +{ + u32 value = readl(dpaux->regs + (offset << 2)); + + trace_dpaux_readl(dpaux->dev, offset, value); + + return value; +} + +static inline void tegra_dpaux_writel(struct tegra_dpaux *dpaux, + u32 value, unsigned int offset) +{ + trace_dpaux_writel(dpaux->dev, offset, value); + writel(value, dpaux->regs + (offset << 2)); +} + +static void 
tegra_dpaux_write_fifo(struct tegra_dpaux *dpaux, const u8 *buffer, + size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value = 0; + + for (j = 0; j < num; j++) + value |= buffer[i * 4 + j] << (j * 8); + + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXDATA_WRITE(i)); + } +} + +static void tegra_dpaux_read_fifo(struct tegra_dpaux *dpaux, u8 *buffer, + size_t size) +{ + size_t i, j; + + for (i = 0; i < DIV_ROUND_UP(size, 4); i++) { + size_t num = min_t(size_t, size - i * 4, 4); + u32 value; + + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXDATA_READ(i)); + + for (j = 0; j < num; j++) + buffer[i * 4 + j] = value >> (j * 8); + } +} + +static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux, + struct drm_dp_aux_msg *msg) +{ + unsigned long timeout = msecs_to_jiffies(250); + struct tegra_dpaux *dpaux = to_dpaux(aux); + unsigned long status; + ssize_t ret = 0; + u8 reply = 0; + u32 value; + + /* Tegra has 4x4 byte DP AUX transmit and receive FIFOs. */ + if (msg->size > 16) + return -EINVAL; + + /* + * Allow zero-sized messages only for I2C, in which case they specify + * address-only transactions. + */ + if (msg->size < 1) { + switch (msg->request & ~DP_AUX_I2C_MOT) { + case DP_AUX_I2C_WRITE_STATUS_UPDATE: + case DP_AUX_I2C_WRITE: + case DP_AUX_I2C_READ: + value = DPAUX_DP_AUXCTL_CMD_ADDRESS_ONLY; + break; + + default: + return -EINVAL; + } + } else { + /* For non-zero-sized messages, set the CMDLEN field. */ + value = DPAUX_DP_AUXCTL_CMDLEN(msg->size - 1); + } + + switch (msg->request & ~DP_AUX_I2C_MOT) { + case DP_AUX_I2C_WRITE: + if (msg->request & DP_AUX_I2C_MOT) + value |= DPAUX_DP_AUXCTL_CMD_MOT_WR; + else + value |= DPAUX_DP_AUXCTL_CMD_I2C_WR; + + break; + + case DP_AUX_I2C_READ: + if (msg->request & DP_AUX_I2C_MOT) + value |= DPAUX_DP_AUXCTL_CMD_MOT_RD; + else + value |= DPAUX_DP_AUXCTL_CMD_I2C_RD; + + break; + + case DP_AUX_I2C_WRITE_STATUS_UPDATE: + if (msg->request & DP_AUX_I2C_MOT) + value |= DPAUX_DP_AUXCTL_CMD_MOT_RQ; + else + value |= DPAUX_DP_AUXCTL_CMD_I2C_RQ; + + break; + + case DP_AUX_NATIVE_WRITE: + value |= DPAUX_DP_AUXCTL_CMD_AUX_WR; + break; + + case DP_AUX_NATIVE_READ: + value |= DPAUX_DP_AUXCTL_CMD_AUX_RD; + break; + + default: + return -EINVAL; + } + + tegra_dpaux_writel(dpaux, msg->address, DPAUX_DP_AUXADDR); + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXCTL); + + if ((msg->request & DP_AUX_I2C_READ) == 0) { + tegra_dpaux_write_fifo(dpaux, msg->buffer, msg->size); + ret = msg->size; + } + + /* start transaction */ + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXCTL); + value |= DPAUX_DP_AUXCTL_TRANSACTREQ; + tegra_dpaux_writel(dpaux, value, DPAUX_DP_AUXCTL); + + status = wait_for_completion_timeout(&dpaux->complete, timeout); + if (!status) + return -ETIMEDOUT; + + /* read status and clear errors */ + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXSTAT); + tegra_dpaux_writel(dpaux, 0xf00, DPAUX_DP_AUXSTAT); + + if (value & DPAUX_DP_AUXSTAT_TIMEOUT_ERROR) + return -ETIMEDOUT; + + if ((value & DPAUX_DP_AUXSTAT_RX_ERROR) || + (value & DPAUX_DP_AUXSTAT_SINKSTAT_ERROR) || + (value & DPAUX_DP_AUXSTAT_NO_STOP_ERROR)) + return -EIO; + + switch ((value & DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK) >> 16) { + case 0x00: + reply = DP_AUX_NATIVE_REPLY_ACK; + break; + + case 0x01: + reply = DP_AUX_NATIVE_REPLY_NACK; + break; + + case 0x02: + reply = DP_AUX_NATIVE_REPLY_DEFER; + break; + + case 0x04: + reply = DP_AUX_I2C_REPLY_NACK; + break; + + case 0x08: + reply = DP_AUX_I2C_REPLY_DEFER; + break; + } + + if 
((msg->size > 0) && (reply == DP_AUX_NATIVE_REPLY_ACK)) {
+		/*
+		 * Check the locally decoded reply code here; msg->reply is
+		 * stale until it is updated at the end of the transfer.
+		 */
+		if (msg->request & DP_AUX_I2C_READ) {
+			size_t count = value & DPAUX_DP_AUXSTAT_REPLY_MASK;
+
+			/*
+			 * There might be a smarter way to do this, but since
+			 * the DP helpers will already retry transactions for
+			 * an -EBUSY return value, simply reuse that instead.
+			 */
+			if (count != msg->size) {
+				ret = -EBUSY;
+				goto out;
+			}
+
+			tegra_dpaux_read_fifo(dpaux, msg->buffer, count);
+			ret = count;
+		}
+	}
+
+	msg->reply = reply;
+
+out:
+	return ret;
+}
+
+static void tegra_dpaux_hotplug(struct work_struct *work)
+{
+	struct tegra_dpaux *dpaux = work_to_dpaux(work);
+
+	if (dpaux->output)
+		drm_helper_hpd_irq_event(dpaux->output->connector.dev);
+}
+
+static irqreturn_t tegra_dpaux_irq(int irq, void *data)
+{
+	struct tegra_dpaux *dpaux = data;
+	irqreturn_t ret = IRQ_HANDLED;
+	u32 value;
+
+	/* clear interrupts */
+	value = tegra_dpaux_readl(dpaux, DPAUX_INTR_AUX);
+	tegra_dpaux_writel(dpaux, value, DPAUX_INTR_AUX);
+
+	if (value & (DPAUX_INTR_PLUG_EVENT | DPAUX_INTR_UNPLUG_EVENT))
+		schedule_work(&dpaux->work);
+
+	if (value & DPAUX_INTR_IRQ_EVENT) {
+		/* TODO: handle this */
+	}
+
+	if (value & DPAUX_INTR_AUX_DONE)
+		complete(&dpaux->complete);
+
+	return ret;
+}
+
+enum tegra_dpaux_functions {
+	DPAUX_PADCTL_FUNC_AUX,
+	DPAUX_PADCTL_FUNC_I2C,
+	DPAUX_PADCTL_FUNC_OFF,
+};
+
+static void tegra_dpaux_pad_power_down(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static void tegra_dpaux_pad_power_up(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
+{
+	u32 value;
+
+	switch (function) {
+	case DPAUX_PADCTL_FUNC_AUX:
+		value = DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
+			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_MODE_AUX;
+		break;
+
+	case DPAUX_PADCTL_FUNC_I2C:
+		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
+			DPAUX_HYBRID_PADCTL_MODE_I2C;
+		break;
+
+	case DPAUX_PADCTL_FUNC_OFF:
+		tegra_dpaux_pad_power_down(dpaux);
+		return 0;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
+	tegra_dpaux_pad_power_up(dpaux);
+
+	return 0;
+}
+
+#ifdef CONFIG_GENERIC_PINCONF
+static const struct pinctrl_pin_desc tegra_dpaux_pins[] = {
+	PINCTRL_PIN(0, "DP_AUX_CHx_P"),
+	PINCTRL_PIN(1, "DP_AUX_CHx_N"),
+};
+
+static const unsigned tegra_dpaux_pin_numbers[] = { 0, 1 };
+
+static const char * const tegra_dpaux_groups[] = {
+	"dpaux-io",
+};
+
+static const char * const tegra_dpaux_functions[] = {
+	"aux",
+	"i2c",
+	"off",
+};
+
+static int tegra_dpaux_get_groups_count(struct pinctrl_dev *pinctrl)
+{
+	return ARRAY_SIZE(tegra_dpaux_groups);
+}
+
+static const char *tegra_dpaux_get_group_name(struct pinctrl_dev *pinctrl,
+					      unsigned int group)
+{
+	return tegra_dpaux_groups[group];
+}
+
+static int tegra_dpaux_get_group_pins(struct pinctrl_dev *pinctrl,
+				      unsigned group, const unsigned **pins,
+				      unsigned 
*num_pins) +{ + *pins = tegra_dpaux_pin_numbers; + *num_pins = ARRAY_SIZE(tegra_dpaux_pin_numbers); + + return 0; +} + +static const struct pinctrl_ops tegra_dpaux_pinctrl_ops = { + .get_groups_count = tegra_dpaux_get_groups_count, + .get_group_name = tegra_dpaux_get_group_name, + .get_group_pins = tegra_dpaux_get_group_pins, + .dt_node_to_map = pinconf_generic_dt_node_to_map_group, + .dt_free_map = pinconf_generic_dt_free_map, +}; + +static int tegra_dpaux_get_functions_count(struct pinctrl_dev *pinctrl) +{ + return ARRAY_SIZE(tegra_dpaux_functions); +} + +static const char *tegra_dpaux_get_function_name(struct pinctrl_dev *pinctrl, + unsigned int function) +{ + return tegra_dpaux_functions[function]; +} + +static int tegra_dpaux_get_function_groups(struct pinctrl_dev *pinctrl, + unsigned int function, + const char * const **groups, + unsigned * const num_groups) +{ + *num_groups = ARRAY_SIZE(tegra_dpaux_groups); + *groups = tegra_dpaux_groups; + + return 0; +} + +static int tegra_dpaux_set_mux(struct pinctrl_dev *pinctrl, + unsigned int function, unsigned int group) +{ + struct tegra_dpaux *dpaux = pinctrl_dev_get_drvdata(pinctrl); + + return tegra_dpaux_pad_config(dpaux, function); +} + +static const struct pinmux_ops tegra_dpaux_pinmux_ops = { + .get_functions_count = tegra_dpaux_get_functions_count, + .get_function_name = tegra_dpaux_get_function_name, + .get_function_groups = tegra_dpaux_get_function_groups, + .set_mux = tegra_dpaux_set_mux, +}; +#endif + +static int tegra_dpaux_probe(struct platform_device *pdev) +{ + struct tegra_dpaux *dpaux; + struct resource *regs; + u32 value; + int err; + + dpaux = devm_kzalloc(&pdev->dev, sizeof(*dpaux), GFP_KERNEL); + if (!dpaux) + return -ENOMEM; + + dpaux->soc = of_device_get_match_data(&pdev->dev); + INIT_WORK(&dpaux->work, tegra_dpaux_hotplug); + init_completion(&dpaux->complete); + INIT_LIST_HEAD(&dpaux->list); + dpaux->dev = &pdev->dev; + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dpaux->regs = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(dpaux->regs)) + return PTR_ERR(dpaux->regs); + + dpaux->irq = platform_get_irq(pdev, 0); + if (dpaux->irq < 0) { + dev_err(&pdev->dev, "failed to get IRQ\n"); + return -ENXIO; + } + + if (!pdev->dev.pm_domain) { + dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux"); + if (IS_ERR(dpaux->rst)) { + dev_err(&pdev->dev, + "failed to get reset control: %ld\n", + PTR_ERR(dpaux->rst)); + return PTR_ERR(dpaux->rst); + } + } + + dpaux->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dpaux->clk)) { + dev_err(&pdev->dev, "failed to get module clock: %ld\n", + PTR_ERR(dpaux->clk)); + return PTR_ERR(dpaux->clk); + } + + dpaux->clk_parent = devm_clk_get(&pdev->dev, "parent"); + if (IS_ERR(dpaux->clk_parent)) { + dev_err(&pdev->dev, "failed to get parent clock: %ld\n", + PTR_ERR(dpaux->clk_parent)); + return PTR_ERR(dpaux->clk_parent); + } + + err = clk_set_rate(dpaux->clk_parent, 270000000); + if (err < 0) { + dev_err(&pdev->dev, "failed to set clock to 270 MHz: %d\n", + err); + return err; + } + + dpaux->vdd = devm_regulator_get_optional(&pdev->dev, "vdd"); + if (IS_ERR(dpaux->vdd)) { + if (PTR_ERR(dpaux->vdd) != -ENODEV) { + if (PTR_ERR(dpaux->vdd) != -EPROBE_DEFER) + dev_err(&pdev->dev, + "failed to get VDD supply: %ld\n", + PTR_ERR(dpaux->vdd)); + + return PTR_ERR(dpaux->vdd); + } + + dpaux->vdd = NULL; + } + + platform_set_drvdata(pdev, dpaux); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + err = devm_request_irq(dpaux->dev, dpaux->irq, tegra_dpaux_irq, 
0, + dev_name(dpaux->dev), dpaux); + if (err < 0) { + dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n", + dpaux->irq, err); + return err; + } + + disable_irq(dpaux->irq); + + dpaux->aux.transfer = tegra_dpaux_transfer; + dpaux->aux.dev = &pdev->dev; + + drm_dp_aux_init(&dpaux->aux); + + /* + * Assume that by default the DPAUX/I2C pads will be used for HDMI, + * so power them up and configure them in I2C mode. + * + * The DPAUX code paths reconfigure the pads in AUX mode, but there + * is no possibility to perform the I2C mode configuration in the + * HDMI path. + */ + err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C); + if (err < 0) + return err; + +#ifdef CONFIG_GENERIC_PINCONF + dpaux->desc.name = dev_name(&pdev->dev); + dpaux->desc.pins = tegra_dpaux_pins; + dpaux->desc.npins = ARRAY_SIZE(tegra_dpaux_pins); + dpaux->desc.pctlops = &tegra_dpaux_pinctrl_ops; + dpaux->desc.pmxops = &tegra_dpaux_pinmux_ops; + dpaux->desc.owner = THIS_MODULE; + + dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux); + if (IS_ERR(dpaux->pinctrl)) { + dev_err(&pdev->dev, "failed to register pincontrol\n"); + return PTR_ERR(dpaux->pinctrl); + } +#endif + /* enable and clear all interrupts */ + value = DPAUX_INTR_AUX_DONE | DPAUX_INTR_IRQ_EVENT | + DPAUX_INTR_UNPLUG_EVENT | DPAUX_INTR_PLUG_EVENT; + tegra_dpaux_writel(dpaux, value, DPAUX_INTR_EN_AUX); + tegra_dpaux_writel(dpaux, value, DPAUX_INTR_AUX); + + mutex_lock(&dpaux_lock); + list_add_tail(&dpaux->list, &dpaux_list); + mutex_unlock(&dpaux_lock); + + return 0; +} + +static int tegra_dpaux_remove(struct platform_device *pdev) +{ + struct tegra_dpaux *dpaux = platform_get_drvdata(pdev); + + cancel_work_sync(&dpaux->work); + + /* make sure pads are powered down when not in use */ + tegra_dpaux_pad_power_down(dpaux); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + + mutex_lock(&dpaux_lock); + list_del(&dpaux->list); + mutex_unlock(&dpaux_lock); + + return 0; +} + +#ifdef CONFIG_PM +static int tegra_dpaux_suspend(struct device *dev) +{ + struct tegra_dpaux *dpaux = dev_get_drvdata(dev); + int err = 0; + + if (dpaux->rst) { + err = reset_control_assert(dpaux->rst); + if (err < 0) { + dev_err(dev, "failed to assert reset: %d\n", err); + return err; + } + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(dpaux->clk_parent); + clk_disable_unprepare(dpaux->clk); + + return err; +} + +static int tegra_dpaux_resume(struct device *dev) +{ + struct tegra_dpaux *dpaux = dev_get_drvdata(dev); + int err; + + err = clk_prepare_enable(dpaux->clk); + if (err < 0) { + dev_err(dev, "failed to enable clock: %d\n", err); + return err; + } + + err = clk_prepare_enable(dpaux->clk_parent); + if (err < 0) { + dev_err(dev, "failed to enable parent clock: %d\n", err); + goto disable_clk; + } + + usleep_range(1000, 2000); + + if (dpaux->rst) { + err = reset_control_deassert(dpaux->rst); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_parent; + } + + usleep_range(1000, 2000); + } + + return 0; + +disable_parent: + clk_disable_unprepare(dpaux->clk_parent); +disable_clk: + clk_disable_unprepare(dpaux->clk); + return err; +} +#endif + +static const struct dev_pm_ops tegra_dpaux_pm_ops = { + SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL) +}; + +static const struct tegra_dpaux_soc tegra124_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x18, +}; + +static const struct tegra_dpaux_soc tegra210_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x30, +}; + 
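[Editor's aside, not part of the patch: the per-SoC tables here differ only in the AUX pad drive parameters (common-mode level, impedance, drive strength), which tegra_dpaux_pad_config() earlier in this file ORs into DPAUX_HYBRID_PADCTL. A minimal standalone sketch of that computation for the Tegra124 values; the macros are copied from the dpaux.h hunk later in this patch, the main() harness is an illustrative assumption.]

	/* Illustrative only: mirrors the AUX-function branch of
	 * tegra_dpaux_pad_config() with the tegra124_dpaux_soc values. */
	#include <stdio.h>

	#define DPAUX_HYBRID_PADCTL_AUX_CMH(x)    (((x) & 0x3) << 12)
	#define DPAUX_HYBRID_PADCTL_AUX_DRVZ(x)   (((x) & 0x7) << 8)
	#define DPAUX_HYBRID_PADCTL_AUX_DRVI(x)   (((x) & 0x3f) << 2)
	#define DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV (1 << 1)
	#define DPAUX_HYBRID_PADCTL_MODE_AUX      (0 << 0)

	int main(void)
	{
		unsigned int cmh = 0x02, drvz = 0x04, drvi = 0x18; /* tegra124 */
		unsigned int value;

		value = DPAUX_HYBRID_PADCTL_AUX_CMH(cmh) |
			DPAUX_HYBRID_PADCTL_AUX_DRVZ(drvz) |
			DPAUX_HYBRID_PADCTL_AUX_DRVI(drvi) |
			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
			DPAUX_HYBRID_PADCTL_MODE_AUX;

		/* 0x2000 | 0x400 | 0x60 | 0x2 = 0x2462 */
		printf("DPAUX_HYBRID_PADCTL (AUX mode) = %#x\n", value);
		return 0;
	}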
+static const struct tegra_dpaux_soc tegra194_dpaux_soc = { + .cmh = 0x02, + .drvz = 0x04, + .drvi = 0x2c, +}; + +static const struct of_device_id tegra_dpaux_of_match[] = { + { .compatible = "nvidia,tegra194-dpaux", .data = &tegra194_dpaux_soc }, + { .compatible = "nvidia,tegra186-dpaux", .data = &tegra210_dpaux_soc }, + { .compatible = "nvidia,tegra210-dpaux", .data = &tegra210_dpaux_soc }, + { .compatible = "nvidia,tegra124-dpaux", .data = &tegra124_dpaux_soc }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_dpaux_of_match); + +struct platform_driver tegra_dpaux_driver = { + .driver = { + .name = "tegra-dpaux", + .of_match_table = tegra_dpaux_of_match, + .pm = &tegra_dpaux_pm_ops, + }, + .probe = tegra_dpaux_probe, + .remove = tegra_dpaux_remove, +}; + +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np) +{ + struct tegra_dpaux *dpaux; + + mutex_lock(&dpaux_lock); + + list_for_each_entry(dpaux, &dpaux_list, list) + if (np == dpaux->dev->of_node) { + mutex_unlock(&dpaux_lock); + return &dpaux->aux; + } + + mutex_unlock(&dpaux_lock); + + return NULL; +} + +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + unsigned long timeout; + int err; + + aux->drm_dev = output->connector.dev; + err = drm_dp_aux_register(aux); + if (err < 0) + return err; + + output->connector.polled = DRM_CONNECTOR_POLL_HPD; + dpaux->output = output; + + if (output->panel) { + enum drm_connector_status status; + + if (dpaux->vdd) { + err = regulator_enable(dpaux->vdd); + if (err < 0) + return err; + } + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { + status = drm_dp_aux_detect(aux); + + if (status == connector_status_connected) + break; + + usleep_range(1000, 2000); + } + + if (status != connector_status_connected) + return -ETIMEDOUT; + } + + enable_irq(dpaux->irq); + return 0; +} + +int drm_dp_aux_detach(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + unsigned long timeout; + int err; + + drm_dp_aux_unregister(aux); + disable_irq(dpaux->irq); + + if (dpaux->output->panel) { + enum drm_connector_status status; + + if (dpaux->vdd) { + err = regulator_disable(dpaux->vdd); + if (err < 0) + return err; + } + + timeout = jiffies + msecs_to_jiffies(250); + + while (time_before(jiffies, timeout)) { + status = drm_dp_aux_detect(aux); + + if (status == connector_status_disconnected) + break; + + usleep_range(1000, 2000); + } + + if (status != connector_status_disconnected) + return -ETIMEDOUT; + + dpaux->output = NULL; + } + + return 0; +} + +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + u32 value; + + value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXSTAT); + + if (value & DPAUX_DP_AUXSTAT_HPD_STATUS) + return connector_status_connected; + + return connector_status_disconnected; +} + +int drm_dp_aux_enable(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + + return tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_AUX); +} + +int drm_dp_aux_disable(struct drm_dp_aux *aux) +{ + struct tegra_dpaux *dpaux = to_dpaux(aux); + + tegra_dpaux_pad_power_down(dpaux); + + return 0; +} diff --git a/drivers/gpu/drm/grate/dpaux.h b/drivers/gpu/drm/grate/dpaux.h new file mode 100644 index 0000000000000..5eced10fad37d --- /dev/null +++ b/drivers/gpu/drm/grate/dpaux.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#ifndef 
DRM_TEGRA_DPAUX_H +#define DRM_TEGRA_DPAUX_H + +#define DPAUX_CTXSW 0x00 + +#define DPAUX_INTR_EN_AUX 0x01 +#define DPAUX_INTR_AUX 0x05 +#define DPAUX_INTR_AUX_DONE (1 << 3) +#define DPAUX_INTR_IRQ_EVENT (1 << 2) +#define DPAUX_INTR_UNPLUG_EVENT (1 << 1) +#define DPAUX_INTR_PLUG_EVENT (1 << 0) + +#define DPAUX_DP_AUXDATA_WRITE(x) (0x09 + ((x) << 2)) +#define DPAUX_DP_AUXDATA_READ(x) (0x19 + ((x) << 2)) +#define DPAUX_DP_AUXADDR 0x29 + +#define DPAUX_DP_AUXCTL 0x2d +#define DPAUX_DP_AUXCTL_TRANSACTREQ (1 << 16) +#define DPAUX_DP_AUXCTL_CMD_AUX_RD (9 << 12) +#define DPAUX_DP_AUXCTL_CMD_AUX_WR (8 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_RQ (6 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_RD (5 << 12) +#define DPAUX_DP_AUXCTL_CMD_MOT_WR (4 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_RQ (2 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_RD (1 << 12) +#define DPAUX_DP_AUXCTL_CMD_I2C_WR (0 << 12) +#define DPAUX_DP_AUXCTL_CMD_ADDRESS_ONLY (1 << 8) +#define DPAUX_DP_AUXCTL_CMDLEN(x) ((x) & 0xff) + +#define DPAUX_DP_AUXSTAT 0x31 +#define DPAUX_DP_AUXSTAT_HPD_STATUS (1 << 28) +#define DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK (0xf0000) +#define DPAUX_DP_AUXSTAT_NO_STOP_ERROR (1 << 11) +#define DPAUX_DP_AUXSTAT_SINKSTAT_ERROR (1 << 10) +#define DPAUX_DP_AUXSTAT_RX_ERROR (1 << 9) +#define DPAUX_DP_AUXSTAT_TIMEOUT_ERROR (1 << 8) +#define DPAUX_DP_AUXSTAT_REPLY_MASK (0xff) + +#define DPAUX_DP_AUX_SINKSTAT_LO 0x35 +#define DPAUX_DP_AUX_SINKSTAT_HI 0x39 + +#define DPAUX_HPD_CONFIG 0x3d +#define DPAUX_HPD_CONFIG_UNPLUG_MIN_TIME(x) (((x) & 0xffff) << 16) +#define DPAUX_HPD_CONFIG_PLUG_MIN_TIME(x) ((x) & 0xffff) + +#define DPAUX_HPD_IRQ_CONFIG 0x41 +#define DPAUX_HPD_IRQ_CONFIG_MIN_LOW_TIME(x) ((x) & 0xffff) + +#define DPAUX_DP_AUX_CONFIG 0x45 + +#define DPAUX_HYBRID_PADCTL 0x49 +#define DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV (1 << 15) +#define DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV (1 << 14) +#define DPAUX_HYBRID_PADCTL_AUX_CMH(x) (((x) & 0x3) << 12) +#define DPAUX_HYBRID_PADCTL_AUX_DRVZ(x) (((x) & 0x7) << 8) +#define DPAUX_HYBRID_PADCTL_AUX_DRVI(x) (((x) & 0x3f) << 2) +#define DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV (1 << 1) +#define DPAUX_HYBRID_PADCTL_MODE_I2C (1 << 0) +#define DPAUX_HYBRID_PADCTL_MODE_AUX (0 << 0) + +#define DPAUX_HYBRID_SPARE 0x4d +#define DPAUX_HYBRID_SPARE_PAD_POWER_DOWN (1 << 0) + +#define DPAUX_SCRATCH_REG0 0x51 +#define DPAUX_SCRATCH_REG1 0x55 +#define DPAUX_SCRATCH_REG2 0x59 + +#endif diff --git a/drivers/gpu/drm/grate/drm.c b/drivers/gpu/drm/grate/drm.c new file mode 100644 index 0000000000000..0e8a7f55e1b1f --- /dev/null +++ b/drivers/gpu/drm/grate/drm.c @@ -0,0 +1,738 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2016 NVIDIA CORPORATION. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "dc.h" +#include "drm.h" +#include "uapi.h" + +#define DRIVER_NAME "tegra" +#define DRIVER_DESC "NVIDIA Tegra graphics" +#define DRIVER_DATE "20120330" +#define DRIVER_MAJOR GRATE_KERNEL_DRM_VERSION +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#define CARVEOUT_SZ SZ_64M + +static int tegra_atomic_check(struct drm_device *drm, + struct drm_atomic_state *state) +{ + int err; + + err = drm_atomic_helper_check(drm, state); + if (err < 0) + return err; + + return tegra_display_hub_atomic_check(drm, state); +} + +static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { + .fb_create = tegra_fb_create, +#ifdef CONFIG_DRM_FBDEV_EMULATION + .output_poll_changed = drm_fb_helper_output_poll_changed, +#endif + .atomic_check = tegra_atomic_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void tegra_atomic_post_commit(struct drm_device *drm, + struct drm_atomic_state *old_state) +{ + struct drm_crtc_state *old_crtc_state __maybe_unused; + struct drm_crtc *crtc; + unsigned int i; + + for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) + tegra_crtc_atomic_post_commit(crtc, old_state); +} + +static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state) +{ + struct drm_device *drm = old_state->dev; + struct tegra_drm *tegra = drm->dev_private; + + if (tegra->hub) { + bool fence_cookie = dma_fence_begin_signalling(); + + drm_atomic_helper_commit_modeset_disables(drm, old_state); + tegra_display_hub_atomic_commit(drm, old_state); + drm_atomic_helper_commit_planes(drm, old_state, 0); + drm_atomic_helper_commit_modeset_enables(drm, old_state); + drm_atomic_helper_commit_hw_done(old_state); + dma_fence_end_signalling(fence_cookie); + drm_atomic_helper_wait_for_vblanks(drm, old_state); + drm_atomic_helper_cleanup_planes(drm, old_state); + } else { + drm_atomic_helper_commit_tail_rpm(old_state); + } + + tegra_atomic_post_commit(drm, old_state); +} + +static const struct drm_mode_config_helper_funcs +tegra_drm_mode_config_helpers = { + .atomic_commit_tail = tegra_atomic_commit_tail, +}; + +static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp) +{ + struct host1x *host = dev_get_drvdata(drm->dev->parent); + struct tegra_drm *tegra = drm->dev_private; + struct tegra_drm_channel *drm_channel; + struct drm_gpu_scheduler *sched; + struct host1x_channel *channel; + struct tegra_drm_file *fpriv; + int err; + + fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); + if (!fpriv) + return -ENOMEM; + + filp->driver_priv = fpriv; + + /* each host1x channel has its own per-context job-queue */ + fpriv->sched_entities = kcalloc(host->soc->nb_channels, + sizeof(*fpriv->sched_entities), + GFP_KERNEL); + if (!fpriv->sched_entities) { + err = -ENOMEM; + goto err_free_fpriv; + } + + list_for_each_entry(drm_channel, &tegra->channels, list) { + channel = drm_channel->channel; + sched = &drm_channel->sched; + + err = drm_sched_entity_init(&fpriv->sched_entities[channel->id], + DRM_SCHED_PRIORITY_NORMAL, &sched, + 1, NULL); + if (err) + goto err_destroy_sched_entities; + } + + idr_preload(GFP_KERNEL); + spin_lock(&tegra->context_lock); + + err = idr_alloc(&tegra->drm_contexts, fpriv, 1, 0, GFP_ATOMIC); + + spin_unlock(&tegra->context_lock); + idr_preload_end(); + + if (err < 0) + goto err_destroy_sched_entities; + + fpriv->drm_context = err; + + idr_init(&fpriv->uapi_v1_contexts); + + return 0; + 
+err_destroy_sched_entities:
+	list_for_each_entry_continue_reverse(drm_channel, &tegra->channels,
+					     list) {
+		channel = drm_channel->channel;
+		drm_sched_entity_destroy(&fpriv->sched_entities[channel->id]);
+	}
+
+err_free_fpriv:
+	kfree(fpriv);
+
+	return err;
+}
+
+static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_uapi_gem_create,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_uapi_gem_mmap,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_uapi_syncpt_read,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_uapi_syncpt_incr,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_WAIT, tegra_uapi_syncpt_wait,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_OPEN_CHANNEL, tegra_uapi_open_channel,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CLOSE_CHANNEL, tegra_uapi_close_channel,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_uapi_get_syncpt,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_uapi_v1_submit,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT_BASE, tegra_uapi_get_syncpt_base,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_TILING, tegra_uapi_gem_set_tiling,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_TILING, tegra_uapi_gem_get_tiling,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_FLAGS, tegra_uapi_gem_set_flags,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_FLAGS, tegra_uapi_gem_get_flags,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CPU_PREP, tegra_uapi_gem_cpu_prep,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT_V2, tegra_uapi_v2_submit,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_VERSION, tegra_uapi_version,
+			  DRM_RENDER_ALLOW),
+};
+
+static const struct file_operations tegra_drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = tegra_drm_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+};
+
+static int tegra_uapi_v1_contexts_cleanup(int id, void *p, void *data)
+{
+	struct tegra_drm_context_v1 *context = p;
+
+	tegra_uapi_v1_free_context(context);
+
+	return 0;
+}
+
+static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm *tegra = drm->dev_private;
+	struct tegra_drm_channel *drm_channel;
+	struct host1x_channel *channel;
+	int val, err;
+
+	spin_lock(&tegra->context_lock);
+	idr_remove(&tegra->drm_contexts, fpriv->drm_context);
+	spin_unlock(&tegra->context_lock);
+
+	list_for_each_entry(drm_channel, &tegra->channels, list) {
+		channel = drm_channel->channel;
+		drm_sched_entity_destroy(&fpriv->sched_entities[channel->id]);
+	}
+
+	/* job completion is asynchronous, see tegra_drm_work_free_job() */
+	err = readx_poll_timeout(atomic_read, &fpriv->num_active_jobs,
+				 val, val == 0, 100000, 30 * 1000 * 1000);
+	WARN_ON_ONCE(err);
+
+	spin_lock(&tegra->context_lock);
+	idr_for_each(&fpriv->uapi_v1_contexts, tegra_uapi_v1_contexts_cleanup,
+		     NULL);
+	spin_unlock(&tegra->context_lock);
+
+	idr_destroy(&fpriv->uapi_v1_contexts);
+
+	kfree(fpriv->sched_entities);
+	kfree(fpriv);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int tegra_debugfs_framebuffers(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)s->private;
+	struct drm_device *drm = node->minor->dev;
+	struct drm_framebuffer *fb;
+
+ mutex_lock(&drm->mode_config.fb_lock); + + list_for_each_entry(fb, &drm->mode_config.fb_list, head) { + seq_printf(s, "%3d: user size: %d x %d, depth %d, %d bpp, refcount %d\n", + fb->base.id, fb->width, fb->height, + fb->format->depth, + fb->format->cpp[0] * 8, + drm_framebuffer_read_refcount(fb)); + } + + mutex_unlock(&drm->mode_config.fb_lock); + + return 0; +} + +static int tegra_debugfs_iova(struct seq_file *s, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)s->private; + struct drm_device *drm = node->minor->dev; + struct tegra_drm *tegra = drm->dev_private; + struct drm_printer p = drm_seq_file_printer(s); + + if (tegra->domain) { + mutex_lock(&tegra->mm_lock); + drm_mm_print(&tegra->mm, &p); + mutex_unlock(&tegra->mm_lock); + } + + return 0; +} + +static struct drm_info_list tegra_debugfs_list[] = { + { "framebuffers", tegra_debugfs_framebuffers, 0 }, + { "iova", tegra_debugfs_iova, 0 }, +}; + +static void tegra_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(tegra_debugfs_list, + ARRAY_SIZE(tegra_debugfs_list), + minor->debugfs_root, minor); +} +#endif + +static struct drm_driver tegra_drm_driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | + DRIVER_ATOMIC | DRIVER_RENDER | + DRIVER_SYNCOBJ, + .open = tegra_drm_open, + .postclose = tegra_drm_postclose, + .lastclose = drm_fb_helper_lastclose, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = tegra_debugfs_init, +#endif + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = tegra_gem_prime_import, + + .dumb_create = tegra_bo_dumb_create, + + .ioctls = tegra_drm_ioctls, + .num_ioctls = ARRAY_SIZE(tegra_drm_ioctls), + .fops = &tegra_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) +{ + struct iova *alloc; + void *virt; + gfp_t gfp; + int err; + + if (!tegra->carveout.inited) + return NULL; + + if (tegra->domain) + size = iova_align(&tegra->carveout.domain, size); + else + size = PAGE_ALIGN(size); + + gfp = GFP_KERNEL | __GFP_ZERO; + if (!tegra->domain) { + /* + * Many units only support 32-bit addresses, even on 64-bit + * SoCs. If there is no IOMMU to translate into a 32-bit IO + * virtual address space, force allocations to be in the + * lower 32-bit range. + */ + gfp |= GFP_DMA; + } + + virt = (void *)__get_free_pages(gfp, get_order(size)); + if (!virt) + return ERR_PTR(-ENOMEM); + + if (!tegra->domain) { + /* + * If IOMMU is disabled, devices address physical memory + * directly. 
+ */ + *dma = virt_to_phys(virt); + return virt; + } + + alloc = alloc_iova(&tegra->carveout.domain, + size >> tegra->carveout.shift, + tegra->carveout.limit, true); + if (!alloc) { + err = -EBUSY; + goto free_pages; + } + + *dma = iova_dma_addr(&tegra->carveout.domain, alloc); + err = iommu_map(tegra->domain, *dma, virt_to_phys(virt), + size, IOMMU_READ | IOMMU_WRITE); + if (err < 0) + goto free_iova; + + return virt; + +free_iova: + __free_iova(&tegra->carveout.domain, alloc); +free_pages: + free_pages((unsigned long)virt, get_order(size)); + + return ERR_PTR(err); +} + +void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, + dma_addr_t dma) +{ + if (tegra->domain) + size = iova_align(&tegra->carveout.domain, size); + else + size = PAGE_ALIGN(size); + + if (tegra->domain) { + iommu_unmap(tegra->domain, dma, size); + free_iova(&tegra->carveout.domain, + iova_pfn(&tegra->carveout.domain, dma)); + } + + free_pages((unsigned long)virt, get_order(size)); +} + +static int host1x_drm_probe(struct host1x_device *dev) +{ + struct drm_driver *driver = &tegra_drm_driver; + struct tegra_drm *tegra; + struct drm_device *drm; + int err; + + drm = drm_dev_alloc(driver, &dev->dev); + if (IS_ERR(drm)) + return PTR_ERR(drm); + + tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); + if (!tegra) { + err = -ENOMEM; + goto put; + } + + if (iommu_present(&platform_bus_type)) { + tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (!tegra->domain) { + err = -ENOMEM; + goto free; + } + + err = iova_cache_get(); + if (err < 0) + goto domain; + + if (of_machine_is_compatible("nvidia,tegra20")) + tegra->has_gart = true; + } + + INIT_LIST_HEAD(&tegra->clients); + INIT_LIST_HEAD(&tegra->channels); + INIT_LIST_HEAD(&tegra->mm_eviction_list); + + mutex_init(&tegra->mm_lock); + idr_init_base(&tegra->drm_contexts, 1); + spin_lock_init(&tegra->context_lock); + init_completion(&tegra->gart_free_up); + + dev_set_drvdata(&dev->dev, drm); + drm->dev_private = tegra; + tegra->drm = drm; + + drm_mode_config_init(drm); + + drm->mode_config.min_width = 0; + drm->mode_config.min_height = 0; + + drm->mode_config.max_width = 4096; + drm->mode_config.max_height = 4096; + + drm->mode_config.allow_fb_modifiers = true; + + drm->mode_config.normalize_zpos = true; + + drm->mode_config.funcs = &tegra_drm_mode_config_funcs; + drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; + + err = tegra_drm_fb_prepare(drm); + if (err < 0) + goto config; + + drm_kms_helper_poll_init(drm); + + err = host1x_device_init(dev); + if (err < 0) + goto fbdev; + + if (tegra->domain) { + u64 carveout_start, carveout_end, gem_start, gem_end; + u64 dma_mask = dma_get_mask(&dev->dev); + dma_addr_t start, end; + unsigned long order; + bool need_carveout; + + start = tegra->domain->geometry.aperture_start & dma_mask; + end = tegra->domain->geometry.aperture_end & dma_mask; + + /* + * Carveout isn't needed on pre-Tegra124, especially on Tegra20 + * as it uses GART that has very limited amount of IOVA space. 
+ */ + if (of_machine_is_compatible("nvidia,tegra20") || + of_machine_is_compatible("nvidia,tegra30") || + of_machine_is_compatible("nvidia,tegra114")) + need_carveout = false; + else + need_carveout = true; + + gem_start = start; + gem_end = end; + + if (need_carveout) { + gem_end -= CARVEOUT_SZ; + carveout_start = gem_end + 1; + carveout_end = end; + + order = __ffs(tegra->domain->pgsize_bitmap); + init_iova_domain(&tegra->carveout.domain, 1UL << order, + carveout_start >> order); + + tegra->carveout.shift = + iova_shift(&tegra->carveout.domain); + tegra->carveout.limit = + carveout_end >> tegra->carveout.shift; + + tegra->carveout.inited = 1; + } + + drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1); + + DRM_DEBUG_DRIVER("IOMMU apertures:\n"); + DRM_DEBUG_DRIVER(" GEM: %#llx-%#llx\n", gem_start, gem_end); + + if (need_carveout) + DRM_DEBUG_DRIVER(" Carveout: %#llx-%#llx\n", + carveout_start, carveout_end); + } + + if (tegra->hub) { + err = tegra_display_hub_prepare(tegra->hub); + if (err < 0) + goto device; + } + + /* syncpoints are used for full 32-bit hardware VBLANK counters */ + drm->max_vblank_count = 0xffffffff; + + err = drm_vblank_init(drm, drm->mode_config.num_crtc); + if (err < 0) + goto hub; + + drm_mode_config_reset(drm); + + err = drm_aperture_remove_framebuffers(false, &tegra_drm_driver); + if (err < 0) + goto hub; + + err = tegra_drm_fb_init(drm); + if (err < 0) + goto hub; + + err = drm_dev_register(drm, 0); + if (err < 0) + goto fb; + + return 0; + +fb: + tegra_drm_fb_exit(drm); +hub: + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); +device: + if (tegra->domain) { + drm_mm_takedown(&tegra->mm); + if (tegra->carveout.inited) + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + } + + host1x_device_exit(dev); +fbdev: + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_free(drm); +config: + drm_mode_config_cleanup(drm); + + idr_destroy(&tegra->drm_contexts); + mutex_destroy(&tegra->mm_lock); +domain: + if (tegra->domain) + iommu_domain_free(tegra->domain); +free: + kfree(tegra); +put: + drm_dev_put(drm); + return err; +} + +static int host1x_drm_remove(struct host1x_device *dev) +{ + struct drm_device *drm = dev_get_drvdata(&dev->dev); + struct tegra_drm *tegra = drm->dev_private; + int err; + + drm_dev_unregister(drm); + + drm_kms_helper_poll_fini(drm); + tegra_drm_fb_exit(drm); + drm_atomic_helper_shutdown(drm); + drm_mode_config_cleanup(drm); + + if (tegra->hub) + tegra_display_hub_cleanup(tegra->hub); + + err = host1x_device_exit(dev); + if (err < 0) + dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err); + + if (tegra->domain) { + drm_mm_takedown(&tegra->mm); + if (tegra->carveout.inited) + put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + iommu_domain_free(tegra->domain); + } + + idr_destroy(&tegra->drm_contexts); + mutex_destroy(&tegra->mm_lock); + + kfree(tegra); + drm_dev_put(drm); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int host1x_drm_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + + return drm_mode_config_helper_suspend(drm); +} + +static int host1x_drm_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + + return drm_mode_config_helper_resume(drm); +} +#endif + +static SIMPLE_DEV_PM_OPS(host1x_drm_pm_ops, host1x_drm_suspend, + host1x_drm_resume); + +static const struct of_device_id host1x_drm_subdevs[] = { + { .compatible = "nvidia,tegra20-dc", }, + { .compatible = "nvidia,tegra20-hdmi", }, + { .compatible = "nvidia,tegra20-gr2d", }, + { 
.compatible = "nvidia,tegra20-gr3d", }, + { .compatible = "nvidia,tegra30-dc", }, + { .compatible = "nvidia,tegra30-hdmi", }, + { .compatible = "nvidia,tegra30-gr2d", }, + { .compatible = "nvidia,tegra30-gr3d", }, + { .compatible = "nvidia,tegra114-dc", }, + { .compatible = "nvidia,tegra114-dsi", }, + { .compatible = "nvidia,tegra114-hdmi", }, + { .compatible = "nvidia,tegra114-gr2d", }, + { .compatible = "nvidia,tegra114-gr3d", }, + { .compatible = "nvidia,tegra124-dc", }, + { .compatible = "nvidia,tegra124-sor", }, + { .compatible = "nvidia,tegra124-hdmi", }, + { .compatible = "nvidia,tegra124-dsi", }, + { .compatible = "nvidia,tegra124-vic", }, + { .compatible = "nvidia,tegra132-dsi", }, + { .compatible = "nvidia,tegra210-dc", }, + { .compatible = "nvidia,tegra210-dsi", }, + { .compatible = "nvidia,tegra210-sor", }, + { .compatible = "nvidia,tegra210-sor1", }, + { .compatible = "nvidia,tegra210-vic", }, + { .compatible = "nvidia,tegra186-display", }, + { .compatible = "nvidia,tegra186-dc", }, + { .compatible = "nvidia,tegra186-sor", }, + { .compatible = "nvidia,tegra186-sor1", }, + { .compatible = "nvidia,tegra186-vic", }, + { .compatible = "nvidia,tegra194-display", }, + { .compatible = "nvidia,tegra194-dc", }, + { .compatible = "nvidia,tegra194-sor", }, + { .compatible = "nvidia,tegra194-vic", }, + { /* sentinel */ } +}; + +static struct host1x_driver host1x_drm_driver = { + .driver = { + .name = "drm", + .pm = &host1x_drm_pm_ops, + }, + .probe = host1x_drm_probe, + .remove = host1x_drm_remove, + .subdevs = host1x_drm_subdevs, +}; + +static struct platform_driver * const drivers[] = { + &tegra_display_hub_driver, + &tegra_dc_driver, + &tegra_hdmi_driver, + &tegra_dsi_driver, + &tegra_dpaux_driver, + &tegra_sor_driver, + &tegra_gr2d_driver, + &tegra_gr3d_driver, + &tegra_vic_driver, +}; + +static int __init host1x_drm_init(void) +{ + int err; + + err = host1x_driver_register(&host1x_drm_driver); + if (err < 0) + return err; + + err = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); + if (err < 0) + goto unregister_host1x; + + return 0; + +unregister_host1x: + host1x_driver_unregister(&host1x_drm_driver); + return err; +} +module_init(host1x_drm_init); + +static void __exit host1x_drm_exit(void) +{ + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); + host1x_driver_unregister(&host1x_drm_driver); +} +module_exit(host1x_drm_exit); + +MODULE_AUTHOR("Thierry Reding "); +MODULE_DESCRIPTION("NVIDIA Tegra DRM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/grate/drm.h b/drivers/gpu/drm/grate/drm.h new file mode 100644 index 0000000000000..2f91c3d1939a6 --- /dev/null +++ b/drivers/gpu/drm/grate/drm.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2013 NVIDIA CORPORATION. All rights reserved. + */ + +#ifndef HOST1X_DRM_H +#define HOST1X_DRM_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gem.h" +#include "channel.h" +#include "client.h" +#include "hub.h" +#include "trace.h" + +#define GRATE_KERNEL_DRM_VERSION (99991 + 6) + +/* XXX move to include/uapi/drm/drm_fourcc.h? 
*/ +#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT_ULL(22) + +struct reset_control; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +struct tegra_fbdev { + struct drm_fb_helper base; + struct drm_framebuffer *fb; +}; +#endif + +struct tegra_drm { + struct drm_device *drm; + + struct iommu_domain *domain; + struct iommu_group *group; + struct mutex mm_lock; + struct drm_mm mm; + struct list_head mm_eviction_list; + + struct { + struct iova_domain domain; + unsigned long shift; + unsigned long limit; + bool inited : 1; + } carveout; + + struct list_head clients; + struct list_head channels; + + spinlock_t context_lock; + struct idr drm_contexts; + +#ifdef CONFIG_DRM_FBDEV_EMULATION + struct tegra_fbdev *fbdev; +#endif + + unsigned int hmask, vmask; + unsigned int pitch_align; + unsigned int num_crtcs; + + struct tegra_display_hub *hub; + + struct completion gart_free_up; + + bool has_gart; +}; + +struct tegra_drm_file { + struct drm_sched_entity *sched_entities; + struct idr uapi_v1_contexts; + atomic_t num_active_jobs; + u64 drm_context; +}; + +int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm); +int tegra_drm_exit(struct tegra_drm *tegra); + +void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *iova); +void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, + dma_addr_t iova); + +struct cec_notifier; + +struct tegra_output { + struct device_node *of_node; + struct device *dev; + + struct drm_bridge *bridge; + struct drm_panel *panel; + struct i2c_adapter *ddc; + const struct edid *edid; + struct cec_notifier *cec; + unsigned int hpd_irq; + struct gpio_desc *hpd_gpio; + + struct drm_encoder encoder; + struct drm_connector connector; +}; + +static inline struct tegra_output *encoder_to_output(struct drm_encoder *e) +{ + return container_of(e, struct tegra_output, encoder); +} + +static inline struct tegra_output *connector_to_output(struct drm_connector *c) +{ + return container_of(c, struct tegra_output, connector); +} + +/* from output.c */ +int tegra_output_probe(struct tegra_output *output); +void tegra_output_remove(struct tegra_output *output); +int tegra_output_init(struct drm_device *drm, struct tegra_output *output); +void tegra_output_exit(struct tegra_output *output); +void tegra_output_find_possible_crtcs(struct tegra_output *output, + struct drm_device *drm); +int tegra_output_suspend(struct tegra_output *output); +int tegra_output_resume(struct tegra_output *output); + +int tegra_output_connector_get_modes(struct drm_connector *connector); +enum drm_connector_status +tegra_output_connector_detect(struct drm_connector *connector, bool force); +void tegra_output_connector_destroy(struct drm_connector *connector); + +/* from dpaux.c */ +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np); +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux); +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output); +int drm_dp_aux_detach(struct drm_dp_aux *aux); +int drm_dp_aux_enable(struct drm_dp_aux *aux); +int drm_dp_aux_disable(struct drm_dp_aux *aux); + +/* from fb.c */ +struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer, + unsigned int index); +bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer); +int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, + struct tegra_bo_tiling *tiling); +struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, + struct drm_file *file, + const struct drm_mode_fb_cmd2 *cmd); +int tegra_drm_fb_prepare(struct drm_device 
*drm); +void tegra_drm_fb_free(struct drm_device *drm); +int tegra_drm_fb_init(struct drm_device *drm); +void tegra_drm_fb_exit(struct drm_device *drm); + +extern struct platform_driver tegra_display_hub_driver; +extern struct platform_driver tegra_dc_driver; +extern struct platform_driver tegra_hdmi_driver; +extern struct platform_driver tegra_dsi_driver; +extern struct platform_driver tegra_dpaux_driver; +extern struct platform_driver tegra_sor_driver; +extern struct platform_driver tegra_gr2d_driver; +extern struct platform_driver tegra_gr3d_driver; +extern struct platform_driver tegra_vic_driver; + +#endif /* HOST1X_DRM_H */ diff --git a/drivers/gpu/drm/grate/dsi.c b/drivers/gpu/drm/grate/dsi.c new file mode 100644 index 0000000000000..f6d13602e1b78 --- /dev/null +++ b/drivers/gpu/drm/grate/dsi.c @@ -0,0 +1,1700 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2013 NVIDIA Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include