Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"These are mostly Oliver's Arm changes: lock ordering fixes for the
vGIC, and reverts for a buggy attempt to avoid RCU stalls on large
VMs.
Arm:
- Invalidate nested MMUs upon freeing the PGD to avoid WARNs when
visiting from an MMU notifier
- Fixes to the TLB match process and TLB invalidation range for
managing the VCNR pseudo-TLB
- Prevent SPE from erroneously profiling guests due to UNKNOWN reset
values in PMSCR_EL1
- Fix save/restore of host MDCR_EL2 to account for eagerly
programming at vcpu_load() on VHE systems
- Correct lock ordering when dealing with VGIC LPIs, avoiding
scenarios where an xarray's spinlock was nested with a *raw*
spinlock
- Permit stage-2 read permission aborts which are possible in the
case of NV depending on the guest hypervisor's stage-2 translation
- Call raw_spin_unlock() instead of the internal spinlock API
- Fix parameter ordering when assigning VBAR_EL1
- Reverted a couple of fixes for RCU stalls when destroying a stage-2
page table.
There appears to be some nasty refcounting / UAF issues lurking in
those patches and the band-aid we tried to apply didn't hold.
s390:
- mm fixes, including userfaultfd bug fix
x86:
- Sync the vTPR from the local APIC to the VMCB even when AVIC is
active.
This fixes a bug where host updates to the vTPR, e.g. via
KVM_SET_LAPIC or emulation of a guest access, are lost and result
in interrupt delivery issues in the guest"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: SVM: Sync TPR from LAPIC into VMCB::V_TPR even if AVIC is active
Revert "KVM: arm64: Split kvm_pgtable_stage2_destroy()"
Revert "KVM: arm64: Reschedule as needed when destroying the stage-2 page-tables"
KVM: arm64: vgic: fix incorrect spinlock API usage
KVM: arm64: Remove stage 2 read fault check
KVM: arm64: Fix parameter ordering for VBAR_EL1 assignment
KVM: arm64: nv: Fix incorrect VNCR invalidation range calculation
KVM: arm64: vgic-v3: Indicate vgic_put_irq() may take LPI xarray lock
KVM: arm64: vgic-v3: Don't require IRQs be disabled for LPI xarray lock
KVM: arm64: vgic-v3: Erase LPIs from xarray outside of raw spinlocks
KVM: arm64: Spin off release helper from vgic_put_irq()
KVM: arm64: vgic-v3: Use bare refcount for VGIC LPIs
KVM: arm64: vgic: Drop stale comment on IRQ active state
KVM: arm64: VHE: Save and restore host MDCR_EL2 value correctly
KVM: arm64: Initialize PMSCR_EL1 when in VHE
KVM: arm64: nv: fix VNCR TLB ASID match logic for non-Global entries
KVM: s390: Fix FOLL_*/FAULT_FLAG_* confusion
KVM: s390: Fix incorrect usage of mmu_notifier_register()
KVM: s390: Fix access to unavailable adapter indicator pages during postcopy
KVM: arm64: Mark freed S2 MMUs as invalid
diff --git a/MAINTAINERS b/MAINTAINERS
index f620696..6bbe4b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16196,6 +16196,7 @@
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Harry Yoo <harry.yoo@oracle.com>
+R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
S: Maintained
F: include/linux/rmap.h
@@ -16240,6 +16241,7 @@
R: Ryan Roberts <ryan.roberts@arm.com>
R: Dev Jain <dev.jain@arm.com>
R: Barry Song <baohua@kernel.org>
+R: Lance Yang <lance.yang@linux.dev>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index ad8d78f..de7867a 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -1250,10 +1250,12 @@ static int virtio_uml_probe(struct platform_device *pdev)
device_set_wakeup_capable(&vu_dev->vdev.dev, true);
rc = register_virtio_device(&vu_dev->vdev);
- if (rc)
+ if (rc) {
put_device(&vu_dev->vdev.dev);
+ return rc;
+ }
vu_dev->registered = 1;
- return rc;
+ return 0;
error_init:
os_close_file(vu_dev->sock);
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 617886d..21f0e50 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -535,7 +535,7 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
cmsg->cmsg_type != SCM_RIGHTS)
return n;
- memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len);
+ memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0));
return n;
}
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 4193e04..e3ad71a 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -20,8 +20,7 @@
void stack_protections(unsigned long address)
{
- if (mprotect((void *) address, UM_THREAD_SIZE,
- PROT_READ | PROT_WRITE | PROT_EXEC) < 0)
+ if (mprotect((void *) address, UM_THREAD_SIZE, PROT_READ | PROT_WRITE) < 0)
panic("protecting stack failed, errno = %d", errno);
}
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 8acad3c..f316520 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1795,6 +1795,7 @@ static int write_same_filled_page(struct zram *zram, unsigned long fill,
u32 index)
{
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_flag(zram, index, ZRAM_SAME);
zram_set_handle(zram, index, fill);
zram_slot_unlock(zram, index);
@@ -1832,6 +1833,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page,
kunmap_local(src);
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_flag(zram, index, ZRAM_HUGE);
zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, PAGE_SIZE);
@@ -1855,11 +1857,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
unsigned long element;
bool same_filled;
- /* First, free memory allocated to this slot (if any) */
- zram_slot_lock(zram, index);
- zram_free_page(zram, index);
- zram_slot_unlock(zram, index);
-
mem = kmap_local_page(page);
same_filled = page_same_filled(mem, &element);
kunmap_local(mem);
@@ -1901,6 +1898,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
zcomp_stream_put(zstrm);
zram_slot_lock(zram, index);
+ zram_free_page(zram, index);
zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, comp_len);
zram_slot_unlock(zram, index);
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index efeee0a..ab96b69 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -133,7 +133,7 @@ struct journal_sector {
commit_id_t commit_id;
};
-#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
+#define MAX_TAG_SIZE 255
#define METADATA_PADDING_SECTORS 8
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 79ea85d..f4b904e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3813,8 +3813,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
struct raid_set *rs = ti->private;
unsigned int chunk_size_bytes = to_bytes(rs->md.chunk_sectors);
- limits->io_min = chunk_size_bytes;
- limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
+ if (chunk_size_bytes) {
+ limits->io_min = chunk_size_bytes;
+ limits->io_opt = chunk_size_bytes * mddev_data_stripes(rs);
+ }
}
static void raid_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 5890209..1461dc74 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -456,11 +456,15 @@ static void stripe_io_hints(struct dm_target *ti,
struct queue_limits *limits)
{
struct stripe_c *sc = ti->private;
- unsigned int chunk_size = sc->chunk_size << SECTOR_SHIFT;
+ unsigned int io_min, io_opt;
limits->chunk_sectors = sc->chunk_size;
- limits->io_min = chunk_size;
- limits->io_opt = chunk_size * sc->stripes;
+
+ if (!check_shl_overflow(sc->chunk_size, SECTOR_SHIFT, &io_min) &&
+ !check_mul_overflow(io_min, sc->stripes, &io_opt)) {
+ limits->io_min = io_min;
+ limits->io_opt = io_opt;
+ }
}
static struct target_type stripe_target = {
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index 18fb441..4d0a38e 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -239,6 +239,14 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"),
}
},
+ {
+ .ident = "MECHREVO Yilong15Pro Series GM5HG7A",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "MECHREVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Yilong15Pro Series GM5HG7A"),
+ }
+ },
/* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */
{
.ident = "PCSpecialist Lafite Pro V 14M",
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index ef98860..bc544a4 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -403,6 +403,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = {
{"AMDI0103", 0},
{"AMDI0105", 0},
{"AMDI0107", 0},
+ {"AMDI0108", 0},
{ }
};
MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids);
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 3a488cf..6a62bc5 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -673,6 +673,8 @@ static void asus_nb_wmi_key_filter(struct asus_wmi_driver *asus_wmi, int *code,
if (atkbd_reports_vol_keys)
*code = ASUS_WMI_KEY_IGNORE;
break;
+ case 0x5D: /* Wireless console Toggle */
+ case 0x5E: /* Wireless console Enable / Keyboard Attach, Detach */
case 0x5F: /* Wireless console Disable / Special Key */
if (quirks->key_wlan_event)
*code = quirks->key_wlan_event;
diff --git a/drivers/platform/x86/oxpec.c b/drivers/platform/x86/oxpec.c
index eb076bb..54377b2 100644
--- a/drivers/platform/x86/oxpec.c
+++ b/drivers/platform/x86/oxpec.c
@@ -126,6 +126,13 @@ static const struct dmi_system_id dmi_table[] = {
},
{
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "AOKZOE"),
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "AOKZOE A1X"),
+ },
+ .driver_data = (void *)oxp_fly,
+ },
+ {
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AYANEO"),
DMI_MATCH(DMI_BOARD_NAME, "AYANEO 2"),
},
@@ -306,6 +313,13 @@ static const struct dmi_system_id dmi_table[] = {
},
.driver_data = (void *)oxp_x1,
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ONE-NETBOOK"),
+ DMI_EXACT_MATCH(DMI_BOARD_NAME, "ONEXPLAYER X1Pro EVA-02"),
+ },
+ .driver_data = (void *)oxp_x1,
+ },
{},
};
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index 93dcebb..ad2d9ec 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -1919,8 +1919,8 @@ static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di)
bool has_singe_flag = di->opts & BQ27XXX_O_ZERO;
cache.flags = bq27xxx_read(di, BQ27XXX_REG_FLAGS, has_singe_flag);
- if ((cache.flags & 0xff) == 0xff)
- cache.flags = -1; /* read error */
+ if (di->chip == BQ27000 && (cache.flags & 0xff) == 0xff)
+ cache.flags = -ENODEV; /* bq27000 hdq read error */
if (cache.flags >= 0) {
cache.capacity = bq27xxx_battery_read_soc(di);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 9bf282d..499a9ed 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1795,7 +1795,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
- return bg1->used > bg2->used;
+ /*
+ * Some other task may be updating the ->used field concurrently, but it
+ * is not serious if we get a stale value or load/store tearing issues,
+ * as sorting the list of block groups to reclaim is not critical and an
+ * occasional imperfect order is ok. So silence KCSAN and avoid the
+ * overhead of locking or any other synchronization.
+ */
+ return data_race(bg1->used > bg2->used);
}
static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0f8d8e2..c0c1ddd 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1843,7 +1843,6 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
struct inode *vfs_inode = &inode->vfs_inode;
@@ -1864,8 +1863,6 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
i_uid_write(vfs_inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(vfs_inode, btrfs_stack_inode_gid(inode_item));
btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
vfs_inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(vfs_inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(vfs_inode, btrfs_stack_inode_nbytes(inode_item));
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e7218e78..18db105 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3885,10 +3885,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
bool filled = false;
int first_xattr_slot;
- ret = btrfs_init_file_extent_tree(inode);
- if (ret)
- goto out;
-
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
filled = true;
@@ -3920,8 +3916,6 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
i_uid_write(vfs_inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(vfs_inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
- btrfs_inode_set_file_extent_range(inode, 0,
- round_up(i_size_read(vfs_inode), fs_info->sectorsize));
inode_set_atime(vfs_inode, btrfs_timespec_sec(leaf, &inode_item->atime),
btrfs_timespec_nsec(leaf, &inode_item->atime));
@@ -3953,6 +3947,11 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
btrfs_set_inode_mapping_order(inode);
cache_index:
+ ret = btrfs_init_file_extent_tree(inode);
+ if (ret)
+ goto out;
+ btrfs_inode_set_file_extent_range(inode, 0,
+ round_up(i_size_read(vfs_inode), fs_info->sectorsize));
/*
* If we were modified in the current generation and evicted from memory
* and then re-read we need to do a full sync since we don't have any
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7d5d908..7a63afe 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1964,7 +1964,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_EXTREF_KEY;
- search_key.offset = key->objectid;
+ search_key.offset = btrfs_extref_hash(key->objectid, name.name, name.len);
ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
if (ret < 0) {
goto out;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index ea66203..efc2a81 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2582,9 +2582,9 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
spin_lock(&space_info->lock);
space_info->total_bytes -= bg->length;
space_info->disk_total -= bg->length * factor;
+ space_info->disk_total -= bg->zone_unusable;
/* There is no allocation ever happened. */
ASSERT(bg->used == 0);
- ASSERT(bg->zone_unusable == 0);
/* No super block in a block group on the zoned setup. */
ASSERT(bg->bytes_super == 0);
spin_unlock(&space_info->lock);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 14868a3..bc52afb 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1075,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
************************************************************************/
static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
+ struct kobj_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%d.%d\n",
NILFS_CURRENT_REV, NILFS_MINOR_REV);
@@ -1087,7 +1087,7 @@ static const char features_readme_str[] =
"(1) revision\n\tshow current revision of NILFS file system driver.\n";
static ssize_t nilfs_feature_README_show(struct kobject *kobj,
- struct attribute *attr,
+ struct kobj_attribute *attr,
char *buf)
{
return sysfs_emit(buf, features_readme_str);
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
index 78a87a0..d370cd5 100644
--- a/fs/nilfs2/sysfs.h
+++ b/fs/nilfs2/sysfs.h
@@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups {
struct completion sg_segments_kobj_unregister;
};
-#define NILFS_COMMON_ATTR_STRUCT(name) \
+#define NILFS_KOBJ_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
struct attribute attr; \
- ssize_t (*show)(struct kobject *, struct attribute *, \
+ ssize_t (*show)(struct kobject *, struct kobj_attribute *, \
char *); \
- ssize_t (*store)(struct kobject *, struct attribute *, \
+ ssize_t (*store)(struct kobject *, struct kobj_attribute *, \
const char *, size_t); \
}
-NILFS_COMMON_ATTR_STRUCT(feature);
+NILFS_KOBJ_ATTR_STRUCT(feature);
#define NILFS_DEV_ATTR_STRUCT(name) \
struct nilfs_##name##_attr { \
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index 5466aa8..6550bd9 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -554,7 +554,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
case SMB_DIRECT_MSG_DATA_TRANSFER: {
struct smb_direct_data_transfer *data_transfer =
(struct smb_direct_data_transfer *)recvmsg->packet;
- unsigned int data_length;
+ u32 remaining_data_length, data_offset, data_length;
int avail_recvmsg_count, receive_credits;
if (wc->byte_len <
@@ -564,15 +564,25 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ remaining_data_length = le32_to_cpu(data_transfer->remaining_data_length);
data_length = le32_to_cpu(data_transfer->data_length);
- if (data_length) {
- if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
- (u64)data_length) {
- put_recvmsg(t, recvmsg);
- smb_direct_disconnect_rdma_connection(t);
- return;
- }
+ data_offset = le32_to_cpu(data_transfer->data_offset);
+ if (wc->byte_len < data_offset ||
+ wc->byte_len < (u64)data_offset + data_length) {
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
+ return;
+ }
+ if (remaining_data_length > t->max_fragmented_recv_size ||
+ data_length > t->max_fragmented_recv_size ||
+ (u64)remaining_data_length + (u64)data_length >
+ (u64)t->max_fragmented_recv_size) {
+ put_recvmsg(t, recvmsg);
+ smb_direct_disconnect_rdma_connection(t);
+ return;
+ }
+ if (data_length) {
if (t->full_packet_received)
recvmsg->first_segment = true;
@@ -1209,78 +1219,130 @@ static int smb_direct_writev(struct ksmbd_transport *t,
bool need_invalidate, unsigned int remote_key)
{
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
- int remaining_data_length;
- int start, i, j;
- int max_iov_size = st->max_send_size -
+ size_t remaining_data_length;
+ size_t iov_idx;
+ size_t iov_ofs;
+ size_t max_iov_size = st->max_send_size -
sizeof(struct smb_direct_data_transfer);
int ret;
- struct kvec vec;
struct smb_direct_send_ctx send_ctx;
+ int error = 0;
if (st->status != SMB_DIRECT_CS_CONNECTED)
return -ENOTCONN;
//FIXME: skip RFC1002 header..
+ if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
+ return -EINVAL;
buflen -= 4;
+ iov_idx = 1;
+ iov_ofs = 0;
remaining_data_length = buflen;
ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
- start = i = 1;
- buflen = 0;
- while (true) {
- buflen += iov[i].iov_len;
- if (buflen > max_iov_size) {
- if (i > start) {
- remaining_data_length -=
- (buflen - iov[i].iov_len);
- ret = smb_direct_post_send_data(st, &send_ctx,
- &iov[start], i - start,
- remaining_data_length);
- if (ret)
- goto done;
- } else {
- /* iov[start] is too big, break it */
- int nvec = (buflen + max_iov_size - 1) /
- max_iov_size;
+ while (remaining_data_length) {
+ struct kvec vecs[SMB_DIRECT_MAX_SEND_SGES - 1]; /* minus smbdirect hdr */
+ size_t possible_bytes = max_iov_size;
+ size_t possible_vecs;
+ size_t bytes = 0;
+ size_t nvecs = 0;
- for (j = 0; j < nvec; j++) {
- vec.iov_base =
- (char *)iov[start].iov_base +
- j * max_iov_size;
- vec.iov_len =
- min_t(int, max_iov_size,
- buflen - max_iov_size * j);
- remaining_data_length -= vec.iov_len;
- ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
- remaining_data_length);
- if (ret)
- goto done;
- }
- i++;
- if (i == niovs)
- break;
- }
- start = i;
- buflen = 0;
- } else {
- i++;
- if (i == niovs) {
- /* send out all remaining vecs */
- remaining_data_length -= buflen;
- ret = smb_direct_post_send_data(st, &send_ctx,
- &iov[start], i - start,
- remaining_data_length);
- if (ret)
+ /*
+ * For the last message remaining_data_length should be
+ * have been 0 already!
+ */
+ if (WARN_ON_ONCE(iov_idx >= niovs)) {
+ error = -EINVAL;
+ goto done;
+ }
+
+ /*
+ * We have 2 factors which limit the arguments we pass
+ * to smb_direct_post_send_data():
+ *
+ * 1. The number of supported sges for the send,
+ * while one is reserved for the smbdirect header.
+ * And we currently need one SGE per page.
+ * 2. The number of negotiated payload bytes per send.
+ */
+ possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);
+
+ while (iov_idx < niovs && possible_vecs && possible_bytes) {
+ struct kvec *v = &vecs[nvecs];
+ int page_count;
+
+ v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
+ v->iov_len = min_t(size_t,
+ iov[iov_idx].iov_len - iov_ofs,
+ possible_bytes);
+ page_count = get_buf_page_count(v->iov_base, v->iov_len);
+ if (page_count > possible_vecs) {
+ /*
+ * If the number of pages in the buffer
+ * is to much (because we currently require
+ * one SGE per page), we need to limit the
+ * length.
+ *
+ * We know possible_vecs is at least 1,
+ * so we always keep the first page.
+ *
+ * We need to calculate the number extra
+ * pages (epages) we can also keep.
+ *
+ * We calculate the number of bytes in the
+ * first page (fplen), this should never be
+ * larger than v->iov_len because page_count is
+ * at least 2, but adding a limitation feels
+ * better.
+ *
+ * Then we calculate the number of bytes (elen)
+ * we can keep for the extra pages.
+ */
+ size_t epages = possible_vecs - 1;
+ size_t fpofs = offset_in_page(v->iov_base);
+ size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len);
+ size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE);
+
+ v->iov_len = fplen + elen;
+ page_count = get_buf_page_count(v->iov_base, v->iov_len);
+ if (WARN_ON_ONCE(page_count > possible_vecs)) {
+ /*
+ * Something went wrong in the above
+ * logic...
+ */
+ error = -EINVAL;
goto done;
- break;
+ }
}
+ possible_vecs -= page_count;
+ nvecs += 1;
+ possible_bytes -= v->iov_len;
+ bytes += v->iov_len;
+
+ iov_ofs += v->iov_len;
+ if (iov_ofs >= iov[iov_idx].iov_len) {
+ iov_idx += 1;
+ iov_ofs = 0;
+ }
+ }
+
+ remaining_data_length -= bytes;
+
+ ret = smb_direct_post_send_data(st, &send_ctx,
+ vecs, nvecs,
+ remaining_data_length);
+ if (unlikely(ret)) {
+ error = ret;
+ goto done;
}
}
done:
ret = smb_direct_flush_send_list(st, &send_ctx, true);
+ if (unlikely(!ret && error))
+ ret = error;
/*
* As an optimization, we don't wait for individual I/O to finish
@@ -1744,6 +1806,11 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
return -EINVAL;
}
+ if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
+ pr_err("warning: device max_send_sge = %d too small\n",
+ device->attrs.max_send_sge);
+ return -EINVAL;
+ }
if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
pr_err("warning: device max_recv_sge = %d too small\n",
device->attrs.max_recv_sge);
@@ -1767,7 +1834,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
cap->max_send_wr = max_send_wrs;
cap->max_recv_wr = t->recv_credit_max;
- cap->max_send_sge = max_sge_per_wr;
+ cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
cap->max_inline_data = 0;
cap->max_rdma_ctxs = t->max_rw_credits;
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f13664c..9e62b2a 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -636,6 +636,7 @@ struct damon_operations {
* @data: Data that will be passed to @fn.
* @repeat: Repeat invocations.
* @return_code: Return code from @fn invocation.
+ * @dealloc_on_cancel: De-allocate when canceled.
*
* Control damon_call(), which requests specific kdamond to invoke a given
* function. Refer to damon_call() for more details.
@@ -645,6 +646,7 @@ struct damon_call_control {
void *data;
bool repeat;
int return_code;
+ bool dealloc_on_cancel;
/* private: internal use only */
/* informs if the kdamond finished handling of the request */
struct completion completion;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2fe6ed2..7012a0f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -385,6 +385,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
+static inline bool folio_may_be_lru_cached(struct folio *folio)
+{
+ /*
+ * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting.
+ * Holding small numbers of low-order mTHP folios in per-CPU LRU cache
+ * will be sensible, but nobody has implemented and tested that yet.
+ */
+ return !folio_test_large(folio);
+}
+
extern atomic_t lru_disable_count;
static inline bool lru_cache_disabled(void)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 312c6a8..77d02f8 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -126,8 +126,31 @@ DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
* of concurrent destructions. Use a separate workqueue so that cgroup
* destruction work items don't end up filling up max_active of system_wq
* which may lead to deadlock.
+ *
+ * A cgroup destruction should enqueue work sequentially to:
+ * cgroup_offline_wq: use for css offline work
+ * cgroup_release_wq: use for css release work
+ * cgroup_free_wq: use for free work
+ *
+ * Rationale for using separate workqueues:
+ * The cgroup root free work may depend on completion of other css offline
+ * operations. If all tasks were enqueued to a single workqueue, this could
+ * create a deadlock scenario where:
+ * - Free work waits for other css offline work to complete.
+ * - But other css offline work is queued after free work in the same queue.
+ *
+ * Example deadlock scenario with single workqueue (cgroup_destroy_wq):
+ * 1. umount net_prio
+ * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx)
+ * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx)
+ * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline.
+ * 5. net_prio root destruction blocks waiting for perf_event CSS A offline,
+ * which can never complete as it's behind in the same queue and
+ * workqueue's max_active is 1.
*/
-static struct workqueue_struct *cgroup_destroy_wq;
+static struct workqueue_struct *cgroup_offline_wq;
+static struct workqueue_struct *cgroup_release_wq;
+static struct workqueue_struct *cgroup_free_wq;
/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
@@ -4159,6 +4182,7 @@ static void cgroup_file_release(struct kernfs_open_file *of)
cft->release(of);
put_cgroup_ns(ctx->ns);
kfree(ctx);
+ of->priv = NULL;
}
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
@@ -5558,7 +5582,7 @@ static void css_release_work_fn(struct work_struct *work)
cgroup_unlock();
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
}
static void css_release(struct percpu_ref *ref)
@@ -5567,7 +5591,7 @@ static void css_release(struct percpu_ref *ref)
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_release_wq, &css->destroy_work);
}
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5701,7 +5725,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
list_del_rcu(&css->sibling);
err_free_css:
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
- queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
+ queue_rcu_work(cgroup_free_wq, &css->destroy_rwork);
return ERR_PTR(err);
}
@@ -5939,7 +5963,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
if (atomic_dec_and_test(&css->online_cnt)) {
INIT_WORK(&css->destroy_work, css_killed_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ queue_work(cgroup_offline_wq, &css->destroy_work);
}
}
@@ -6325,8 +6349,14 @@ static int __init cgroup_wq_init(void)
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
- cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
- BUG_ON(!cgroup_destroy_wq);
+ cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1);
+ BUG_ON(!cgroup_offline_wq);
+
+ cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1);
+ BUG_ON(!cgroup_release_wq);
+
+ cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1);
+ BUG_ON(!cgroup_free_wq);
return 0;
}
core_initcall(cgroup_wq_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index be00629..ccba6fc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9551,7 +9551,7 @@ static unsigned long tg_weight(struct task_group *tg)
#ifdef CONFIG_FAIR_GROUP_SCHED
return scale_load_down(tg->shares);
#else
- return sched_weight_from_cgroup(tg->scx_weight);
+ return sched_weight_from_cgroup(tg->scx.weight);
#endif
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 4ae32ef..088ceff 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -6788,12 +6788,8 @@ __bpf_kfunc u32 scx_bpf_reenqueue_local(void)
* CPUs disagree, they use %ENQUEUE_RESTORE which is bypassed to
* the current local DSQ for running tasks and thus are not
* visible to the BPF scheduler.
- *
- * Also skip re-enqueueing tasks that can only run on this
- * CPU, as they would just be re-added to the same local
- * DSQ without any benefit.
*/
- if (p->migration_pending || is_migration_disabled(p) || p->nr_cpus_allowed == 1)
+ if (p->migration_pending)
continue;
dispatch_dequeue(rq, p);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ccae62d..fa60362 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -908,6 +908,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
return -EINVAL;
}
buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
buf[len] = '\0';
ret = kstrtouint(buf, 0, &maxactive);
if (ret || !maxactive) {
diff --git a/mm/damon/core.c b/mm/damon/core.c
index c2e0b46..08065b363 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -2479,10 +2479,14 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel)
mutex_lock(&ctx->call_controls_lock);
list_del(&control->list);
mutex_unlock(&ctx->call_controls_lock);
- if (!control->repeat)
+ if (!control->repeat) {
complete(&control->completion);
- else
+ } else if (control->canceled && control->dealloc_on_cancel) {
+ kfree(control);
+ continue;
+ } else {
list_add(&control->list, &repeat_controls);
+ }
}
control = list_first_entry_or_null(&repeat_controls,
struct damon_call_control, list);
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 7b9254c..c96c215 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1534,14 +1534,10 @@ static int damon_sysfs_repeat_call_fn(void *data)
return 0;
}
-static struct damon_call_control damon_sysfs_repeat_call_control = {
- .fn = damon_sysfs_repeat_call_fn,
- .repeat = true,
-};
-
static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
{
struct damon_ctx *ctx;
+ struct damon_call_control *repeat_call_control;
int err;
if (damon_sysfs_kdamond_running(kdamond))
@@ -1554,18 +1550,29 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond)
damon_destroy_ctx(kdamond->damon_ctx);
kdamond->damon_ctx = NULL;
+ repeat_call_control = kmalloc(sizeof(*repeat_call_control),
+ GFP_KERNEL);
+ if (!repeat_call_control)
+ return -ENOMEM;
+
ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]);
- if (IS_ERR(ctx))
+ if (IS_ERR(ctx)) {
+ kfree(repeat_call_control);
return PTR_ERR(ctx);
+ }
err = damon_start(&ctx, 1, false);
if (err) {
+ kfree(repeat_call_control);
damon_destroy_ctx(ctx);
return err;
}
kdamond->damon_ctx = ctx;
- damon_sysfs_repeat_call_control.data = kdamond;
- damon_call(ctx, &damon_sysfs_repeat_call_control);
+ repeat_call_control->fn = damon_sysfs_repeat_call_fn;
+ repeat_call_control->data = kdamond;
+ repeat_call_control->repeat = true;
+ repeat_call_control->dealloc_on_cancel = true;
+ damon_call(ctx, repeat_call_control);
return err;
}
diff --git a/mm/gup.c b/mm/gup.c
index adffe66..0bc4d14 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2287,8 +2287,8 @@ static unsigned long collect_longterm_unpinnable_folios(
struct pages_or_folios *pofs)
{
unsigned long collected = 0;
- bool drain_allow = true;
struct folio *folio;
+ int drained = 0;
long i = 0;
for (folio = pofs_get_folio(pofs, i); folio;
@@ -2307,9 +2307,17 @@ static unsigned long collect_longterm_unpinnable_folios(
continue;
}
- if (!folio_test_lru(folio) && drain_allow) {
+ if (drained == 0 && folio_may_be_lru_cached(folio) &&
+ folio_ref_count(folio) !=
+ folio_expected_ref_count(folio) + 1) {
+ lru_add_drain();
+ drained = 1;
+ }
+ if (drained == 1 && folio_may_be_lru_cached(folio) &&
+ folio_ref_count(folio) !=
+ folio_expected_ref_count(folio) + 1) {
lru_add_drain_all();
- drain_allow = false;
+ drained = 2;
}
if (!folio_isolate_lru(folio))
diff --git a/mm/mlock.c b/mm/mlock.c
index a1d93ad..bb0776f 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -255,7 +255,7 @@ void mlock_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -278,7 +278,7 @@ void mlock_new_folio(struct folio *folio)
folio_get(folio);
if (!folio_batch_add(fbatch, mlock_new(folio)) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
@@ -299,7 +299,7 @@ void munlock_folio(struct folio *folio)
*/
folio_get(folio);
if (!folio_batch_add(fbatch, folio) ||
- folio_test_large(folio) || lru_cache_disabled())
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
}
diff --git a/mm/swap.c b/mm/swap.c
index 3632dd0..b74ebe8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -164,6 +164,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
for (i = 0; i < folio_batch_count(fbatch); i++) {
struct folio *folio = fbatch->folios[i];
+ /* block memcg migration while the folio moves between lru */
+ if (move_fn != lru_add && !folio_test_clear_lru(folio))
+ continue;
+
folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
move_fn(lruvec, folio);
@@ -176,14 +180,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
}
static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
- struct folio *folio, move_fn_t move_fn,
- bool on_lru, bool disable_irq)
+ struct folio *folio, move_fn_t move_fn, bool disable_irq)
{
unsigned long flags;
- if (on_lru && !folio_test_clear_lru(folio))
- return;
-
folio_get(folio);
if (disable_irq)
@@ -191,8 +191,8 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
else
local_lock(&cpu_fbatches.lock);
- if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) ||
- lru_cache_disabled())
+ if (!folio_batch_add(this_cpu_ptr(fbatch), folio) ||
+ !folio_may_be_lru_cached(folio) || lru_cache_disabled())
folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
if (disable_irq)
@@ -201,13 +201,13 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
local_unlock(&cpu_fbatches.lock);
}
-#define folio_batch_add_and_move(folio, op, on_lru) \
- __folio_batch_add_and_move( \
- &cpu_fbatches.op, \
- folio, \
- op, \
- on_lru, \
- offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq) \
+#define folio_batch_add_and_move(folio, op) \
+ __folio_batch_add_and_move( \
+ &cpu_fbatches.op, \
+ folio, \
+ op, \
+ offsetof(struct cpu_fbatches, op) >= \
+ offsetof(struct cpu_fbatches, lock_irq) \
)
static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
@@ -231,10 +231,10 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
void folio_rotate_reclaimable(struct folio *folio)
{
if (folio_test_locked(folio) || folio_test_dirty(folio) ||
- folio_test_unevictable(folio))
+ folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
- folio_batch_add_and_move(folio, lru_move_tail, true);
+ folio_batch_add_and_move(folio, lru_move_tail);
}
void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
@@ -328,10 +328,11 @@ static void folio_activate_drain(int cpu)
void folio_activate(struct folio *folio)
{
- if (folio_test_active(folio) || folio_test_unevictable(folio))
+ if (folio_test_active(folio) || folio_test_unevictable(folio) ||
+ !folio_test_lru(folio))
return;
- folio_batch_add_and_move(folio, lru_activate, true);
+ folio_batch_add_and_move(folio, lru_activate);
}
#else
@@ -507,7 +508,7 @@ void folio_add_lru(struct folio *folio)
lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
folio_set_active(folio);
- folio_batch_add_and_move(folio, lru_add, false);
+ folio_batch_add_and_move(folio, lru_add);
}
EXPORT_SYMBOL(folio_add_lru);
@@ -685,13 +686,13 @@ void lru_add_drain_cpu(int cpu)
void deactivate_file_folio(struct folio *folio)
{
/* Deactivating an unevictable folio will not accelerate reclaim */
- if (folio_test_unevictable(folio))
+ if (folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
if (lru_gen_enabled() && lru_gen_clear_refs(folio))
return;
- folio_batch_add_and_move(folio, lru_deactivate_file, true);
+ folio_batch_add_and_move(folio, lru_deactivate_file);
}
/*
@@ -704,13 +705,13 @@ void deactivate_file_folio(struct folio *folio)
*/
void folio_deactivate(struct folio *folio)
{
- if (folio_test_unevictable(folio))
+ if (folio_test_unevictable(folio) || !folio_test_lru(folio))
return;
if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio))
return;
- folio_batch_add_and_move(folio, lru_deactivate, true);
+ folio_batch_add_and_move(folio, lru_deactivate);
}
/**
@@ -723,10 +724,11 @@ void folio_deactivate(struct folio *folio)
void folio_mark_lazyfree(struct folio *folio)
{
if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
+ !folio_test_lru(folio) ||
folio_test_swapcache(folio) || folio_test_unevictable(folio))
return;
- folio_batch_add_and_move(folio, lru_lazyfree, true);
+ folio_batch_add_and_move(folio, lru_lazyfree);
}
void lru_add_drain(void)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a48aec8..6749999 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4507,7 +4507,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
}
/* ineligible */
- if (!folio_test_lru(folio) || zone > sc->reclaim_idx) {
+ if (zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c
index 7ebd352..beaf366 100644
--- a/samples/damon/mtier.c
+++ b/samples/damon/mtier.c
@@ -208,6 +208,9 @@ static int damon_sample_mtier_enable_store(
if (enabled == is_enabled)
return 0;
+ if (!init_called)
+ return 0;
+
if (enabled) {
err = damon_sample_mtier_start();
if (err)
diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c
index 1b839c06..0226652 100644
--- a/samples/damon/prcl.c
+++ b/samples/damon/prcl.c
@@ -137,6 +137,9 @@ static int damon_sample_prcl_enable_store(
if (enabled == is_enabled)
return 0;
+ if (!init_called)
+ return 0;
+
if (enabled) {
err = damon_sample_prcl_start();
if (err)
diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c
index da05202..21eaf15 100644
--- a/samples/damon/wsse.c
+++ b/samples/damon/wsse.c
@@ -118,6 +118,9 @@ static int damon_sample_wsse_enable_store(
return 0;
if (enabled) {
+ if (!init_called)
+ return 0;
+
err = damon_sample_wsse_start();
if (err)
enabled = false;
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 9ef5694..ddaeb4e 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -75,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
size_t ci, cj, ei;
int cmp;
+ if (!excludes->cnt)
+ return;
+
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd49703..0786344 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -2009,6 +2009,7 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
.cgroups = RB_ROOT,
};
+ struct perf_env host_env;
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
if (!lockhash_table)
@@ -2024,7 +2025,10 @@ static int __cmd_contention(int argc, const char **argv)
eops.mmap = perf_event__process_mmap;
eops.tracing_data = perf_event__process_tracing_data;
- session = perf_session__new(use_bpf ? NULL : &data, &eops);
+ perf_env__init(&host_env);
+ session = __perf_session__new(use_bpf ? NULL : &data, &eops,
+ /*trace_event_repipe=*/false, &host_env);
+
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
err = PTR_ERR(session);
@@ -2142,6 +2146,7 @@ static int __cmd_contention(int argc, const char **argv)
evlist__delete(con.evlist);
lock_contention_finish(&con);
perf_session__delete(session);
+ perf_env__exit(&host_env);
zfree(&lockhash_table);
return err;
}
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 85b2a93..779f623 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -477,6 +477,7 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
/* Insert the value at the end. */
maps_by_address[nr_maps] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name)
maps_by_name[nr_maps] = map__get(new);
@@ -502,8 +503,6 @@ static int __maps__insert(struct maps *maps, struct map *new)
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- map__set_kmap_maps(new, maps);
-
return 0;
}
@@ -891,6 +890,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+ map__set_kmap_maps(before, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -918,6 +918,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -942,14 +943,13 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
maps_by_name[ni] = map__get(new);
}
- map__set_kmap_maps(new, maps);
-
check_invariants(maps);
return err;
}
@@ -1019,6 +1019,7 @@ int maps__copy_from(struct maps *dest, struct maps *parent)
err = unwind__prepare_access(dest, new, NULL);
if (!err) {
dest_maps_by_address[i] = new;
+ map__set_kmap_maps(new, dest);
if (dest_maps_by_name)
dest_maps_by_name[i] = map__get(new);
RC_CHK_ACCESS(dest)->nr_maps = i + 1;